DATA MINING
Desktop Survival Guide by Graham Williams |
|||||
Bump Chart |
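See http://junkcharts.typepad.com/junk_charts/bumps_chart/ for an example of a bump chart. For comparison, the following first draws the data as a conventional grouped bar chart: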
# Grouped bar chart of the percentage of people living on less than 1 USD
# per day (1993 prices) in 1990 and 2004, one pair of bars per region.

# Region labels and the two yearly series, in matching order.
countries <- c(
  "U-lande", "Afrika syd for sahara", "Europa og Centralasien",
  "Lantinamerika og Caribien", "Mellemøstenog Nordafrika", "Sydasien",
  "ØStasien og stillehaveet", "Kina", "Brasilien"
)
poor_1990 <- c(28.7, 46.7, 0.5, 10.2, 2.3, 43, 29.8, 33, 14)
poor_2004 <- c(18.1, 41.1, 0.9, 8.6, 1.5, 30.8, 9.1, 9.9, 7.5)

# One row per region, one column per year.
poor <- cbind(poor_1990, poor_2004)
rownames(poor) <- countries

# Remember the current graphics settings so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)

# Enlarge the bottom margin to make room for the long axis labels drawn with
# las = 3. The return value is deliberately not assigned: storing it in a
# variable called `par` would shadow the par() function in the workspace.
par(mar = c(15, 5, 5, 1))

# Rows are sorted by the 2004 value (ascending); transposing puts the two
# years side by side within each region.
barplot(
  t(poor[order(poor[, 2]), ]),
  beside = TRUE,
  col = c(1, 2),
  las = 3,
  ylab = "% poor",
  main = "Percent living for < 1 USD per day (1993 prices)",
  ylim = c(0, 50)
)
legend("topleft", c("1990", "2004"), fill = c(1, 2), bty = "n")

# Restore the graphics settings saved above.
par(oldpar)
The same data can instead be drawn as a bump chart (a two-axis parallel coordinates plot) using ggplot2:
# Bump chart (two-axis parallel coordinates) of the same poverty figures:
# first on the percentage scale, then on the within-year rank scale.
library(ggplot2)

# Some data.
countries <- c(
  "U-lande", "Afrika syd for sahara", "Europa og Centralasien",
  "Lantinamerika og Caribien", "Mellemøstenog Nordafrika", "Sydasien",
  "ØStasien og stillehaveet", "Kina", "Brasilien"
)
poor_1990 <- c(28.7, 46.7, 0.5, 10.2, 2.3, 43, 29.8, 33, 14)
poor_2004 <- c(18.1, 41.1, 0.9, 8.6, 1.5, 30.8, 9.1, 9.9, 7.5)

# Reformat the data to long form: one row per (country, year) with the
# percentage in `value`. Built with base R so that no reshaping package
# (the original relied on reshape's melt()) has to be attached. Rows are
# ordered as all 1990 values followed by all 2004 values.
data <- data.frame(
  countries = rep(countries, times = 2),
  year = factor(rep(c("1990", "2004"), each = length(countries)),
                levels = c("1990", "2004")),
  value = c(poor_1990, poor_2004)
)

# Make a new column to make the text justification easier: 1 for the first
# year level and 0 for the second, so the labels sit outside the lines on
# either side.
data$hjust <- 1 - (as.numeric(data$year) - 1)

# Start the percentage plot. `group` (not `groups`) is the ggplot2 aesthetic
# that makes geom_line() draw one line per country.
p <- ggplot(data, aes(x = year, y = value, group = countries))
# Add the axis labels.
p <- p + labs(x = "\nYear", y = "%\n")
# Add lines.
p <- p + geom_line()
# Add the text.
p <- p + geom_text(aes(label = countries, hjust = hjust))
# Expand the axis to fit the text.
p <- p + scale_x_discrete(expand = c(2, 2))
# Show the plot.
print(p)

# Rank the countries within each year (rank 1 = lowest percentage).
data$rank <- NA
data$rank[data$year == "1990"] <- rank(data$value[data$year == "1990"])
data$rank[data$year == "2004"] <- rank(data$value[data$year == "2004"])

# Start the rank plot.
r <- ggplot(data, aes(x = year, y = rank, group = countries))
# Add the axis labels.
r <- r + labs(x = "\nYear", y = "Rank\n")
# Add the lines.
r <- r + geom_line()
# Use the same x-axis expansion as the percentage plot (this plot has no
# text layer of its own).
r <- r + scale_x_discrete(expand = c(2, 2))
# Show the plot.
print(r)