# Code for downloading world development indicators from the World Bank.
# We use the World Bank's API via the WDI package in R.

# Install WDI only when it is missing, so re-running the script does not
# re-download the package every time.
if (!requireNamespace("WDI", quietly = TRUE)) {
  install.packages("WDI", repos = "http://cran.us.r-project.org")
}
library(WDI) # load package for use

# install.packages('lattice',repos="http://cran.us.r-project.org")
library(lattice) # we already installed lattice package

# install.packages('car',repos="http://cran.us.r-project.org")
library(car) # car should be pre-installed. If not we'll install it

# The main function is WDI():
#   country   -- vector of two-character ISO-2 country codes, such as "US".
#   indicator -- vector of WDI indicator codes; use WDIsearch() to find them.
#   start     -- starting year of the data.
#   end       -- ending year of the data.
#   extra     -- when TRUE, also returns other variables on each country,
#                such as region and income.
#
# The data are returned in country-year 'long' format,
# for example, row observations on US 1992, followed by row US 1993, etc.

# Use the WDIsearch() function to look up indicators:
#   string -- the search string.
#   field  -- the field to search: 'indicator', 'name', 'description',
#             'sourceDatabase', 'sourceOrganization'.
#   short  -- TRUE returns indicator name and code; FALSE returns a name,
#             code, and a description.

# Let's search for indicators on fertility rate.
# `short` is a logical flag, so pass TRUE rather than the string "TRUE".
WDIsearch(string = "fertility", field = "name", short = TRUE)

# search for GDP
WDIsearch(string = "GDP", field = "name", short = TRUE)

# With more search results, match up descriptions with variable names
# by line of output.
# Example of matching search output lines:
#   [82,] "GDP (current US$)"
#   [82,] "NY.GDP.MKTP.CD"
# so the name of GDP in current $US is NY.GDP.MKTP.CD

# search for life expectancy
WDIsearch(string = "life expectancy", field = "name", short = TRUE)

# income
WDIsearch(string = "income", field = "name", short = TRUE)

# We will download data on GDP per capita and fertility and construct two
# simple scatterplots.
#   [94,] "NY.GDP.PCAP.KD" "GDP per capita (constant 2000 US$)"
#   [2,]  "SP.DYN.TFRT.IN" "Fertility rate, total (births per woman)"
wbdata1 <- WDI(
  country = "all",
  indicator = c("NY.GDP.PCAP.KD", "SP.DYN.TFRT.IN"),
  start = 2010,
  end = 2010,
  extra = TRUE
)
head(wbdata1) # observe data in header

# Use subset() to remove aggregate totals (Arab world, East Asia, etc.),
# keeping only rows whose region is not "Aggregates".
wbdata1 <- subset(wbdata1, region != "Aggregates")
head(wbdata1) # observe data in header

# plotting -- what's wrong with this picture?
# (The original call had a stray extra closing parenthesis, which is a
# syntax error; it is removed here.)
xyplot(
  SP.DYN.TFRT.IN ~ NY.GDP.PCAP.KD,
  data = wbdata1,
  ylab = "Fertility rate, total (births per woman)",
  xlab = "GDP per capita (constant US dollars)"
)

# GDP is very right skewed!
histogram(
  wbdata1$NY.GDP.PCAP.KD,
  xlab = "Histogram of GDP per capita (constant 2000 US$)"
)

# Creating log GDP to reduce skew.
# It's a natural log, log(100) = 4.60517, as in e^4.60517 = 100.
# Natural log of GDP per capita, stored as a new column.
wbdata1$loggdp <- log(wbdata1$NY.GDP.PCAP.KD)
histogram(wbdata1$loggdp) # skewness gone

# let's look at fertility --- skewed, but not so bad
histogram(wbdata1$SP.DYN.TFRT.IN)

library(car) # use the car package for the scatterplot() function

# Label the 8 most unusual points with their country names.
# car >= 3.0 takes point-identification settings as a list via `id=`
# (the older `id.n=` / `labels=` arguments are no longer recognised).
scatterplot(
  SP.DYN.TFRT.IN ~ loggdp,
  data = wbdata1,
  smooth = FALSE,
  id = list(n = 8, labels = wbdata1$country),
  main = "GDP by Fertility Rate, Nation-States in 2010",
  ylab = "Fertility rate, total (births per woman)",
  xlab = "log GDP per capita (constant US dollars)"
)

# If interested in comparing regions, then compare regions:
# we'll use the lattice package xyplot(), one panel per region.
# (smooth / id.n / labels are car::scatterplot() arguments and have no
# meaning for lattice's xyplot(), so they are not passed here.)
xyplot(
  SP.DYN.TFRT.IN ~ loggdp | region,
  data = wbdata1,
  main = "GDP by Fertility Rate, Nation-States in 2010",
  ylab = "Fertility rate, total (births per woman)",
  xlab = "log GDP per capita (constant US dollars)"
)

# Classic use of World Bank data is in a comparison of life expectancy and
# wealth. These data are the basis of Hans Rosling's famous TED talks and
# Gapminder software. http://www.gapminder.org. Google purchased Gapminder.
# How to create Gapminder visualizations with an R package from Google,
# googleVis:

# First we need time series cross sectional data, across nations from
# 1950 to 2012:
wbdata5012 <- WDI(
  country = "all",
  indicator = c("NY.GDP.PCAP.KD", "SP.DYN.TFRT.IN", "SP.POP.TOTL"),
  start = 1950,
  end = 2012,
  extra = TRUE
)
head(wbdata5012) # observe data in header

# Use subset() to remove aggregate totals (Arab world, East Asia, etc.).
wbdata5012 <- subset(wbdata5012, region != "Aggregates")
head(wbdata5012) # observe data in header

# how far back do the years go?
summary(wbdata5012$year) # only to 1960

# Install googleVis only when it is missing. A repos value is given so the
# install also works in non-interactive sessions where no mirror is set.
if (!requireNamespace("googleVis", quietly = TRUE)) {
  install.packages("googleVis", repos = "http://cran.us.r-project.org")
}
library(googleVis)

# Build the motion chart: one bubble per country, animated over years,
# coloured by region and sized by total population.
# NOTE(review): motion charts historically relied on Flash -- confirm the
# installed googleVis version and browser still support gvisMotionChart().
myfirstmotionchart <- gvisMotionChart(
  wbdata5012,
  idvar = "country",
  timevar = "year",
  colorvar = "region",
  sizevar = "SP.POP.TOTL"
)

# gvisMotionChart writes an HTML file with the motionchart a.k.a. GapMinder
# plot. Use plot() to bring up the chart automatically in your default
# browser.
plot(myfirstmotionchart)