# Code for downloading world development indicators from the World Bank
# We will use the World Bank's API via the WDI package in R

# Install the WDI package only when it is not already available, then attach
# it so that WDI() and WDIsearch() can be called below.
if (!requireNamespace("WDI", quietly = TRUE)) {
  install.packages("WDI", repos = "http://cran.us.r-project.org")
}
library(WDI)

# install.packages('lattice',repos="http://cran.us.r-project.org")
library(lattice) # we already installed lattice package  

# install.packages('car',repos="http://cran.us.r-project.org")
library(car)   # car should be pre-installed. If not we'll install it

# load package for use 
# main function is WDI()
# country is vector of two character abbreviated names, ISO-2, such as "US". 
# indicator is vector of WDI indicators, use WDIsearch() for availability.
# start is starting year, end is ending year of data
# extra, when set to TRUE, returns additional variables on each country,
#       such as region and income.
# 
# The data are returned in country-year 'long' format, 
#   for example, row observations on US 1992, followed by row US 1993, etc. 

# use the WDIsearch() function
# string --- the search string          
# field -- the field to search: 'indicator', 'name', 'description', 'sourceDatabase', 'sourceOrganization'
# short -- returns indicator name and code. False returns a name, code, and a description

# Example searches of the indicator catalogue by indicator name.
# `short` is a logical flag, so it is passed as TRUE rather than the string "TRUE".

# Let's search for indicators on fertility rate
WDIsearch(string = "fertility", field = "name", short = TRUE)

# search for GDP
WDIsearch(string = "GDP", field = "name", short = TRUE)
# with more search results, match up descriptions with variable names by line of output.
# [82,] "GDP (current US$)"
# [82,] "NY.GDP.MKTP.CD" the name of GDP in current $US is NY.GDP.MKTP.CD

# search for life expectancy
WDIsearch(string = "life expectancy", field = "name", short = TRUE)

# income
WDIsearch(string = "income", field = "name", short = TRUE)


# Pull 2010 values of GDP per capita and total fertility for all countries;
# these two series are used for the simple scatterplots that follow.
#   NY.GDP.PCAP.KD -- "GDP per capita (constant 2000 US$)"        (search row [94,])
#   SP.DYN.TFRT.IN -- "Fertility rate, total (births per woman)"  (search row [2,])
wbdata1 <- WDI(
  country = "all",
  indicator = c("NY.GDP.PCAP.KD", "SP.DYN.TFRT.IN"),
  start = 2010,
  end = 2010,
  extra = TRUE
)

# Peek at the first rows as downloaded.
head(wbdata1)

# Keep only rows whose region is not "Aggregates"
# (aggregate totals such as Arab world, East Asia, etc.).
wbdata1 <- subset(wbdata1, subset = region != "Aggregates")

# Peek at the first rows again after filtering.
head(wbdata1)

# plotting -- what's wrong with this picture?
# Fertility against raw (unlogged) GDP per capita, using lattice's xyplot().
# (A stray closing parenthesis after this call used to make the script
# unparseable; the call is now balanced.)
xyplot(SP.DYN.TFRT.IN ~ NY.GDP.PCAP.KD, data = wbdata1,
       ylab = "Fertility rate, total (births per woman)",
       xlab = "GDP per capita (constant US dollars)")

# Raw GDP per capita: very right skewed.
histogram(
  wbdata1$NY.GDP.PCAP.KD,
  xlab = "Histogram of GDP per capita (constant 2000 US$)"
)

# Add a column holding the natural log of GDP per capita to reduce the skew.
# (Natural log: log(100) = 4.60517, i.e. e^4.60517 = 100.)
wbdata1[["loggdp"]] <- log(wbdata1[["NY.GDP.PCAP.KD"]])

# Distribution of the logged variable -- the skew is gone.
histogram(wbdata1$loggdp)

# Fertility for comparison -- skewed, but not so bad.
histogram(wbdata1$SP.DYN.TFRT.IN)

library(car) # use the car package for the scatterplot() function
# Fertility against log GDP per capita, without a smoother, labelling the
# 8 most noteworthy points with their country names.
# Point identification is requested through the `id` list argument; the older
# separate `id.n=` / `labels=` arguments are the legacy car interface.
# NOTE(review): assumes car >= 3.0 is installed -- confirm.
scatterplot(SP.DYN.TFRT.IN ~ loggdp, data = wbdata1, smooth = FALSE,
            id = list(n = 8, labels = wbdata1$country),
            main = "GDP by Fertility Rate, Nation-States in 2010",
            ylab = "Fertility rate, total (births per woman)",
            xlab = "log GDP per capita (constant US dollars)")

# If interested in comparing regions, then compare regions:
# we'll use the lattice package xyplot(), with one panel per region.
# Only lattice arguments are passed here; smooth=, id.n= and labels= belong to
# car::scatterplot() and are not xyplot() arguments.
xyplot(SP.DYN.TFRT.IN ~ loggdp | region, data = wbdata1,
       main = "GDP by Fertility Rate, Nation-States in 2010",
       ylab = "Fertility rate, total (births per woman)",
       xlab = "log GDP per capita (constant US dollars)")

# Classic use of World Bank data is in a comparison of life expectancy and wealth. 
# These data are the basis of Hans Rosling's famous TED talks and Gapminder software.
# http://www.gapminder.org. Google purchased Gapminder. 

# How to create Gapminder visualizations with an R package from Google, googleVis:


# Time-series cross-sectional data: GDP per capita, fertility and total
# population for all countries, requesting the years 1950 through 2012.
wbdata5012 <- WDI(
  country = "all",
  indicator = c("NY.GDP.PCAP.KD", "SP.DYN.TFRT.IN", "SP.POP.TOTL"),
  start = 1950,
  end = 2012,
  extra = TRUE
)

# Peek at the first rows as downloaded.
head(wbdata5012)

# Keep only rows whose region is not "Aggregates"
# (aggregate totals such as Arab world, East Asia, etc.).
wbdata5012 <- subset(wbdata5012, subset = region != "Aggregates")

# Peek at the first rows again after filtering.
head(wbdata5012)

# How far back do the years actually go? (only to 1960)
summary(wbdata5012$year)

# Install googleVis only when it is missing. The CRAN mirror is given
# explicitly (same one used for WDI) so the install also works in a
# non-interactive session where no default mirror has been chosen.
if (!requireNamespace("googleVis", quietly = TRUE)) {
  install.packages("googleVis", repos = "http://cran.us.r-project.org")
}

library(googleVis)

# Build the motion chart: one bubble per country, animated over year,
# coloured by region and sized by total population.
# NOTE(review): motion charts historically relied on Adobe Flash; recent
# googleVis releases may no longer support gvisMotionChart() -- confirm.
myfirstmotionchart <- gvisMotionChart(
  wbdata5012,
  idvar = "country",
  timevar = "year",
  colorvar = "region",
  sizevar = "SP.POP.TOTL"
)

# gvisMotionChart writes an HTML file with the motionchart a.k.a. GapMinder plot
# use plot() to bring up the chart automatically in your default browser.
plot(myfirstmotionchart)