# This file is an R script file. You can edit it by typing commands into it. 
# Comments begin with a # symbol
# R ignores anything following a # symbol. 
# Anything else is a line of text containing a command with instructions for R

# Script files are used to send commands from the script file to the Console pane.  
# To run a line of R code --- to send it to the Console window to be interpreted: 
#   With the cursor on the line, or multiple lines highlighted,
#    click 'Run' on the upper right corner of the script file pane. Or you could
# 	a) highlight it and press Control+Return (on Windows) or command+Return (on Mac), or 
#   b) copy and paste it to the Console
 

# getwd() reports the current working directory: the folder where R looks
# for files you plan to work with, and where it stores any output sent to file.
getwd()

# list.files() -- also available under the shorter name dir() -- lists the
# contents of the working directory. Everything in the folder is listed,
# even files that have nothing to do with R.
list.files()

# When the Console shows the > prompt, R is waiting for a new command.
# If R is stuck partway through a command (indicated by a '+' prompt),
# either complete the command or press the 'Esc' key to start again.

# Import a saved R workspace straight from the course website.
# The .RData file extension is commonly used for an R workspace; this one
# holds the data for the lab as three different data frames (datasets).
load(url("http://faculty.gvsu.edu/kilburnw/pollingexcerpt.RData"))

# ls() lists the names of the datasets, and of any other objects created in R.
ls()

# Inspect the names of the variables stored in poll.
names(poll)

# poll is a very small excerpt from the 2004 American National Election Studies,
# a representative sample of U.S. citizens of voting age by the November election day. 
# Most of the variables in the dataset are individual scores on 'feeling thermometers' toward
# candidates and social groups. Respondents are asked to rate how warmly or coolly they feel
# toward people, like George Bush (the variable bushft), from 0 to 100 degrees.
# other variables are a liberal-conservative self placement, respondent gender, educational attainment
# and a few others


# Because multiple datasets can exist in the same workspace, R expects you to identify a datasource.
# To refer to a variable, type the name of its dataset, then a dollar sign, then the name of the variable (for example, poll$cheneyft).


# Frequency distribution of responses to cheneyft. In the printed output each
# response value sits directly above its frequency.
# Notice in this example that 53 people in the survey rated Cheney at "100",
# while 159 people rated Cheney at "0".
table(poll$cheneyft)

# Tabulate the gender variable from the poll dataset.
table(poll$gender)

# Calculating summary statistics ---
# For a numeric variable, summary() reports the minimum, quartiles, median,
# mean and maximum (the five-number summary plus the mean).
summary(poll$cheneyft)

# Try calculating summary statistics on another feeling thermometer.

# Other functions, such as tapply(), take multiple arguments.
# Here we calculate mean feelings toward feminists by gender of respondent;
# na.rm = TRUE is passed on to mean() so missing responses are skipped.
tapply(X = poll$feministsft, INDEX = poll$gender, FUN = mean, na.rm = TRUE)

# A basic histogram of feelings toward feminists, using default settings.
hist(poll$feministsft)

# The same histogram with axis labels, a title, and the y axis fixed at 0-300.
hist(
  poll$feministsft,
  main = "feelings toward feminists, 2004 ANES",
  xlab = "Feelings toward 'Feminists'",
  ylab = "Response Frequency",
  ylim = c(0, 300)
)

# A box-and-whiskers plot: one box of feminist ratings per value of party.
boxplot(
  feministsft ~ party,
  data = poll,
  main = "feelings toward feminists by party of respondent, 2004 ANES",
  xlab = "Respondent Party Identification",
  ylab = "Feelings toward 'Feminists'"
)


# The lattice package adds some interesting plotting features.
# If it is not installed yet, install it once with:
#   install.packages("lattice")
# It should already be installed on the campus labs, so here we only have to
# call it into memory.
library(lattice)

# Just two examples:
# a histogram of feelings toward feminists
histogram(~feministsft, data = poll)

# a histogram of feelings toward feminists, by party identification of
# survey respondent:
histogram(~feministsft | factor(party), data = poll)

# Another example: density curves for bushft and kerryft in a single panel.
# data = poll is required so that the formula variables are looked up inside
# the poll data frame; without it R searches the workspace and stops with
# "object 'bushft' not found".
densityplot(~ bushft + kerryft, data = poll,
            plot.points = FALSE, auto.key = TRUE)

# ############################ Plotly website interlude!
# # In your web browser go to https://plot.ly/r/

# ##########################################################
## We are going to look at integration between R and plotly:

# The plotly R package requires ggplot2, so both packages must be installed.
# Install each one only if it is missing, instead of re-downloading it on
# every run of the script; packages are fetched from CRAN over https.
# (To upgrade an already-installed copy to the most recent version, call
# install.packages() on it directly.)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = "https://cran.us.r-project.org")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.us.r-project.org")
}

library(plotly)

# A histogram of the hclintonft thermometer. Passing poll as the data
# argument lets the formula refer to the column by its bare name, which also
# gives a cleaner axis label than ~poll$hclintonft.
plot_ly(poll, x = ~hclintonft, type = "histogram")

# Two overlaid histograms (hclintonft and bclintonft), drawn semi-transparent
# so both distributions stay visible; each trace is named for the legend.
p <- plot_ly(poll, alpha = 0.6) %>%
  add_histogram(x = ~hclintonft, name = "hclintonft") %>%
  add_histogram(x = ~bclintonft, name = "bclintonft") %>%
  layout(barmode = "overlay")

# Assigning the plot to p does not draw it; print the object to display it.
p

## These two prior examples are a bit rough.  
## Here's a better one from the website

# Example using the sample dataset on diamonds: draw 1000 rows at random,
# then plot price against carat, with colour and marker size both mapped to
# carat and the clarity supplied as the text for each point.
d <- diamonds[sample.int(nrow(diamonds), size = 1000), ]
plot_ly(
  data = d,
  x = ~carat,
  y = ~price,
  color = ~carat,
  size = ~carat,
  text = ~paste("Clarity: ", clarity)
)

##########
### Reading in an external datafile as a .csv (Comma Separated Value file)
##########

# First download the bostonhousing.txt datafile from the DIL R resources
# website into your working directory. The line below then reads it into
# your workspace as the object boston.
# header = TRUE is already the default for read.csv(); it is typed out as a
# reminder about the header.
# read.table() is a similar function.
boston <- read.csv("bostonhousing.txt", header = TRUE)