# This file is an R script file. You can edit it by typing commands into it.
# Comments begin with a # symbol; R ignores anything following a # symbol.
# Anything else is a line of text containing a command with instructions for R.
# Script files are used to send commands from the script file to the Console
# pane.
#
# To run a line of R code (send it to the Console to be interpreted):
# with the cursor on the line, or with multiple lines highlighted,
# click 'Run' in the upper right corner of the script file pane. Or you could
#   a) highlight it and press Control+Return (on Windows) or
#      command+Return (on Mac), or
#   b) copy and paste it to the Console.

# Working directory ----

# getwd() lists the current working directory.
# The working directory is the location where R will look for files you plan
# to work with, and where it will store any output sent to file.
getwd()

# dir() lists the contents of your file folder.
# It lists everything in the folder, even if it has nothing to do with R.
dir()

# R is waiting for a command when it shows the > symbol.
# If R is stuck on a command (indicated by a '+' symbol),
# either complete the command or press the 'Esc' button to start again.

# Loading the lab data ----

# The file is a workspace, which will be imported into R.
# NOTE: load() restores every object stored in the file into your workspace,
# so only load workspaces from sources you trust.
load(file = url("http://faculty.gvsu.edu/kilburnw/pollingexcerpt.RData"))

# The data for the lab are loaded in three different dataframes (datasets).
# The .RData file extension is commonly used for an R workspace.
# ls() lists the names of the datasets, and any other objects created in R.
ls()

# Inspect the names of the variables.
names(poll)

# poll is a very small excerpt from the 2004 American National Election
# Studies, a representative sample of U.S. citizens of voting age by the
# November election day. Most of the variables in the dataset are individual
# scores on 'feeling thermometers' toward candidates and social groups.
# Respondents are asked to rate how warmly or coolly they feel toward people,
# like George Bush (the variable bushft), from 0 to 100 degrees.
# Other variables are a liberal-conservative self placement, respondent
# gender, educational attainment, and a few others.

# Frequency tables ----

# Because multiple datasets can exist in the same workspace, R expects you to
# identify a data source: the name of a dataset, followed by a dollar sign,
# followed by the name of a variable.

# Frequency distribution of responses. Values are printed above the
# frequencies.
table(poll$cheneyft)
# Notice in the above example, 53 people in the survey rated Cheney at "100",
# while 159 people rated Cheney at "0".

# Tabulate the gender variable from the poll dataset.
table(poll$gender)

# Summary statistics ----

# summary() provides the five-number summary (plus the mean).
summary(poll$cheneyft)
# Try calculating summary statistics on another feeling thermometer.

# Other functions, such as tapply(), have multiple arguments.
# Here we calculate mean feelings toward feminists by gender of respondent:
tapply(poll$feministsft, poll$gender, mean, na.rm = TRUE)

# Base graphics ----

# A histogram.
hist(poll$feministsft)

# With labels and adjusted y scaling.
hist(
  poll$feministsft,
  ylim = c(0, 300),
  xlab = "Feelings toward 'Feminists'",
  ylab = "Response Frequency",
  main = "feelings toward feminists, 2004 ANES"
)

# A box-and-whiskers plot.
boxplot(
  feministsft ~ party,
  data = poll,
  xlab = "Respondent Party Identification",
  ylab = "Feelings toward 'Feminists'",
  main = "feelings toward feminists by party of respondent, 2004 ANES"
)

# lattice ----

# Let's use a package, called lattice, that includes some interesting
# features. If it is not installed yet, install it once with:
# install.packages("lattice")
# It should already be installed on the campus labs; we just have to call it
# into memory.
library(lattice)

# Just two examples.
# A histogram of feelings toward feminists:
histogram(~feministsft, data = poll)

# A histogram of feelings toward feminists, by party identification of the
# survey respondent:
histogram(~feministsft | factor(party), data = poll)

# Another example: overlaid density curves for two thermometers.
# data = poll is required so the formula variables can be found.
densityplot(
  ~ bushft + kerryft,
  data = poll,
  plot.points = FALSE,
  auto.key = TRUE
)

# Plotly website interlude! ----
#
# In your web browser go to https://plot.ly/r/
#
# We are going to look at integration between R and plotly.

# The plotly R package requires a recent version of ggplot2. Install each
# package only if it is missing, so re-running the script does not reinstall.
# If plotly complains about the ggplot2 version, run
# update.packages(oldPkgs = "ggplot2") once.
for (pkg in c("ggplot2", "plotly")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}
library(plotly)

# A histogram:
plot_ly(x = ~poll$hclintonft, type = "histogram")

# Two overlaid histograms:
p <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~poll$hclintonft) %>%
  add_histogram(x = ~poll$bclintonft) %>%
  layout(barmode = "overlay")
# Assigning the plot to p does not draw it; print p to display it.
p

# These two prior examples are a bit rough.
# Here's a better one from the website: an example with a sample dataset on
# diamonds. The diamonds data ship with ggplot2, so refer to it explicitly.
d <- ggplot2::diamonds[sample(nrow(ggplot2::diamonds), 1000), ]
plot_ly(
  d,
  x = ~carat,
  y = ~price,
  color = ~carat,
  size = ~carat,
  text = ~paste("Clarity: ", clarity)
)

# Reading in an external datafile as a .csv (Comma Separated Value file) ----

# Download to your working directory the bostonhousing.txt datafile from the
# DIL R resources website. The line below reads it in to your workspace.
# header = TRUE is the default, but it is typed out as a reminder about the
# header row. read.table() is a similar function.
boston <- read.csv(file = "bostonhousing.txt", header = TRUE)