# Intro to R

# What is R?
# Free, open-source, easily-extensible statistical software and programming
# environment, which runs on all major operating systems.

# Where do I get it?
# http://www.r-project.org
# http://cran.wustl.edu

# NOTE: this is an interactive walkthrough meant to be run one line at a time.
# A few lines below error on purpose to illustrate a point, so source()-ing
# the whole file will stop at the first of them.

# Basic syntax
5 + 3
x <- 5 # a command, giving x the value 5
x = 5 # a command, giving x the value 5
x == 5 # a question, with answers TRUE and FALSE
plot(x = 5, y = 7) # specifies that parameter x has value 5
plot(x == 5, y = 7) # error ('==' compares; it does not name a parameter)

# Scalars are just vectors of length one
a <- 7
a
a[1]
a[2] # indexing past the end of the vector

# Vectors: indexing and element-wise arithmetic
b <- c(2, 4, 6)
b
b[2]
b[2:3]
b * 4
b + 3
b * b
b[3] <- 100
b
B # names are case-sensitive: 'B' has not been assigned in this script

# Matrices
C <- matrix(1:4, nrow = 2)
C
t(C)
D <- matrix(5:8, nrow = 2)
D
C * D # element-wise product
C %*% D # matrix product
C * 2

# Functions apply to whole vectors
exp(5)
exp(b)

# Character vectors and negative (exclusion) indexing
e <- c("one", "two", "three")
e[2]
G <- letters
G
G[c(1:10)]
G[-c(1:10)]

unique(c(6, 1, 2, 3, 1, 2, 4, 1, 2, 5))
sort(unique(c(6, 1, 2, 3, 1, 2, 4, 1, 2, 5)))
sample(1:100, 3)

# sample data sets
data(swiss)
swiss
head(swiss)
?swiss
str(swiss)
names(swiss)
edit(swiss) # Lets you see the data; changes are not saved
fix(swiss) # Lets you see the data; changes *are* saved to 'swiss'
cleaned.up.swiss <- edit(swiss) # lets you see the data; changes are saved to 'cleaned.up.swiss'

# Getting help
?exp
?"exp"
?"^"
?"%%"
help.search("exp")
apropos("exp")
# ?foo shows less than apropos("foo") which shows less than help.search("foo")

# RSeek.org
# http://www.rseek.org/
# Search for "cheat sheet"; gets you: http://cran.r-project.org/doc/contrib/Short-refcard.pdf

# Installing and using packages
install.packages("car")
library(car)
?car
library(foreign) # Read and write various proprietary data formats, including Stata and SPSS

# Loading data
# Search for "logit"; gets you http://www.ats.ucla.edu/stat/r/dae/logit.htm for third hit.
# Download the example admissions data used in the rest of the walkthrough.
mydata <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/logit.csv"))

# Manipulating data

# One and two-dimensional descriptive statistics
summary(swiss)
summary(mydata)
mean(mydata$gre) # note that we're using the "full name" of the variable
median(mydata$gre)
sd(mydata$gre)
table(mydata$admit)
table(mydata$gre)
hist(mydata$gre)
# Density-scaled histogram so the kernel density curve can be overlaid on it.
hist(mydata$gre, probability = TRUE)
lines(density(mydata$gre))
# Quantile-comparison plot from the 'car' package. The old name qq.plot() is
# defunct in current releases of car; qqPlot() is its replacement. Namespaced
# so it works whether or not library(car) has been called.
car::qqPlot(mydata$gpa)
plot(mydata$gre, mydata$gpa)

# Regression
# Formula + data= form (same style as the glm() call below); coefficients are
# then labelled 'gre' rather than 'mydata$gre'.
reg.mydata <- lm(gpa ~ gre, data = mydata)
reg.mydata
summary(reg.mydata)
plot(mydata$gre, mydata$gpa,
  xlab = "GRE", ylab = "GPA",
  main = "Observed Relationship Between GRE Score and GPA"
)
abline(reg.mydata) # add the fitted regression line to the scatterplot
abline(reg.mydata, lwd = 2, col = "green") # same line, thicker and in green

# Probit/logit
# Re-reads the same file loaded at the top of this section; redundant if
# 'mydata' is already in the workspace.
mydata <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/logit.csv"))
mylogit <- glm(admit ~ gre + gpa + topnotch,
  data = mydata,
  family = binomial(link = "logit")
)
summary(mylogit)