# You are on page 1 of 7

# A short R tutorial, by Statistics CAs

# A good habit is to keep everything neat in your working directory.
# Windows users: File / Change dir...
# Mac users: Misc / Change Working Directory
getwd() # helpful to write in your R script

# Here is how to see a list of all the objects stored in your workspace:
ls()

# Now let's create some new vectors ----
v <- 1:10 # vector of integers 1 to 10
v
w <- c(2, 3, 5, 7, 11)
w
w2 <- c("apples", "oranges", 7) # mixed input: everything becomes character
w2

# The 'class' function tells you the type of object you have
class(w)
class(w2)

# Different uses of the 'rep' command:
zeros <- rep(0, 5) # vector of five zeros
zeros
# Alternatively, you could avoid using the rep command and use:
1:5 - 1:5
y1 <- rep(1:10, each = 2)
y1
y2 <- rep(1:10, 2)
y2
y3 <- rep(1:4, rep(1:4))
y3
y4 <- rep(1:4, rep(rep(1:2), 2))
y4

# Randomly generated vectors ----
# Vector of 15 independent randomly generated values from a normal
# distribution with mean 2 and standard deviation 0.1
normals <- rnorm(15, mean = 2, sd = 0.1)
normals
uniforms <- runif(10, min = 1, max = 2)
uniforms

digits <- 0:9
randomDigits <- sample(digits, size = 6, replace = TRUE)
randomDigits

# In addition to generating random samples, we can also compute quantiles
# of a distribution as follows:
qnorm(p = 0.975, mean = 0, sd = 1)
qexp(p = 0.975, rate = 1)
# There is also the pdf and cdf function:
dnorm(x = 2, mean = 0, sd = 1) # pdf
pnorm(q = 2, mean = 0, sd = 1) # cdf

# Selecting elements in vectors ----
# Let's say I want to grab the 3rd uniform in the above vector:
uniforms[3]
# Perhaps we want the 3rd, 4th, 5th, 6th, and 10th elements:
uniforms[c(3:6, 10)]
# Suppose I want to change the 3rd entry to 0 for some reason:
uniforms[3] <- 0
uniforms

# Now suppose I want to find the indices of all the elements in my vector
# 'normals' which are greater than 2.
# seq_along() is used instead of 1:length() so an empty vector yields no
# indices rather than c(1, 0).
seq_along(normals)[normals > 2]
# Alternatively, you can use:
which(normals > 2)
# To get the actual values that are greater than 2:
normals[normals > 2]

# Basic operations on vectors ----
sum(normals)
prod(normals)
mean(normals)
median(normals)
sd(normals)
var(normals)
length(normals)
max(normals)
min(normals)
range(normals)
# An excellent command to know:
summary(normals)

# Sort the vector 'normals' from largest to smallest:
sort(normals, decreasing = TRUE) # gives the numerical values
order(normals, decreasing = TRUE) # gives the index values

# Now let's construct some matrices ----
# byrow = FALSE fills column by column; byrow = TRUE fills row by row.
A <- matrix(1:12, nrow = 4, ncol = 3, byrow = FALSE)
A
B <- matrix(1:12, nrow = 4, ncol = 3, byrow = TRUE)
B
cbind(A, B)
rbind(A, B)

# The transpose of A:
t(A)

colnames(A)
colnames(A) <- c("C1", "C2", "C3")
rownames(A) <- c("R1", "R2", "R3", "R4")
A

# Here is how to compute the rank of a matrix:
qr(A)$rank

# Let's take a section of the above matrix A
M <- A[2:3, 2:3]
M
det(M)
eigen(M)
qr(M)$rank

# This section has full rank, so it has an inverse given by
M_inv <- solve(M)
M_inv

# CAREFUL! The following is ELEMENT-WISE multiplication of M and M_inv:
M * M_inv

# This is how to multiply two matrices in the usual sense:
Identity <- M %*% M_inv
Identity

# Round the 'Identity' matrix found above
round(Identity, digits = 1)

# Diagonal matrices ----
# 5-by-5 identity matrix:
diag(5)
# Numbers 1 to 5 on the diagonal:
diag(1:5)
# Extract the diagonal elements:
# example 1:
diag(diag(1:5))
# example 2:
A
diag(A)

# Note that all of the above objects appear in our workspace:
ls()

# If we wish to remove an object from our workspace, we can do so as follows:
rm(A)

# If we wish to clear all stored objects from the workspace environment
# (demonstration only -- avoid doing this inside scripts you share):
rm(list = ls(all.names = TRUE))

# Now, let's try importing some data into R ----
# Built-in data from datasets
data(cars)
?cars
# plot() accepts the data frame directly, so no attach() is needed.
plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)", las = 1)

# From Faraway:
# FIRST: We need to install the faraway library; this is the same if you
# want to install the ISwR library.
# install.packages("faraway")
library(faraway)
data(aatemp)
dim(aatemp)
aatemp
?aatemp

# with() evaluates the plotting call inside the data frame, so the columns
# 'year' and 'temp' are visible without attaching aatemp to the search path.

# This is a very basic plot: notice there is not even a title - NOT GOOD!
with(aatemp, plot(year, temp))

# A better plot would be:
# Fill in the output file
# postscript("/temp.ps", width = 6, height = 6.5)
with(
  aatemp,
  plot(
    year, temp,
    main = "Annual mean temperature in Ann Arbor as a function of year",
    xlab = "Year",
    ylab = "Annual mean temperature (in degrees F) ",
    xlim = c(1850, 2000), ylim = c(43, 52), axes = FALSE
  )
)
axis(side = 1, at = seq(1850, 2000, 25))
axis(side = 2, at = seq(43, 52, 1))
# dev.off()

# Add the least squares fit line to the plot:
mtext("The least squares fit is shown by the dashed line", cex = 0.8)
abline(lm(temp ~ year, data = aatemp), lty = "dashed")

# Note that in the above plot, we can figure out the plotting window by
# looking at the range of our data:
range(aatemp$temp)
range(aatemp$year)

# A side-by-side comparison of the previous two plots:
# par() returns the previous settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 1))
# First the bad plot:
with(aatemp, plot(year, temp))
# Then the improved plot:
with(
  aatemp,
  plot(
    year, temp,
    main = "Annual mean temperature in Ann Arbor as a function of year",
    xlab = "Year",
    ylab = "Annual mean temperature (in degrees F) ",
    xlim = c(1850, 2000), ylim = c(43, 52), axes = FALSE
  )
)
axis(side = 1, at = seq(1850, 2000, 25))
axis(side = 2, at = seq(43, 52, 1))
mtext("The least squares fit is shown by the dashed line", cex = 0.8)
abline(lm(temp ~ year, data = aatemp), lty = "dashed")

# As another example of abline, suppose I wanted to put a line through the
# mean of all the annual mean temperatures
abline(h = mean(aatemp$temp), lty = "dotted")

# Restore the single-panel layout before drawing the histograms.
par(old_par)

# Make a histogram of the temperatures
with(aatemp, hist(temp, breaks = seq(43, 52, 0.5)))

# Perhaps I want to fit a theoretical probability curve to my histogram:
with(
  aatemp,
  hist(
    temp,
    breaks = seq(43, 52, 0.5), probability = TRUE,
    main = "Probability Density Histogram of the Annual Mean Temperature"
  )
)
# sd = 1.5 corresponds to the variance 2.25 quoted in the label below.
curve(dnorm(x, 47.75, 1.5), from = 43, to = 53, add = TRUE)
text(50.75, .2, "Curve is the N(47.75,2.25) pdf")

# Now let's read in a data frame from our own computer ----
# FILL IN THE QUOTES WITH THE FILE NAME
# Note that we use sep = "," for a comma-separated file (csv)
Frame <- read.table("", sep = ",", header = TRUE)

# If we wish to edit the data frame after it has been loaded in
FrameNew <- edit(Frame)

# Make a plot of a quantitative variable vs a qualitative variable
# (assumes the file has columns 'Section' and 'Midterm.Grade')
plot(Frame$Section, Frame$Midterm.Grade,
  main = "Midterm Grade vs Class Section",
  xlab = "Section Number", ylab = "Midterm Grade"
)

# Not what we wanted! Need to change 'Section' to a factor:
Section.F <- as.factor(Frame$Section)

plot(Section.F, Frame$Midterm.Grade,
  main = "Midterm Grade vs Class Section",
  xlab = "Section Number", ylab = "Midterm Grade"
)

# Some basic functions ----
# Remainder and integer division
21 %% 6
14 %% 3
14 %/% 3
floor(14 / 3)
ceiling(14 / 3)

# Make your own function ----

# mydistance: returns sqrt(x^4 + y^4) for numeric x and y (vectorized).
# NOTE(review): a Euclidean distance would be sqrt(x^2 + y^2); the fourth
# powers are kept exactly as in the original tutorial -- confirm intent.
mydistance <- function(x, y) {
  sqrt(x^4 + y^4)
}
mydistance(5, 6)

# mysum: sum of the absolute values of all entries of a matrix.
#   M        a numeric matrix (anything as.matrix() accepts also works)
#   returns  a single double; 0 for a matrix with no entries
# The vectorized form replaces the original double loop, which failed on
# empty matrices because 1:0 counts down. Converting to double before
# summing keeps the original double return type for integer input.
mysum <- function(M) {
  sum(as.numeric(abs(as.matrix(M))))
}
# 'A' was removed from the workspace earlier in the tutorial, so rebuild it.
A <- matrix(1:12, nrow = 4, ncol = 3)
mysum(A)

# Probability distributions ----

# Binomial
# Compute P(55 < X < 65) for X ~ Binomial(100, 0.6)
# NOTE(review): the original comment said Binomial(100, 0.65) while the code
# used 0.6; the code value is kept -- confirm which was intended.
sum(dbinom(56:64, 100, 0.6))
binomials <- rbinom(500, 100, .5)
hist(binomials)

# Normal -- usage reference only (x, q, p and n are placeholders):
# dnorm(x, mean = 0, sd = 1, log = FALSE)
# pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
# qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
# rnorm(n, mean = 0, sd = 1)

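# Student t -- usage reference only (x, q, p, n, df, ncp are placeholders):
# dt(x, df, ncp, log = FALSE)
# pt(q, df, ncp, lower.tail = TRUE, log.p = FALSE)
# qt(p, df, ncp, lower.tail = TRUE, log.p = FALSE)
# rt(n, df, ncp)

# Chi-square -- see ?dchisq:
# dchisq(x, df, ncp = 0, log = FALSE)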

# You might also like