# Exercise 0116 ----

# (a) Read in the data.
# `header = TRUE` tells R that the first row contains variable names.
ex0116 <- read.table("ex0116.txt", header = TRUE)

# For comma-separated files, `sep = ","` tells R that a comma is used to
# separate fields:
# ex0116 <- read.table("ex0116.txt", header = TRUE, sep = ",")

# Check to see that the data was read in correctly.
ex0116

# Columns are looked up with with(data, expr) rather than attach(): the
# variables can still be written by short name (Distance instead of
# ex0116$Distance), but nothing is left on the search path where it could
# mask, or be masked by, another object of the same name.

# (b) Draw a scatterplot of distance versus order.
with(ex0116, plot(Order, Distance))

# (c) Repeat using logs for distance.
with(ex0116, plot(Order, log(Distance)))

# (d) Find the mean and SD of the distances.
with(ex0116, mean(Distance))
with(ex0116, sd(Distance))

# (e) Find the mean and SD of the log distances.
with(ex0116, mean(log(Distance)))
with(ex0116, sd(log(Distance)))


# Exercise 0218 ----
# Verify the computations in Display 2.9.

case0201 <- read.table("case0201.txt", header = TRUE)
case0201

# Means and standard deviations.
# The square brackets select the subset of Humerus that meets a criterion.
with(case0201, mean(Humerus[Status == "PERISHED"]))
with(case0201, mean(Humerus[Status == "SURVIVED"]))
with(case0201, sd(Humerus[Status == "PERISHED"]))
with(case0201, sd(Humerus[Status == "SURVIVED"]))

# Means and SDs again, but using split and vapply.
# split divides a variable according to the categories of another variable
# (creating a list with one element per category).
# vapply applies a function to each element of that list and returns a named
# vector; `numeric(1)` declares that every result is a single number.
humerus_by_status <- with(case0201, split(Humerus, Status))
vapply(humerus_by_status, mean, numeric(1))
vapply(humerus_by_status, sd, numeric(1))

# Difference in means.
# unname drops the group names from the vector of means, and diff takes the
# difference between adjacent entries in a vector.
diff(unname(vapply(humerus_by_status, mean, numeric(1))))

# Find a quantile of a t distribution (0.975 quantile, 57 degrees of freedom).
qt(0.975, 57)

# Carry out a pooled-variance t test and find a 95% confidence interval.
t.test(Humerus ~ Status, data = case0201, var.equal = TRUE)

# Compute an area under a t curve (p-value).
# pt(x, df) gives the area to the left of x under a t curve with df degrees
# of freedom.

# One-sided example.
1 - pt(1.778, 57)

# Two-sided example.
2 * (1 - pt(1.778, 57))