# Bret Larget # August 25, 2011 # R examples for cow data # Students can copy and paste commands into R to see effects. # The entire file can be read into R and executed with the command # # > source("cows.R") # # or from a menu in R. # Doing this causes all the graphs to be displayed one after another on the graphics device. # The graphs can be captured into a PDF file by uncommenting the command here ### pdf("cows.pdf") # and the dev.off() command at the end of the file. #################################################################################################### # read in the cow data # make sure that working directory for R contains the file cows.txt # note that the top row is a header # each field is separated by white space (spaces, tabs, and so on) cows = read.table("cows.txt",header=T) # check the structure to make sure data was read properly # make sure that the number of observations and type of each observation is correct str(cows) # load the lattice library # this is needed for the dotplot() and densityplot() commands library(lattice) # For this data set, it is most interesting to compare quantitative variables by treatment group # The first argument to all lattice graphing commands is a formula of the form y ~ x # where y and x are usually variables. # For dotplot(), however, there is only an x variable so we leave y empty. # Here there is a | added to the formula followed by a factor (categorical variable). # The resulting plot will be broken into sections with a separate graph for each portion of the data # with the data partitioned by levels of the variable treatment. # The second argument to dotplot() is the name of the data frame that contains the variables used. plot( dotplot(~age | treatment, data=cows) ) # The previous plot arranged the graphs in a 2 by 2 layout. # To better compare groups, force the layout to have one column and four rows. plot( dotplot(~age | treatment, data=cows, layout=c(1,4)) ) # The tretment levels are ordered alphabetically (and graphed low to high). # Ordering treatment levels by level of additive makes more sense. # This command replaces the treatment variable in the cows data frame # with the same data, but with the levels ordered by the variable level. # Note the use of $ to specify a variable from the data frame. cows$treatment = reorder(cows$treatment,cows$level) # Redo the previous plot to see the difference plot( dotplot(~age | treatment, data=cows, layout=c(1,4)) ) # A more interesting variable is fat, where there looks to be more of a difference among groups plot( dotplot(~fat | treatment, data=cows, layout=c(1,4)) ) # Density plots overlayed is another effective way to examine this data # Here, rather than adding `| treatment' to the formula which would produce four graphs, # adding the argument groups=treatment results in a single graph with different colors for each treatment group. plot( densityplot(~fat, data=cows, groups=treatment) ) # Add a simple key to the right of the plot to show names of groups. plot( densityplot(~fat, data=cows, groups=treatment, auto.key=list(space="right")) ) # Compute mean fat for each group print( with(cows, sapply(split(fat,treatment),mean) ) ) # Looking way ahead, do a quick one-way ANOVA analysis # See that there is statistical evidence for differences in mean milk fat percentage by weight # among the groups, and this difference is largely explained by a higher mean in the high treatment group # with level 0.3. fit = lm(fat ~ treatment, data=cows) print( anova(fit) ) print( summary(fit) ) ### dev.off()