# February 5, 2008 # Analysis of birds and bats data # Bret Larget # The data set is from a summary of several studies # that measure the mass and energy expenditure of several # flying birds and bats. # # The interesting biological question is to see if echo-locating bats # expend more energy than non-echo-locating bats, correcting for mass. # Since there is a huge disparity between the masses of these two types of bats, # the study includes several bird species where the mass of individuals spans # the gap. # An interesting study question will be to ask if the type of animal # (bird, echo-locating bat, or non-echo-locating bat) is an important factor # in fitting a relationship between energy and mass. # The data source is the Statistical Sleuth by Ramsey and Shafer. # The analysis here will differ. # The variables in the data set are: # species: name of species (we will not use this in the models) # mass: in grams, mass of the animal # type: a factor with levels bird, eBat, and nBat # energy: in Watts, energy expended while flying # # Read in the data # # header=T as the first line of the file contains variable names bats = read.table("bats.txt",header=T) # command to check the structure of the bats data frame. str(bats) # # Plot the data # # Load lattice library for graphics functions library(lattice) # Various univariate plots to show that non-echo-locating bats # have very different mass and energy expenditures than # echo-locating bats # # If these commands were typed in directly, # they would create a graph. # When this file is read into R via source(), # the lattice plotting commands will do nothing # unless they are arguments to print() or plot() plot( bwplot(mass ~ type,data=bats) ) plot( bwplot(energy ~ type,data=bats) ) # Vanilla scatterplot of energy versus mass # Note that one or two points look like outliers # and the pattern of points, while definitely positively associated, # may be better fit by a curve than by a straight line. plot( xyplot(energy ~ mass,data=bats) ) # Change the plotting character to a solid dot # for better visibility plot( xyplot(energy ~ mass,data=bats,pch=16) ) # Replot and add a fitted regression line # This plot also makes clear that there is some curvature: # exteme mass value points fall below the line, middle mass value points are above. plot( xyplot(energy ~ mass,data=bats,pch=16,type=c("p","r")) ) # Replot using different colors for each type plot( xyplot(energy ~ mass,groups=type,data=bats,pch=16) ) # Replot adding a key above the plot # (Note that the key does NOT use the new plotting character plot( xyplot(energy ~ mass,groups=type,data=bats,pch=16,auto.key=T) ) # Replot where the key is separated into three columns plot( xyplot(energy ~ mass,groups=type,data=bats,pch=16,auto.key=list(columns=3)) ) # Replot with the key on the right plot( xyplot(energy ~ mass,groups=type,data=bats,pch=16,auto.key=list(space="right")) ) # Here is a single plot that incorporates the following features: # (1) separate colors for each type # (2) separate symbols for each type # (3) a key that matches # (4) an included regression line for all points # This is the kind of thing you want stored as a file to source in # so you can edit it to make changes if desired. # Once you get it right, you want to quickly forget the syntax of what you just did. # # The specification of the key is just plain ugly, but I do not know a simpler way. # The panel specification is a function that plots the points using the default panel.xyplot function # and then adds a line. # The first with() is so that some arguments deep in the call to xyplot # know the names of the bats variables. # The command here is separated onto several lines so that each separate argument is easier to read. with(bats, plot( xyplot(energy ~ mass, data=bats, groups=type, pch=c(1,3,6), # circle, plus, inverted triangle panel = function(x,...) { panel.xyplot(x,...) panel.abline(coef(lm(energy~mass))) # add the regression line to the plot }, key = list(space = "top", # key is on top of plot columns = 3, # key is in three columns text = list(levels(type)), # use the default level names of type points = list(col = trellis.par.get("superpose.symbol")$col[1:3], # use first three default colors pch = c(1,3,6) # better match the ones used above! ) ) ) ) ) # # Fitting models # # A simple linear regression of energy ~ mass fit0 = lm(energy ~ mass,data=bats) # Three different ways to find the estimated coefficients # summary() is verbose print( summary(fit0) ) # coef() is just the coefficients print( coef(fit0) ) # display from the arm library is short and sweet, # but rounds off too much when the estimated coefficients # are different orders of magnitude library(arm) print( display(fit0) ) # After a question in class, here is a fit of # a curve (a quadratic model) to the data fit0.5 = lm(energy ~ mass + I(mass^2),data=bats) print( summary(fit0.5) ) # Here is a linear model that includes a categorical variable # The effect of this is to fit three parallel lines, but to allow # different intercepts. fit1 = lm(energy ~ mass + type,data=bats) print( display(fit1) ) print( summary(fit1) ) # This code, an edited version of what came before, # will plot the three parallel lines. # It uses the specific knowledge that the groups have these slopes # and intercepts where fit1.coef = coef(fit1) fit1.coef = coef(fit1) # intercept slope # birds fit1.coef[1] fit1.coef[2] # eBats sum(fit1.coef[c(1,3)]) fit1.coef[2] # nBats sum(fit1.coef[c(1,4)]) fit1.coef[2] with(bats, plot( xyplot(energy ~ mass, data=bats, groups=type, pch=c(1,3,6), # circle, plus, inverted triangle panel = function(x,...) { my.colors = trellis.par.get("superpose.symbol")$col[1:3] panel.xyplot(x,...) panel.abline(fit1.coef[1],fit1.coef[2],col=my.colors[1]) # add the regression line to the plot panel.abline(sum(fit1.coef[c(1,3)]),fit1.coef[2],col=my.colors[2]) # add the regression line to the plot panel.abline(sum(fit1.coef[c(1,4)]),fit1.coef[2],col=my.colors[3]) # add the regression line to the plot }, key = list(space = "top", # key is on top of plot columns = 3, # key is in three columns text = list(levels(type)), # use the default level names of type points = list(col = trellis.par.get("superpose.symbol")$col[1:3], # use first three default colors pch = c(1,3,6) # better match the ones used above! ) ) ) ) ) # Fit the model with an intercept term. fit2 = lm(energy ~ mass*type,data=bats) print( summary(fit2) ) # Plot this last one similarly to the previous ones. # This code, an edited version of what came before, # will plot the three separate regression lines. # Thankfully, this is the default behavior when groups are specified # and the type = c("r","p") argument is set. # It uses the specific knowledge that the groups have these slopes # and intercepts where fit2.coef = coef(fit2) fit2.coef = coef(fit2) # intercept slope # birds fit2.coef[1] fit1.coef[2] # eBats sum(fit1.coef[c(1,3)]) fit1.coef[2] # nBats sum(fit1.coef[c(1,4)]) fit1.coef[2] with(bats, plot( xyplot(energy ~ mass, data=bats, groups=type, type=c("p","r"), pch=c(1,3,6), # circle, plus, inverted triangle key = list(space = "top", # key is on top of plot columns = 3, # key is in three columns text = list(levels(type)), # use the default level names of type points = list(col = trellis.par.get("superpose.symbol")$col[1:3], # use first three default colors pch = c(1,3,6) # better match the ones used above! ) ) ) ) )