# FEV analysis # Bret Larget # Demonstration for Chapter 8 # Read in the FEV data fevdata = read.table("fevdata.txt",header=T) str(fevdata) # Fit a model without a transformation of the data fev.lm1 = lm(fev ~ age + smoking,data=fevdata) # Show a residual plot. There is a wedge. library(lattice) print( xyplot(residuals(fev.lm1) ~ fitted(fev.lm1)) ) # Display the coefficients. library(arm) display( fev.lm1 ) # Compute the estimate of the SD of the error sigma = sqrt( sum(residuals(fev.lm1)^2) / 651 ) # Find the number of observations n = nrow(fevdata) # Set the random seed so that the simulation results are repeatable set.seed(234435) # Simulate the fake data. # In each case, we find the fiited values using fitted() # which corresponds to the predicted values for each data point # and then add mean zero normal error with sigma estimated from real data # Fit a regression model to the fake data. # Repeat for a total of five fake data sets. fev.1 = fitted(fev.lm1) + rnorm(n,0,sigma) fake.1 = lm(fev.1 ~ age + smoking,data=fevdata) fev.2 = fitted(fev.lm1) + rnorm(n,0,sigma) fake.2 = lm(fev.2 ~ age + smoking,data=fevdata) fev.3 = fitted(fev.lm1) + rnorm(n,0,sigma) fake.3 = lm(fev.3 ~ age + smoking,data=fevdata) fev.4 = fitted(fev.lm1) + rnorm(n,0,sigma) fake.4 = lm(fev.4 ~ age + smoking,data=fevdata) fev.5 = fitted(fev.lm1) + rnorm(n,0,sigma) fake.5 = lm(fev.5 ~ age + smoking,data=fevdata) # Use xyplot() from lattice, but save the results # instead of printing (or plotting) them. # residuals() and fitted() find residuals and fitted values, respectively resid.true = xyplot(residuals(fev.lm1) ~ fitted(fev.lm1)) resid.1 = xyplot(residuals(fake.1) ~ fitted(fake.1)) resid.2 = xyplot(residuals(fake.2) ~ fitted(fake.2)) resid.3 = xyplot(residuals(fake.3) ~ fitted(fake.3)) resid.4 = xyplot(residuals(fake.4) ~ fitted(fake.4)) resid.5 = xyplot(residuals(fake.5) ~ fitted(fake.5)) # Now plot the graphs all on one page. # The split argument takes for elements. # 1 = column for current plot # 2 = row for current plot # 3 = total number of columns in display # 4 = total number of rows in display # The argument more=T tells R that the next plotting command # should be added to the same display. # I use print() on each saved graphics object. print( resid.true, split=c(1,1,3,2), more=T ) print( resid.1, split=c(2,1,3,2), more=T ) print( resid.2, split=c(3,1,3,2), more=T ) print( resid.3, split=c(1,2,3,2), more=T ) print( resid.4, split=c(2,2,3,2), more=T ) print( resid.5, split=c(3,2,3,2)) # after a log transformation of fev and age # From a previous analysis, we found log transformations # of both fev and age improve the goodness of fit. fev.lm2 = lm(log(fev) ~ log(age) + smoking,data=fevdata) sigma.2 = sqrt( sum(residuals(fev.lm2)^2) / 651 ) fev2.1 = exp(fitted(fev.lm2) + rnorm(n,0,sigma.2)) fake2.1 = lm(log(fev2.1) ~ log(age) + smoking,data=fevdata) fev2.2 = exp(fitted(fev.lm2) + rnorm(n,0,sigma.2)) fake2.2 = lm(log(fev2.2) ~ log(age) + smoking,data=fevdata) fev2.3 = exp(fitted(fev.lm2) + rnorm(n,0,sigma.2)) fake2.3 = lm(log(fev2.3) ~ log(age) + smoking,data=fevdata) fev2.4 = exp(fitted(fev.lm2) + rnorm(n,0,sigma.2)) fake2.4 = lm(log(fev2.4) ~ log(age) + smoking,data=fevdata) fev2.5 = exp(fitted(fev.lm2) + rnorm(n,0,sigma.2)) fake2.5 = lm(log(fev2.5) ~ log(age) + smoking,data=fevdata) # Again, use xyplot(), but save the results as objects resid2.true = xyplot(residuals(fev.lm2) ~ fitted(fev.lm2)) resid2.1 = xyplot(residuals(fake2.1) ~ fitted(fake2.1)) resid2.2 = xyplot(residuals(fake2.2) ~ fitted(fake2.2)) resid2.3 = xyplot(residuals(fake2.3) ~ fitted(fake2.3)) resid2.4 = xyplot(residuals(fake2.4) ~ fitted(fake2.4)) resid2.5 = xyplot(residuals(fake2.5) ~ fitted(fake2.5)) # Now print the results, using split and more as before. print( resid2.true, split=c(1,1,3,2), more=T ) print( resid2.1, split=c(2,1,3,2), more=T ) print( resid2.2, split=c(3,1,3,2), more=T ) print( resid2.3, split=c(1,2,3,2), more=T ) print( resid2.4, split=c(2,2,3,2), more=T ) print( resid2.5, split=c(3,2,3,2)) # Note: An alternative to using more=T in the previous command # is to use new=F in the current plotting command.