# R code for analysis of cow data # Bret Larget # May 6/8, 2008 # Read in the data cows = read.table("slaughter.csv",header=T,sep=",") str(cows) # Need to convert numerical variables to factors # Note that $cow does *not* have a unique value for each cow. # We will address this later. cows$cow = factor(cows$cow) cows$treatment = factor(cows$treatment) cows$rep = factor(cows$rep) # Make a variable for weight loss cows = with(cows, data.frame(cows,Loss=weight0 - weight36) ) # For repeated measures regression analysis, we need to rewrite the data. # Make new data frame with one row per HCT measurement # The function rep() repeats the first argument. # each specifies how many each element is repeated. # > rep(1:4,each=3) # [1] 1 1 1 2 2 2 3 3 3 4 4 4 # whereas times says how often the whole object is repeated. # > rep(1:4,times=3) # [1] 1 2 3 4 1 2 3 4 1 2 3 4 # This code creates a new data frame with one observation per row. cows2 = with(cows, data.frame(cow = rep(cow,each=5), id = factor(rep(1:91,each=5)), rep = factor(rep(as.vector(rep),each=5)), cowtemp = rep(cowtemp,each=5), hitemp = rep(hitemp,each=5), lotemp = rep(lotemp,each=5), treatment = factor(rep(as.vector(treatment),each=5)), sick = factor(rep(sick,each=5)), weight0 = rep(weight0,each=5), weight18 = rep(weight18,each=5), weight36 = rep(weight36,each=5), time = rep(c(0,9,18,27,36),times=nrow(cows)), hct = as.vector(t(cbind(HCT0,HCT9,HCT18,HCT27,HCT36))), yield = rep(YIELD,each=5) ) ) # Here we add a new column that is a factor for Healthy or Sick # status is redundant sick = rep("A",nrow(cows2)) sick[cows2$sick==0] = "Healthy" sick[cows2$sick==1] = "Sick" cows2 = data.frame(cows2,status=factor(sick)) # Load the necessary libraries library(arm) options(digits=7) # Plot hct vs time for all cows together # We notice that there is a lot of variation # There appears to be a small group effect (slopes are different, slightly) # We want to see if the difference in slopes is significant print( xyplot(hct ~ time, groups = treatment,data=cows2,auto.key=list(space="right"),type=c("p","r"),jitter.x=T) ) # Notice the difference when we make the plot using only healthy animals # and then only sick animals print( xyplot(hct ~ time, groups = treatment,data=cows2,auto.key=list(space="right"), type=c("p","r"),jitter.x=T,subset=(status=="Healthy"),main="Healthy" ) ) print( xyplot(hct ~ time, groups = treatment,data=cows2,auto.key=list(space="right"), type=c("p","r"),jitter.x=T,subset=(status=="Sick"),main="Sick" ) ) # Based on these plots, we may want to either consider only healthy cows # or make sure to include interaction terms for treatment and sickness # Plot the separate regressions for each cow by treatment group trt0 = xyplot(hct ~ time | id,data=cows2,subset=(treatment==0),type=c("p","r"),main="Treatment 0",groups=status) trt18 = xyplot(hct ~ time | id,data=cows2,subset=(treatment==18),type=c("p","r"),main="Treatment 18",groups=status) trt36 = xyplot(hct ~ time | id,data=cows2,subset=(treatment==36),type=c("p","r"),main="Treatment 36",groups=status) print(trt0) print(trt18) print(trt36) # In these plots we notice: # (1) most animals have individual patterns that look linear # (2) there is a lot of variation within groups # (3) for individual cows, it is hard to see a strong effect of sickness # Lets consider the effect on healthy cows only # The second line redefines the levels of id so that there are 69, not 91 levels # so that we do not keep levels that no longer are in the subset. cows3 = with(cows2, cows2[status=="Healthy",]) cows3$id = factor(as.vector(cows3$id)) str(cows3) # Fit a classical regression model, using few covariates cows.lm1 = lm(hct ~ time*treatment + rep,data=cows3) display(cows.lm1,digits=4) # Multilevel model with a random cow dependent intercept and slope # This model fits a main line for each treatment group # with adjustments to slope and intercept for each cow cows.lmer1 = lmer(hct ~ time*treatment + rep + (1 + time | id),data=cows3) display(cows.lmer1,digits=4) # Hypothesis testing for differences in slopes between two treatments # and the control sim.1 = sim(cows.lm1,10000) mcmc.1 = mcmcsamp(cows.lmer1,10000) print( densityplot(~sim.1$beta[,7]) ) p18.lm1 = 2*sum( sim.1$beta[,7] < 0 )/10000 print( densityplot(~sim.1$beta[,8]) ) p36.lm1 = 2*sum( sim.1$beta[,8] < 0 )/10000 print( densityplot(~mcmc.1[,7]) ) p18.lmer1 = 2*sum( mcmc.1[,7] < 0 )/10000 print( densityplot(~mcmc.1[,8]) ) p36.lmer1 = 2*sum( mcmc.1[,8] < 0 )/10000 # 95% CI for differences ci18.lm1 = quantile(sim.1$beta[,7],c(0.025,0.975)) ci36.lm1 = quantile(sim.1$beta[,8],c(0.025,0.975)) ci18.lmer1 = quantile(mcmc.1[,7],c(0.025,0.975)) ci36.lmer1 = quantile(mcmc.1[,8],c(0.025,0.975)) # Repeat lmer model using weight as an additional predictor # Better to use initial weight, but with all the missing values, use weight36 instead # The correlation is nearly 0.99, so the information is about the same with(cows, cor(weight0[sick==0],weight36[sick==0],use="c") ) cows.lmer2 = lmer(hct ~ time*treatment + rep + weight36 + (1 + time | id),data=cows3) display(cows.lmer2,digits=4) # testing and inference mcmc.2 = mcmcsamp(cows.lmer2,10000) print( densityplot(~mcmc.2[,8]) ) p18.lmer2 = 2*sum( mcmc.2[,8] < 0 )/10000 print( densityplot(~mcmc.2[,9]) ) p36.lmer2 = 2*sum( mcmc.2[,9] < 0 )/10000 # 95% CI for differences ci18.lmer2 = quantile(mcmc.2[,8],c(0.025,0.975)) ci36.lmer2 = quantile(mcmc.2[,9],c(0.025,0.975))