#########################################################problem 1 irri1 = read.table("irri1.txt", header=T) irri1 str(irri1) irri1$variety = factor(irri1$variety) irri1$field = factor(irri1$field) #---- (a) ------------------# plot(yield ~ as.numeric(field), pch=as.numeric(variety), data=irri1) legend("topleft", pch=1:2, legend=levels(irri1$variety)) # re-order and then replot: irri1$field = reorder(irri1$field, irri1$yield) plot(yield ~ as.numeric(field), pch=as.numeric(variety), data=irri1, xlab="field, ordered by mean yield") legend("topleft", pch=1:2, legend=levels(irri1$variety)) #---- (b) ----------# with(irri1, t.test(yield[variety=="v2"], yield[variety=="v1"], paired=T)) # d=1.35, p-value=0.01478 #---- (c)field as a fixed effect ------------# fit.lm = lm(yield ~ variety+field, data=irri1) summary(fit.lm) # varietyv2 1.3500 0.4200 3.214 0.014775 * # Residual standard error: 0.8401 on 7 degrees of freedom # Multiple R-squared: 0.9741, Adjusted R-squared: 0.9444 #---- (d)field as a random effect -----------# # blocks = fields library(lme4) fit.lme = lmer(yield ~ variety+(1|field), data=irri1) summary(fit.lme) # Estimate Std. Error t value #(Intercept) 39.550 1.279 30.928 #varietyv2 1.350 0.420 3.214 # conclusion: same estimated variety effect 2*pnorm(3.214, lower.tail=F) # 0.001308997 # This is too liberal. Using the normal approximation would be # okay if we had lots of fields, but here we only have 8. # or better: use the t-distribution with 8-1 degrees of freedom, # but we have not yet seen in class why we should use 7 df 2*pt(3.214, lower.tail=F, df=7) # 0.01477585 #------- (e) variability of field effects and R2 --------------# # Random effects: # Groups Name Variance Std.Dev. # field (Intercept) 12.37714 3.51812 # Residual 0.70571 0.84007 # Number of obs: 16, groups: field, 8 # fields explain most of the variability: the variance # between field mean yield is 18 times larger than the variance # between plots at the same field. # Correlation between plots at the same field: 12.37714/(12.37714+0.70571) # 0.9460584 # With fields used as fixed effect, we get an R2 of 0.9741 # because the fields have very strong effects: If we know # which one of the 8 fields we are at (and which variety is # planted), we can make an almost perfect prediction. # By the way, these predictions would work so well only on the 8 fields # that were sampled, since this R2 value was obtained while # considering fields as fixed. #----- (f) check assumptions -------------# # checking that e_i all have the same variance: plot(resid(fit.lme) ~ fitted(fit.lme)) abline(h=0) # checking the normality of residuals e_i: qqnorm(resid(fit.lme)) # checking the normality of field effects alpha_i: qqnorm( ranef(fit.lme)$field$"(Intercept)" ) # everything looks fine. No transformation needed. ################################################################problem 2 irri2 = read.table("irri2.txt", header=T) irri2 irri2$tmt = factor(c("1","2","3","4")) irri2$tmt = factor(irri2$tmt) irri2$field = factor(irri2$field) #-------- (a) plots ---------------# # response versus fields: library(lattice) xyplot(yield ~ field, group=tmt, data=irri2, type="b", auto.key=T) #------ (b) mixed model -------# str(irri2) library(lme4) fit.lme = lmer(yield ~ irrigation*variety + (1|field), data=irri2) summary(fit.lme) # Estimate Std. Error t value #irrigationi2 2.0000 0.9691 2.064 #varietyv2 0.5000 0.9691 0.516 #irrigationi2:varietyv2 0.5000 1.3705 0.365 #-------- (c)-------------------------# # Groups Name Variance Std.Dev. # field (Intercept) 12.0967 3.4780 # Residual 1.8783 1.3705 # Variance among field effects is larger, so field effects # are quite large and blocking is efficient. ###########################################################problem 3 corn3 = read.table("corn3.txt", header=T) corn3 library(lme4) fit.site <- lmer(harvwt ~ (1|site), data=corn3) summary(fit.site) #use the random effect model formula #for (a), the answer is 2*sigmae^2 #for (b), the answer is 2*sigmaalpha^2 + 2*sigmae^2 ###########################################################problem 4 bread4 = read.table("bread4.txt", header=T) bread4$day = factor(bread4$day) bread4$loaf = factor(bread4$loaf) bread4 str(bread4) #----- (a) plots ----------------------------------------# plot(calcium ~ jitter(as.numeric(day), 0.2), pch = as.numeric(recipe), data=bread4, xlab="day", ylab="calcium" ) legend("topleft", pch=1:3, legend=levels(bread4$recipe)) library(lattice) xyplot(calcium ~ recipe, group = day, data=bread4, auto.key=T) #---- (b) fit mixed effect model ------------------------# library(lme4) fit = lmer(calcium ~ (1|day) + (1|recipe), data=bread4) #---- (c) checking assumptions --------# plot(residuals(fit)~fitted(fit)) abline(h=0) qqnorm(residuals(fit)) qqnorm(ranef(fit)$"day"[,1]) #---- (d) test the random effects of days ------# fit.null = update(fit, .~.-(1|day)) anova(fit, fit.null) # we get LR= 23.617 on df=1 so p-val= 1.175e-06 *** # now using simulations under the null model that there # is no day-to-day variation. oneLR = function(){ y=simulate(fit.null) sim.null = lmer(y ~ (1|recipe) + (1|day), data=bread4) sim.alt = lmer(y ~(1|recipe) , data=bread4) anova(sim.null, sim.alt)$Chisq[2] } oneLR() sim1000 = replicate(1000,oneLR()) hist(sim1000) # shows lots of zeros, some but few higher values. quantile(sim1000) # more than 50% zeros! sum(sim1000>23)/1000 # we get 0 and report <.001