#-----------------1--------------- acorn = read.table('acorn.txt',header=T) atacorn = acorn[acorn$Region=='Atlantic',] library(lattice) fit1=lm(Acorn_size ~ Range, data=atacorn) xyplot(Acorn_size ~ Range, data=atacorn, type=c('p','r')) plot(fit1, which=1) fit2=lm(log(Acorn_size) ~ Range, data=atacorn) xyplot(log(Acorn_size) ~ Range, data=atacorn, type=c('p','r')) plot(fit2, which=1) fit3=lm(Acorn_size ~ log(Range), data=atacorn) xyplot(Acorn_size ~ log(Range), data=atacorn, type=c('p','r')) plot(fit3, which=1) fit4=lm(log(Acorn_size) ~ log(Range), data=atacorn) xyplot(log(Acorn_size) ~ log(Range), data=atacorn, type=c('p','r')) plot(fit4, which=1) #---------------2------------------ outlier = read.table("outlier.txt", header=T) fit = lm(Price.1988 ~ Price.1987, data=outlier) plot(Price.1988 ~ Price.1987, data=outlier) abline(fit) plot(fit, which=1) fit = lm(Price.1988 ~ log(Price.1987), data=outlier) plot(Price.1988 ~ log(Price.1987), data=outlier) abline(fit) plot(fit) # for high leverage point, which means unusual X value, # we need to consider to transform the X column # you need to know the difference between high leverage point (in your hwk) # and influentical point #----------------3--------------- job = read.table("job.txt", header=T) basic = lm(Y ~ 1, data=job) full = lm(Y ~ X1+X2+X3+X4, data=job) step(basic, scope = ~ X1+X2+X3+X4) # 1,3,4 AIC=73.85 step(full, scope = ~ X1+X2+X3+X4) #same step(basic, scope = ~ X1+X2+X3+X4, test="F") step(full, scope = ~ X1+X2+X3+X4, test="F") add1(basic, scope = ~ X1+X2+X3+X4, test="F") fit = update(basic, ~ . + X3) add1(fit, scope = ~ X1+X2+X3+X4, test="F") fit = update(fit, ~ . + X1) add1(fit, scope = ~ X1+X2+X3+X4, test="F") fit = update(fit, ~ . + X4) add1(fit, scope = ~ X1+X2+X3+X4, test="F") summary(fit) #Adjusted R2=0.956 drop1(full, test="F") fit = update(full, ~ . - X2) drop1(fit, test="F") ## arrive at the same model again ## note: if you get the same model, that's perfect; if not, ## you should compare them and choose one besed on AIC, ## adjusted R2, ANOVA F-test, etc. summary(lm(Y~X4, data=job))$coef summary(lm(Y~X1+X4, data=job))$coef summary(lm(Y~X3+X4, data=job))$coef summary(lm(Y~X1+X3+X4, data=job))$coef