# load in the data bear = read.table("BearData572.txt",header=T) #compute the desired response variable PctFemale = bear$NumFem / bear$Total # create a new data frame with this response variable bear = data.frame(bear,PctFemale) attach(bear) str(bear) # Variables are: # County = name of County in Wisconsin # NumFem = number of female bears harvested # NumMale = number of male bears harvested # Total = NumFem + NumMale # HarvD = harvest density (Total divided by area of county in km^2) # MastPerForest = proportion of forest that is masting (produces food such as acorns or fruit) # TotalForest = proportion of county covered by forest # AG = proportion of county used for agriculture # PctFemale = proportion of Total that is female # explore explanatory variables boxplot(HarvD) boxplot(MastPerForest) boxplot(TotalForest) boxplot(AG) # log transform of HarvD merited to reduce influence of the outlier # make a pairwise scatter plot pairs(cbind(bear[,c(9,6:8)],log(HarvD))) # map data library(maps) # make a county map of Wisconsin map('county','wisconsin') # shade red the counties in the data set for(i in 1:nrow(bear)) {map('county',paste("wisconsin",",",County[i],sep=""),add=T,fill=T,col="red")} # add totals to the plot by county for(i in 1:nrow(bear)) {map.text('county',paste("wisconsin",",",County[i],sep=""),add=T,labels=as.character(Total[i]))} # fit models bearH.glm = glm(PctFemale ~ log(HarvD),family=binomial,weights=Total) # summarize the model summary(bearH.glm) # compute the probability that a bear is female in two separate ways etaH = 0.7879 + 0.238*log(0.01) exp(etaH) / (1 + exp(etaH)) etaH = predict(bearH.glm,data.frame(HarvD = 0.01)) exp(etaH) / (1 + exp(etaH)) # fit a model with MastPerForest, TotalForest, and AG as predictors bearMTA.glm = glm(PctFemale ~ MastPerForest + TotalForest + AG,family=binomial,weights=Total) summary(bearMTA.glm) # fit a model using all four variables bearHMTA.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + TotalForest + AG,family=binomial,weights=Total) summary(bearHMTA.glm) # fitting a model without TotalForest bearHMA.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + AG,family=binomial,weights=Total) summary(bearHMA.glm) # Yet another model bearHMT.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + TotalForest,family=binomial,weights=Total) summary(bearHMT.glm) # stepwise regression from the full model step(bearHMTA.glm)