# load in the data
bear = read.table("BearData572.txt",header=T)
#compute the desired response variable
PctFemale = bear$NumFem / bear$Total
# create a new data frame with this response variable
bear = data.frame(bear,PctFemale)
attach(bear)
str(bear)
# Variables are:
# County = name of County in Wisconsin
# NumFem = number of female bears harvested
# NumMale = number of male bears harvested
# Total = NumFem + NumMale
# HarvD = harvest density (Total divided by area of county in km^2)
# MastPerForest = proportion of forest that is masting (produces food such as acorns or fruit)
# TotalForest = proportion of county covered by forest
# AG = proportion of county used for agriculture
# PctFemale = proportion of Total that is female
# explore explanatory variables
boxplot(HarvD)
boxplot(MastPerForest)
boxplot(TotalForest)
boxplot(AG)
# log transform of HarvD merited to reduce influence of the outlier
# make a pairwise scatter plot
pairs(cbind(bear[,c(9,6:8)],log(HarvD)))
# map data
library(maps)
# make a county map of Wisconsin
map('county','wisconsin')
# shade red the counties in the data set
for(i in 1:nrow(bear)) {map('county',paste("wisconsin",",",County[i],sep=""),add=T,fill=T,col="red")}
# add totals to the plot by county
for(i in 1:nrow(bear)) {map.text('county',paste("wisconsin",",",County[i],sep=""),add=T,labels=as.character(Total[i]))}
# fit models
bearH.glm = glm(PctFemale ~ log(HarvD),family=binomial,weights=Total)
# summarize the model
summary(bearH.glm)
# compute the probability that a bear is female in two separate ways
etaH = 0.7879 + 0.238*log(0.01)
exp(etaH) / (1 + exp(etaH))
etaH = predict(bearH.glm,data.frame(HarvD = 0.01))
exp(etaH) / (1 + exp(etaH))
# fit a model with MastPerForest, TotalForest, and AG as predictors
bearMTA.glm = glm(PctFemale ~ MastPerForest + TotalForest + AG,family=binomial,weights=Total)
summary(bearMTA.glm)
# fit a model using all four variables
bearHMTA.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + TotalForest + AG,family=binomial,weights=Total)
summary(bearHMTA.glm)
# fitting a model without TotalForest
bearHMA.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + AG,family=binomial,weights=Total)
summary(bearHMA.glm)
# Yet another model
bearHMT.glm = glm(PctFemale ~ log(HarvD) + MastPerForest + TotalForest,family=binomial,weights=Total)
summary(bearHMT.glm)
# stepwise regression from the full model
step(bearHMTA.glm)