# Bret Larget # March 23, 2009 # Example of Bayesian analysis # Velocity of Sugar Maple Samara # Read in the data samara = read.table("samara.txt",header=T) # Create a vector which is only the velocity data from the SugarMaples species v = with(samara, velocity[species=="SugarMaple"]) # Find an appropriate model. # Want mu_0 = 100 # Want sigma to be between, say 10 and 30, with high probability. # Thus, 100 < sigma^2 < 900. # 0.001 < 1/sigma^2 < 0.01 with high probability. # X_i ~ N(mu,sigma^2) # mu ~ N(100, tau_0^2 sigma^2) # nu = 1/sigma^2 ~ Gamma(alpha_0,beta_0) # Want tau_0 * E(sigma) approx 25. # Try expected precision equal to 0.005, shape about 3, so alpha_0 = 3, beta_0 = 3/0.005 = 600 #nu = rgamma(10000,3,600) #sigma2 = 1/nu #library(lattice) #print( densityplot(~sqrt(sigma2)) ) # Too many extreme points.... make the sd smaller by making both alpha and beta larger. nu = rgamma(10000,5,1000) sigma2 = 1/nu library(lattice) print( densityplot(~sqrt(sigma2)) ) # Not too bad. #sum(sigma2<900) #sum(sigma2>900) # Now, find a good value for tau_0. # Want uncertainty in mu to have an SD of about 25 when sigma = 20 # Try tau_0^2 * 400 = 625, tau_0^2 = 625/400 approx 1.5 mu = rnorm(10000,100,sqrt(1.5*sigma2)) print(xyplot(mu ~ sqrt(sigma2))) # Looks okay. Now, let's fit the model. # Prior: alpha_0 = 5, beta_0 = 1000, mu_0 = 100, tau_0^2 = 1.5 a.0 = 5 b.0 = 10000 mu.0 = 100 tau2.0 = 1.5 # Posterior: n = length(v) v.mean = mean(v) v.sigma2 = var(v)*(n-1)/n # function to compute posterior parameter values from data and prior specification # The model is as follows. # X_1,...,X_n | mu, sigma^2 ~ iid N(mu,sigma^2) # mu | sigma^2 ~ N(mu.0, tau.0^2 sigma^2) # 1/sigma^2 ~ Gamma(alpha.0, beta.0) normal.Bayes = function(x, alpha.0, beta.0, mu.0, tau2.0) { n = length(x) x.mean = mean(x) x.sigma2 = var(x)*(n-1)/n # MLE and not sample variance alpha.1 = alpha.0 +n/2 tau2.1 = 1/(n + 1/tau2.0) mu.1 = tau2.1 * (mu.0/tau2.0 + n*x.mean) beta.1 = beta.0 + mu.0^2/(2*tau2.0) + n*(x.mean^2 + x.sigma2)/2 - tau2.1 * (mu.0/tau2.0 + n*x.mean)^2/2 return(list(alpha=alpha.1,beta=beta.1,mu=mu.1,tau2=tau2.1)) } # Simulation-based credible region: # Simulate (mu,sigma^2) from the posterior distribution by first simulating nu = 1/sigma^2 from a Gamma # and then simulating mu given sigma^2 from a normal. #bayes1 = normal.Bayes(v,2.31,100,mu.0,tau2.0) bayes1 = normal.Bayes(v,a.0,b.0,mu.0,tau2.0) print(bayes1) nu.post = rgamma(10000,bayes1$alpha,bayes1$beta) sigma2.post = 1/nu.post mu.post = rnorm(10000,bayes1$mu,sqrt(bayes1$tau2*sigma2.post)) print( round(quantile(mu.post,c(0.05,0.95)),1) ) print( round(quantile(mu.post,c(0.025,0.975)),1) ) # Compare to t-test out = t.test(v,conf=0.9) print(out$conf.int) # Notice with experiments with the prior distribution parameters # that choices can have a substantial effect on the length of the intervals.