# Bret Larget # February 17, 2009 # # Comparison of various methods for confidence intervals for theta from an Exponential distirbution. # X_1,...,X_n ~ i.i.d. Exponential(theta) # s = (x_1,...,x_n) is the sample # # Likelihood is L(theta | s) = theta^n * exp(-theta*sum(s)) # log( L(theta | s) ) = n log(theta) - theta*sum(s) # MLE is theta.hat = 1/mean(s) # Note that X.bar = mean(S) ~ Gamma(n,n*theta) and that # theta*X.bar ~ Gamma(n,n) which has mean 1. # We can use quantiles to find a confidence interval. # Let a,b be the 0.025 and 0.975 quantiles of the Gamma(n,n) distribution. # Then P(a < theta*X.bar < b) = P(a/X.bar < theta < b/X.bar) = 0.95. # CI method #1. exponential.ci1 = function(s,conf=0.95) { n = length(s) x.bar = mean(s) a = qgamma((1-conf)/2,n,n) b = qgamma(1 - (1-conf)/2,n,n) ci = c(a/x.bar,b/x.bar) return(ci) } # The mean square error MSE(1/x.bar) = theta^2 * (n+2)/( (n-1)(n-2) ) # For large samples, can use 1/x.bar +/- z*(1/x.bar)*sqrt( (n+2)/((n-1)*(n-2)) ) # CI method #2. exponential.ci2 = function(s,conf=0.95) { n = length(s) x.bar = mean(s) theta.hat = 1/x.bar z = qnorm(1 - (1-conf)/2) me = z*theta.hat*sqrt((n+2)/((n-1)*(n-2))) ci = theta.hat + me*c(-1,1) return(ci) } # The Fisher Information for the Exponential distribution is n/theta^2. # A large sample confidence interval using MLE ~ N(theta,1/n*I(theta)) is # theta.hat +/- z * theta.hat/sqrt(n). # CI method #3. exponential.ci3 = function(s,conf=0.95) { n = length(s) x.bar = mean(s) theta.hat = 1/x.bar z = qnorm(1 - (1-conf)/2) me = z*theta.hat/sqrt(n) ci = theta.hat + me*c(-1,1) return(ci) } # Another method corrects for the bias in theta.hat. Notice that E(theta.hat) = (n/(n-1))*theta, so ((n-1)/n)*theta.hat # is an unbiased estimate of theta. Let theta.tilde = ((n-1)/n) * theta.hat. Using a large sample approximation, # MSE(theta.tilde) = Var(theta.tilde) = theta^2/(n-2). # theta.tilde +/- z*theta.tilde/sqrt(n-2) # CI method #4. exponential.ci4 = function(s,conf=0.95) { n = length(s) x.bar = mean(s) theta.tilde = (1/x.bar) * (n-1)/n z = qnorm(1 - (1-conf)/2) me = z*theta.tilde/sqrt(n-2) ci = theta.tilde + me*c(-1,1) return(ci) } # Another method would find all theta such that L(theta | s) / L(theta.hat | s) > c for some c. # This method would depend on solving this problem: Find a < 1 and b > 1 such that # P( a < theta*x.bar*exp(theta*x.bar) < b ) >= gamma where a and b are solutions to h(y) = c^(1/n)/e # for function h(y) = y*exp(-y). # Note that theta*x.bar ~ Gamma(n,n) and that h(y) \propto Gamma(2,1). Also, h(1) is the maximum of h. # There must be a numerical means to solve for an appropriate c. # There is no code here for this approach. ################################################################################ # Simulation to check these methods for small n and large n. exponential.sim = function(n.sim,n,theta.true=1,conf=0.95) { numMethods = 4 s = matrix(rexp(n.sim*n,theta.true),nrow=n.sim) contain = rep(0,numMethods) out1 = apply(s,1,exponential.ci1,conf=conf) contain[1] = sum((out1[1,] < theta.true) & (out1[2,] > theta.true)) out2 = apply(s,1,exponential.ci2,conf=conf) contain[2] = sum((out2[1,] < theta.true) & (out2[2,] > theta.true)) out3 = apply(s,1,exponential.ci3,conf=conf) contain[3] = sum((out3[1,] < theta.true) & (out3[2,] > theta.true)) out4 = apply(s,1,exponential.ci4,conf=conf) contain[4] = sum((out4[1,] < theta.true) & (out4[2,] > theta.true)) r = rep(0,numMethods) r[1] = mean(out1[2,] - out1[1,]) r[2] = mean(out2[2,] - out2[1,]) r[3] = mean(out3[2,] - out3[1,]) r[4] = mean(out4[2,] - out4[1,]) names.ci = c("Exact","MLE/MSE","MLE/Fisher","Unbiased") names(contain) = names.ci names(r) = names.ci return( list(n=n,contain=contain,range=r) ) }