# Code for finding posterior of edge length # under Jukes-Cantor likelihood model # with exponential prior for edge length # posterior is proportional to # (1/4)^n * (1+3*exp(-(4/3)*t))^n1 * (1-exp(-(4/3)*t))^n2 * lambda * exp(-lambda*t) # proposal t* | t ~ abs( t + Unif(-a,a) ) # Example where n1=7 and n2=3 logh = function(tt,lambda,n1,n2) { a = exp(-4/3*tt) n = n1 + n2 if(n2==0) foo = 0 else foo = n2 * log(1-a) return( n*log(0.25) + n1*log(1+3*a) + foo + log(lambda) - lambda*tt ) } mcmc = function(ncycles,tt,lambda,n1,n2,w) { x = rep(NA,ncycles+1) x[1] = tt for(i in 1:ncycles) { proposal = abs( x[i] + runif(1,-w,w) ) acceptProb = min(1,exp(logh(proposal,lambda,n1,n2) - logh(x[i],lambda,n1,n2))) if(runif(1) < acceptProb) x[i+1] = proposal else x[i+1] = x[i] } x } getDensity = function(u,lambda,n1,n2) { lp = logh(u,lambda,n1,n2) m = max(lp) e = exp(lp-m) return(e/sum(e)) } # plot to show effect of prior on posterior as a function of sample size posteriorPlot = function(n1,n2,xlim) { u = seq(xlim[1],xlim[2],length=1001) p1 = getDensity(u,0.2,n1,n2) p2 = getDensity(u,1.0,n1,n2) p3 = getDensity(u,5.0,n1,n2) plot(c(u,u,u),c(p1,p2,p3),type="n",xlab="",ylab="") title(paste("Posterior: rates = 0.2, 1, and 5, n = (",n1,",",n2,")")) lines(u,p1,col=2) lines(u,p2,col=3) lines(u,p3,col=4) abline(v=mle.jc(n1,n2),lty=2,col=5) abline(h=0) invisible() } # Jukes-Cantor MLE mle.jc = function(n1,n2) { return( -0.75 * log( (3*n1-n2)/(3*(n1+n2)))) } # Figure 1 fig1 = function() { par(mfrow=c(2,3)) posteriorPlot(7,3,xlim=c(0,3)) posteriorPlot(35,15,xlim=c(0,1)) posteriorPlot(70,30,xlim=c(0.2,0.8)) posteriorPlot(350,150,xlim=c(0.25,0.5)) posteriorPlot(700,300,xlim=c(0.25,0.5)) posteriorPlot(3500,1500,xlim=c(0.3,0.45)) par(mfrow=c(1,1)) invisible() } # Figure 2 #run1 = mcmc(110000,2,1,7,3,0.1) fig2 = function(run) { lo = floor(10*min(run))/10 hi = ceiling(10*max(run))/10 breaks = seq(lo,hi,by=0.1) hist(run1[-c(1:10001)],prob=T,xlim=c(lo,hi),xlab="",ylab="",breaks = breaks,main="Posterior by Sample and Theory") u = seq(lo,hi,length=1001) p = getDensity(u,1,7,3) * (length(u)-1) / (hi-lo) lines(u,p,col=2) invisible() }