# Assignment 3 rdirich = function(alpha) { y = rgamma(length(alpha),alpha) return(y / sum(y)) } mcmc1 = function(n=10000,A=1,print=F,size=4,p=rep(1/size,size),one=F) { numAccepted = 0 ans = matrix(0,n,size) ans[1,] = abs(p)/sum(abs(p)) for(i in 2:n) { x = ans[i-1,] if(one) { y = rdirich(A*x+1) acceptProb = exp(sum((A*y)*log(x))-sum((A*x)*log(y))+sum(lgamma(A*x+1))-sum(lgamma(A*y+1))) } else { y = rdirich(A*x) acceptProb = exp(sum((A*y-1)*log(x))-sum((A*x-1)*log(y))+sum(lgamma(A*x))-sum(lgamma(A*y))) } if(print) { cat("prob:") print(acceptProb) cat("x: ") print(x) cat("y: ") print(y) } if(runif(1) < acceptProb) { numAccepted = numAccepted + 1 ans[i,] = y if(print) print("Accepted") } else { ans[i,] = x if(print) print("Rejected") } } print(paste("Acceptance Rate:",numAccepted,"of",n,"=",round(numAccepted/n,4))) ans } # Functions to compute the TN93 transition matrix tnQ = function(base=rep(1/4,4),ttRatio=2,rho=1) { base = abs(base) / sum(abs(base)) piR = sum(base[c(1,3)]) piY = 1 - sum(base[c(1,3)]) piAG = base[1]*base[3] piCT = base[2]*base[4] b = 1/(2*piR*piY*(1+ttRatio)) aY = (piR*piY*ttRatio-piAG-piCT) / (2*(1+ttRatio)*(piY*piAG*rho+piR*piCT)) aR = rho * aY R = matrix(0,4,4) R[1,3] = R[3,1] = aR/piR + b R[2,4] = R[4,2] = aY/piY + b R[c(1,3),c(2,4)] = b R[c(2,4),c(1,3)] = b Q = R %*% diag(base) - diag(apply(R %*% diag(base),1,sum)) Q } tnP = function(edgeLength,base=rep(1/4,4),ttRatio=2,rho=1) { Q = tnQ(base,ttRatio,rho) e = eigen(Q) U = e$vectors V = solve(U) L = e$values P = U %*% diag(exp(L*edgeLength)) %*% V list(P=P,U=U,L=L,V=V) } newP = function(edgeLength,U,L,V) { P = U %*% diag(exp(L*edgeLength)) %*% V P } # MCMC to find posterior of a single edge # using Tamura-Nei Model PigCow = matrix(c(297,32,14,13,38,252,3,51,18,2,131,2,6,42,2,237),4,4) getLL = function(P,base,counts) { ll = sum( counts * log(diag(base) %*% P) ) ll } mcmc2 = function(n=10000,t0=1,counts=matrix(0,4,4),lambda=1,base=rep(1/4,4),ttRatio=2,rho=1,w=0.1) { acceptCount = 0 tn = tnP(t0,base,ttRatio,rho) P0 = tn$P U = tn$U L = tn$L V = tn$V logLambda = log(lambda) logh = getLL(P0,base,counts) + logLambda - lambda * t0 out = matrix(NA,n+1,2) out[1,] = c(t0,logh) for(i in 1:n) { x = out[i,1] y = abs(x + w*(2*runif(1)-1)) P = newP(y,U,L,V) prop = getLL(P,base,counts) + logLambda - lambda * y if(runif(1) < exp(prop - logh)) { acceptCount = acceptCount + 1 logh = prop out[i+1,] = c(y,prop) } else out[i+1,] = c(x,logh) } print(paste("Acceptance Rate:",acceptCount,"of",n,"=",round(acceptCount/n,4))) print("95% Credible Region:") print(quantile(out[,1],c(0.025,0.975))) out } #outPosterior1 = mcmc2(counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.1) #outPosterior2 = mcmc2(counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.1) #outPosterior3 = mcmc2(counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.1) mcmc2b = function(n=10000,t0=1,counts=matrix(0,4,4),lambda=1,lambdaT = 0.5,lambdaR = 1, base=rep(1/4,4),ttRatio0=2,rho0=1,w=0.4,wT=0.4,wR=0.4) { # ttRatio >= (piA*piG + piC*piT)/(piRpiY) = K K = (base[1]*base[3] + base[2]*base[4])/(base[1]+base[3])/(base[2]+base[4]) acceptCounts = c(0,0,0) proposeCounts = c(0,0,0) tn = tnP(t0,base,ttRatio0,rho0) P0 = tn$P U = tn$U L = tn$L V = tn$V logLambda = log(lambda) logLambdaT = log(lambdaT) logLambdaR = log(lambdaR) logh = getLL(P0,base,counts) + logLambda - lambda*t0 + logLambdaT - lambdaT*(ttRatio0-K) + logLambdaR - lambdaR*rho0 out = matrix(NA,n+1,4) out[1,] = c(t0,ttRatio0,rho0,logh) for(i in 1:n) { sel = sample(3,1) proposeCounts[sel] = proposeCounts[sel] + 1 x = out[i,1] xT = out[i,2] xR = out[i,3] if(sel==1) { y = abs(x + w*(2*runif(1)-1)) yT = xT yR = xR P = newP(y,U,L,V) } else if(sel==2) { y = x yT = abs(xT + wT*(2*runif(1)-1) - K) + K yR = xR tn = tnP(y,base,yT,yR) P = tn$P } else { y = x yT = xT yR = abs(xR + wR*(2*runif(1)-1)) tn = tnP(y,base,yT,yR) P = tn$P } prop = getLL(P,base,counts) + logLambda - lambda*y + logLambdaT - lambdaT*(yT-K) + logLambdaR - lambdaR*yR if(runif(1) < exp(prop - logh)) { acceptCounts[sel] = acceptCounts[sel] + 1 logh = prop U = tn$U V = tn$V L = tn$L out[i+1,] = c(y,yT,yR,prop) } else out[i+1,] = c(x,xT,xR,logh) } print(paste("Acceptance Rates:")) for(i in 1:3) print(paste(acceptCounts[i],"of",proposeCounts[i],"=",round(acceptCounts[i]/proposeCounts[i],4))) print("95% Credible Regions:") print(quantile(out[,1],c(0.025,0.975))) print(quantile(out[,2],c(0.025,0.975))) print(quantile(out[,3],c(0.025,0.975))) out } #outPosterior1b = mcmc2b(n=25000,counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.2,wT=2,wR=1) #outPosterior2b = mcmc2b(n=25000,counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.2,wT=2,wR=1) #outPosterior3b = mcmc2b(n=25000,counts=PigCow,base=c(0.3136,0.2947,0.1329,0.2588),ttRatio=1.2755,rho=0.3441,w=0.2,wT=2,wR=1) # code to find mle values for edgeLength and parameters # specific value come from iterating optim, restarting with # the last values until there are no further changes #opt = optim(c(0.234,1.47,0.34),f,base=pcbase,counts=PigCow) opt2 = optim(c(0.2317334,1.4541827,0.3179675),f,base=pcbase,counts=PigCow) f = function(x,base,counts) { edgeLength = x[1] ttr = x[2] rho = x[3] P = (tnP(edgeLength,base,ttr,rho))$P return(-1*getLL(P,base,counts)) } #outPosterior4b = mcmc2b(n=100000,counts=PigCow,base=pcbase,ttRatio=2.17,rho=0.89,t0=0.53,lambda=2,lambdaT=0.5,lambdaR=1.1,w=0.2,wT=2,wR=1)