@HaomingJiang
2018-03-01T12:12:26.000000Z
字数 23568
阅读 1230
Haoming Jiang
> #########################################################################
> ### EXERCISE 3.1
> #########################################################################
> # baseball.dat = entire data set
> # baseball.sub = matrix of all predictors
> # salary.log = response, log salary
> # n = number of observations in the data set
> # m = number of predictors in the data set
> # num.starts = number of random starts
> # runs = matrix containing the random starts where each row is a
> # vector of the parameters included for the model
> # (1 = included, 0 = omitted)
> # runs.aic = AIC values for the best model found for each
> # of the random starts
> # itr = number of iterations for each random start
> #########################################################################
>
> ## INITIAL VALUES
> baseball.dat = read.table("baseball.dat",header=TRUE)
> baseball.dat$freeagent = factor(baseball.dat$freeagent)
> baseball.dat$arbitration = factor(baseball.dat$arbitration)
> baseball.sub = baseball.dat[,-1]
> salary.log = log(baseball.dat$salary)
> n = length(salary.log)
> m = length(baseball.sub[1,])
> num.starts = 5
> runs = matrix(0,num.starts,m)
> itr = 15
> runs.aic = matrix(0,num.starts,itr)
>
> # INITIALIZES STARTING RUNS
> set.seed(19676)
> for(i in 1:num.starts){runs[i,] = rbinom(m,1,.5)}
>
> ## MAIN
> for(k in 1:num.starts){
+ run.current = runs[k,]
+
+ # ITERATES EACH RANDOM START
+ for(j in 1:itr){
+ run.vars = baseball.sub[,run.current==1]
+ g = lm(salary.log~.,run.vars)
+ run.aic = extractAIC(g)[2]
+ run.next = run.current
+
+ # TESTS ALL MODELS IN THE 1-NEIGHBORHOOD AND SELECTS THE
+ # MODEL WITH THE LOWEST AIC
+ for(i in sample(m)){
+ run.step = run.current
+ run.step[i] = !run.current[i]
+ run.vars = baseball.sub[,run.step==1]
+ g = lm(salary.log~.,run.vars)
+ run.step.aic = extractAIC(g)[2]
+ if(run.step.aic < run.aic){
+ run.next = run.step
+ run.aic = run.step.aic
+ break
+ }
+ }
+ run.current = run.next
+ runs.aic[k,j]=run.aic
+ }
+ runs[k,] = run.current
+ }
>
> ## OUTPUT
> runs # LISTS OF PREDICTORS
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
[1,] 1 0 0 1 0 0 1 0 1 1 0 0 1 1 1 0
[2,] 1 0 1 0 0 1 0 1 0 1 1 0 1 1 1 1
[3,] 0 0 1 0 0 1 0 1 0 1 0 0 1 1 1 0
[4,] 1 1 1 0 0 1 0 1 0 1 0 0 1 1 1 1
[5,] 1 0 1 0 0 0 0 1 0 1 0 0 1 1 1 1
[,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27]
[1,] 0 1 1 0 0 0 0 0 1 0 1
[2,] 1 1 0 0 1 0 0 1 0 1 1
[3,] 0 0 0 1 1 1 0 0 1 1 0
[4,] 0 0 0 0 0 1 1 1 1 1 0
[5,] 0 0 0 0 0 0 0 1 1 0 1
> runs.aic # AIC VALUES
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] -217.9065 -217.9935 -219.8484 -220.1619 -221.0135 -222.6077 -223.7776 -223.9698
[2,] -217.2085 -218.3258 -219.5092 -219.7432 -221.7374 -223.6676 -398.4278 -399.6693
[3,] -403.9695 -405.3088 -409.3507 -410.7968 -411.9632 -412.6045 -414.2432 -416.2141
[4,] -124.5394 -127.0578 -127.5083 -400.2682 -401.1612 -401.6403 -401.6661 -402.5395
[5,] -135.9155 -137.7239 -407.5777 -409.1986 -410.8303 -412.4298 -412.6726 -413.9250
[,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] -225.8663 -232.0466 -233.5325 -400.9725 -402.8854 -403.2838 -404.8729
[2,] -409.1566 -409.3218 -409.6924 -410.1146 -410.6694 -411.0543 -412.0218
[3,] -416.5069 -416.5069 -416.5069 -416.5069 -416.5069 -416.5069 -416.5069
[4,] -407.9254 -407.9608 -410.2451 -410.6919 -410.8825 -411.9445 -413.5147
[5,] -414.0846 -414.2130 -416.1533 -416.2576 -417.8858 -417.8858 -417.8858
>
> ##PLOT
> plot(1:(itr*num.starts),-c(t(runs.aic)),xlab="Cumulative Iterations",
+ ylab="Negative AIC",ylim=c(360,420),type="n")
> for(i in 1:num.starts) {
+ lines((i-1)*itr+(1:itr),-runs.aic[i,]) }
It takes longer time. And AIC are much greater than the previouse ones. All because somtimes it can not find a downhill 2-neighborhood.
> #########################################################################
> ### EXERCISE 3.1
> #########################################################################
> # baseball.dat = entire data set
> # baseball.sub = matrix of all predictors
> # salary.log = response, log salary
> # n = number of observations in the data set
> # m = number of predictors in the data set
> # num.starts = number of random starts
> # runs = matrix containing the random starts where each row is a
> # vector of the parameters included for the model
> # (1 = included, 0 = omitted)
> # runs.aic = AIC values for the best model found for each
> # of the random starts
> # itr = number of iterations for each random start
> #########################################################################
>
> ## INITIAL VALUES
> baseball.dat = read.table("baseball.dat",header=TRUE)
> baseball.dat$freeagent = factor(baseball.dat$freeagent)
> baseball.dat$arbitration = factor(baseball.dat$arbitration)
> baseball.sub = baseball.dat[,-1]
> salary.log = log(baseball.dat$salary)
> n = length(salary.log)
> m = length(baseball.sub[1,])
> num.starts = 5
> runs = matrix(0,num.starts,m)
> itr = 15
> runs.aic = matrix(0,num.starts,itr)
>
> # INITIALIZES STARTING RUNS
> set.seed(19676)
> for(i in 1:num.starts){runs[i,] = rbinom(m,1,.5)}
>
> all2combn = combn(m,2)
>
> ## MAIN
> for(k in 1:num.starts){
+ run.current = runs[k,]
+
+ # ITERATES EACH RANDOM START
+ for(j in 1:itr){
+ run.vars = baseball.sub[,run.current==1]
+ g = lm(salary.log~.,run.vars)
+ run.aic = extractAIC(g)[2]
+ run.next = run.current
+
+ # TESTS ALL MODELS IN THE 1-NEIGHBORHOOD AND SELECTS THE
+ # MODEL WITH THE LOWEST AIC
+ for(i in sample(m*(m-1)/2)){
+ run.step = run.current
+ run.step[all2combn[1][i]] = !run.current[all2combn[1][i]]
+ run.step[all2combn[2][i]] = !run.current[all2combn[2][i]]
+ run.vars = baseball.sub[,run.step==1]
+ g = lm(salary.log~.,run.vars)
+ run.step.aic = extractAIC(g)[2]
+ if(run.step.aic < run.aic){
+ run.next = run.step
+ run.aic = run.step.aic
+ break
+ }
+ }
+ run.current = run.next
+ runs.aic[k,j]=run.aic
+ }
+ runs[k,] = run.current
+ }
>
> ## OUTPUT
> runs # LISTS OF PREDICTORS
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
[1,] 1 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0
[2,] 1 1 0 0 0 1 1 1 1 1 1 0 1 0 0 1
[3,] 1 0 0 0 1 0 0 1 0 1 1 0 1 1 1 1
[4,] 1 0 1 0 0 1 1 0 1 1 0 0 0 1 0 1
[5,] 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 0
[,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27]
[1,] 0 1 1 0 1 1 0 0 1 0 0
[2,] 1 1 0 1 0 0 0 1 1 1 1
[3,] 0 0 0 0 0 1 0 0 1 0 0
[4,] 0 1 1 0 1 1 1 1 0 0 0
[5,] 0 0 0 1 0 1 1 1 1 0 1
> runs.aic # AIC VALUES
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] -217.2883 -217.2883 -217.2883 -217.2883 -217.2883 -217.2883 -217.2883 -217.2883
[2,] -216.9405 -216.9405 -216.9405 -216.9405 -216.9405 -216.9405 -216.9405 -216.9405
[3,] -400.3062 -400.3062 -400.3062 -400.3062 -400.3062 -400.3062 -400.3062 -400.3062
[4,] -122.8772 -122.8772 -122.8772 -122.8772 -122.8772 -122.8772 -122.8772 -122.8772
[5,] -136.5481 -136.5481 -136.5481 -136.5481 -136.5481 -136.5481 -136.5481 -136.5481
[,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] -217.2883 -217.2883 -217.2883 -217.2883 -217.2883 -217.2883 -217.2883
[2,] -216.9405 -216.9405 -216.9405 -216.9405 -216.9405 -216.9405 -216.9405
[3,] -400.3062 -400.3062 -400.3062 -400.3062 -400.3062 -400.3062 -400.3062
[4,] -122.8772 -122.8772 -122.8772 -122.8772 -122.8772 -122.8772 -122.8772
[5,] -136.5481 -136.5481 -136.5481 -136.5481 -136.5481 -136.5481 -136.5481
>
> ##PLOT
> plot(1:(itr*num.starts),-c(t(runs.aic)),xlab="Cumulative Iterations",
+ ylab="Negative AIC",ylim=c(360,420),type="n")
> for(i in 1:num.starts) {
+ lines((i-1)*itr+(1:itr),-runs.aic[i,]) }
We can just do kmeans, which can find a local minima.
The size of clusters are 67,62,49
The following is the visualization of the clustering in 2D space via PCA.
> wine.dat = read.table("wine.dat",header=TRUE)
> cl <- kmeans(wine.dat, 3)
> print(cl$size)
[1] 49 67 62
>
> # plot in 2D via PCA
> pcafit <- princomp(wine.dat)
> plot(pcafit$scores[,1:2], col = cl$cluster)
The M-step is the same. But for E-step, we use and to esitmate the expexted genotype frequencies instead and .
Result:
# FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19579915 0.76813377
> #########################################################################
> # x = observed phenotype counts (carbonaria, insularia, typica, i or t)
> # n = expected genotype frequencies (CC, CI, CT, II, IT, TT)
> # p = allele probabilities (carbonaria, insularia, typica)
> # itr = number of iterations
> # allele.e = computes expected genotype frequencies
> # allele.m = computes allele probabilities
> #########################################################################
>
> ## INITIAL VALUES
> x = c(85, 196, 341, 578)
> n = rep(0,6)
> p = rep(1/3,3)
> itr = 40
>
> ## EXPECTATION AND MAXIMIZATION FUNCTIONS
> allele.e = function(x,p){
+ n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
+ n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
+ n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
+ return(n)
+ }
>
> allele.m = function(x,n){
+ p.c = (2*n[1]+n[2]+n[3])/(2*sum(x))
+ p.i = (2*n[4]+n[5]+n[2])/(2*sum(x))
+ p.t = (2*n[6]+n[3]+n[5])/(2*sum(x))
+ p = c(p.c,p.i,p.t)
+ return(p)
+ }
>
> ## MAIN
> for(i in 1:itr){
+ n = allele.e(x,p)
+ p = allele.m(x,n)
+ }
>
> ## OUTPUT
> p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19579915 0.76813377
Result:
> ## OUTPUT
> sd.hat # STANDARD ERROR ESTIMATES (pc, pi, pt)
[1] 0.003840308 0.011077418 0.011530716
> cor.hat # CORRELATION ESTIMATES ({pc,pi}, {pc,pt}, {pi,pt})
[1] -0.03341484 -0.30094901 -0.94955901
Code:
> #########################################################################
> # x = observed phenotype counts (carbonaria, insularia, typica)
> # n = expected genotype frequencies (CC, CI, CT, II, IT, TT)
> # p = allele probabilities (carbonaria, insularia, typica)
> # p.em = em algorithm estimates
> # n.em = em algorithm estimates
> # itr = number of iterations
> # allele.e = computes expected genotype frequencies
> # allele.m = computes allele probabilities
> #########################################################################
>
> ## INITIAL VALUES
> x = c(85, 196, 341, 578)
> n = rep(0,6)
> itr = 40
> p = c(0.04, 0.19, 0.77)
> p.em = p
> theta = matrix(0,3,3)
> psi = rep(0,3)
> r = matrix(0,3,3)
>
> ## EXPECTATION AND MAXIMIZATION FUNCTIONS
> allele.e = function(x,p){
+ n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
+ n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
+ n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
+ return(n)
+ }
>
> allele.m = function(x,n){
+ p.c = (2*n[1]+n[2]+n[3])/(2*sum(x))
+ p.i = (2*n[4]+n[5]+n[2])/(2*sum(x))
+ p.t = (2*n[6]+n[3]+n[5])/(2*sum(x))
+ p = c(p.c,p.i,p.t)
+ return(p)
+ }
>
> ## COMPUTES EM ALGORITHM ESTIMATES
> for(i in 1:itr){
+ n.em = allele.e(x,p.em)
+ p.em = allele.m(x,n.em)
+ }
>
> ## INTIALIZES THETA
> for(j in 1:length(p)){
+ theta[,j] = p.em
+ theta[j,j] = p[j]
+ }
>
> ## MAIN
> for(t in 1:5){
+ n = allele.e(x,p)
+ p.hat = allele.m(x,n)
+ for(j in 1:length(p)){
+ theta[j,j] = p.hat[j]
+ n = allele.e(x,theta[,j])
+ psi = allele.m(x,n)
+ for(i in 1:length(p)){
+ r[i,j] = (psi[i]-p.em[i])/(theta[j,j]-p.em[j])
+ }
+ }
+ p = p.hat
+ }
>
> ## COMPLETE INFORMATION
> iy.hat=matrix(0,2,2)
> iy.hat[1,1] = ((2*n.em[1]+n.em[2]+n.em[3])/(p.em[1]^2) +
+ (2*n.em[6]+n.em[3]+n.em[5])/(p.em[3]^2))
> iy.hat[2,2] = ((2*n.em[4]+n.em[5]+n.em[2])/(p.em[2]^2) +
+ (2*n.em[6]+n.em[3]+n.em[5])/(p.em[3]^2))
> iy.hat[1,2] = iy.hat[2,1] = (2*n.em[6]+n.em[3]+n.em[5])/(p.em[3]^2)
>
> ## COMPUTES STANDARD ERRORS AND CORRELATIONS
> var.hat = solve(iy.hat)%*%(diag(2)+t(r[-3,-3])%*%solve(diag(2)-t(r[-3,-3])))
> sd.hat = c(sqrt(var.hat[1,1]),sqrt(var.hat[2,2]),sqrt(sum(var.hat)))
> cor.hat = c(var.hat[1,2]/(sd.hat[1]*sd.hat[2]),
+ (-var.hat[1,1]-var.hat[1,2])/(sd.hat[1]*sd.hat[3]),
+ (-var.hat[2,2]-var.hat[1,2])/(sd.hat[2]*sd.hat[3]))
>
> ## OUTPUT
> sd.hat # STANDARD ERROR ESTIMATES (pc, pi, pt)
[1] 0.003840308 0.011077418 0.011530716
> cor.hat # CORRELATION ESTIMATES ({pc,pi}, {pc,pt}, {pi,pt})
[1] -0.03341484 -0.30094901 -0.94955901
Result:
> ## OUTPUT
> theta[,1] # EM ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19579915 0.76813377
> sd.hat # STANDARD ERROR ESTIMATES (p.c, p.i, p.t)
[1] 0.003864236 0.012547943 0.012905975
> cor.hat # CORRELATION ESTIMATES ({p.c,p.i}, {p.c,p.t}, {p.i,p.t})
[1] -0.06000425 -0.24107484 -0.95429228
Code:
> #########################################################################
> # x = observed phenotype counts (carbonaria, insularia, typica)
> # n = expected genotype frequencies (CC, CI, CT, II, IT, TT)
> # p = allele probabilities (carbonaria, insularia, typica)
> # itr = number of iterations
> # theta = allele probabilities for psuedo-data
> # allele.e = computes expected genotype frequencies
> # allele.m = computes allele probabilities
> #########################################################################
>
> ## INITIAL VALUES
> x = c(85, 196, 341,578)
> n = rep(0,6)
> p = rep(1/3,3)
> itr = 40
> theta = matrix(0,3,10000)
> set.seed(0)
>
> ## EXPECTATION AND MAXIMIZATION FUNCTIONS
> allele.e = function(x,p){
+ n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
+ x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
+ n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
+ n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
+ n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
+ return(n)
+ }
> allele.m = function(x,n){
+ p.c = (2*n[1]+n[2]+n[3])/(2*sum(x))
+ p.i = (2*n[4]+n[5]+n[2])/(2*sum(x))
+ p.t = (2*n[6]+n[3]+n[5])/(2*sum(x))
+ p = c(p.c,p.i,p.t)
+ return(p)
+ }
>
> ## MAIN
> for(i in 1:itr){
+ n = allele.e(x,p)
+ p = allele.m(x,n)
+ }
> theta[,1] = p
> for(j in 2:10000){
+ n.c = rbinom(1, sum(x), x[1]/sum(x))
+ n.i = rbinom(1, sum(x) - n.c, x[2]/(sum(x)-x[1]))
+ n.t = rbinom(1, sum(x) - n.c - n.i, x[3]/(sum(x)-x[1]-x[2]))
+ n.u = sum(x) - n.c - n.i - n.t
+ x.new = c(n.c, n.i, n.t, n.u)
+ n = rep(0,6)
+ p = rep(1/3,3)
+ for(i in 1:itr){
+ n = allele.e(x.new,p)
+ p = allele.m(x.new,n)
+ }
+ theta[,j] = p
+ }
>
> sd.hat = c(sd(theta[1,]), sd(theta[2,]), sd(theta[3,]))
> cor.hat = c(cor(theta[1,],theta[2,]), cor(theta[1,],theta[3,]),
+ cor(theta[2,],theta[3,]))
>
> ## OUTPUT
> theta[,1] # EM ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19579915 0.76813377
> sd.hat # STANDARD ERROR ESTIMATES (p.c, p.i, p.t)
[1] 0.003864236 0.012547943 0.012905975
> cor.hat # CORRELATION ESTIMATES ({p.c,p.i}, {p.c,p.t}, {p.i,p.t})
[1] -0.06000425 -0.24107484 -0.95429228
Output:
> ## OUTPUT
> p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03596147 0.19582060 0.76821793
Code:
## INITIAL VALUES
x = c(85, 196, 341, 578)
n = rep(0,6)
p = rep(1/3,3)
itr = 40
prob.values = matrix(0,3,itr+1)
prob.values[,1] = p
alpha.default = 2
## FUNCTIONS
allele.e = function(x,p){
n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
return(n)
}
allele.l = function(x,p){
l = ( x[1]*log(2*p[1] - p[1]^2) + x[2]*log(p[2]^2 + 2*p[2]*p[3]) +
2*x[3]*log(p[3]) + x[4]*log(p[2]^2 + 2*p[2]*p[3]+p[3]^2) )
return(l)
}
Q.prime = function(n,p){
da = (2*n[1]+n[2]+n[3])/(p[1]) - (2*n[6]+n[3]+n[5])/(p[3])
db = (2*n[4]+n[5]+n[2])/(p[2]) - (2*n[6]+n[3]+n[5])/(p[3])
dQ = c(da,db)
return(dQ)
}
Q.2prime = function(n,p){
da2 = -(2*n[1]+n[2]+n[3])/(p[1]^2) - (2*n[6]+n[3]+n[5])/(p[3]^2)
51
db2 = -(2*n[4]+n[5]+n[2])/(p[2]^2) - (2*n[6]+n[3]+n[5])/(p[3]^2)
dab = -(2*n[6]+n[3]+n[5])/(p[3]^2)
d2Q = matrix(c(da2,dab,dab,db2), nrow=2, byrow=TRUE)
return(d2Q)
}
## MAIN
l.old = allele.l(x,p)
for(i in 1:itr){
alpha = alpha.default
n = allele.e(x,p)
p.new = p[1:2] - alpha*solve(Q.2prime(n,p))%*%Q.prime(n,p)
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
# REDUCE ALPHA UNTIL A CORRECT STEP IS REACHED
while(p.new < 0 || p.new > 1 || l.new < l.old){
alpha = alpha/2
p.new = p[1:2] - alpha*solve(Q.2prime(n,p))%*%Q.prime(n,p)
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
}
p = p.new
prob.values[,i+1] = p
l.old = l.new
}
## OUTPUT
p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
## PLOT OF CONVERGENCE
pb = seq(0.001,0.4,length=100)
c = outer(pb,pb,function(a,b){1-a-b})
pbs = matrix(0,3,10000)
pbs[1,] = rep(pb,100)
pbs[2,] = rep(pb,each=100)
pbs[3,] = as.vector(c)
z = matrix(apply(pbs,2,allele.l,x=x),100,100)
contour(pb,pb,z,nlevels=20)
for(i in 1:itr){
segments(prob.values[1,i],prob.values[2,i],prob.values[1,i+1],
prob.values[2,i+1],lty=2)
}
Output:
> ## OUTPUT
> p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19579915 0.76813377
Code:
## INITIAL VALUES
x = c(85, 196, 341,578)
n = rep(0,6)
p = rep(1/3,3)
itr = 40
prob.values = matrix(0,3,itr+1)
prob.values[,1] = p
alpha.default = 2
## FUNCTIONS
allele.e = function(x,p){
n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
return(n)
}
allele.m = function(x,n){
p.c = (2*n[1]+n[2]+n[3])/(2*sum(x))
p.i = (2*n[4]+n[5]+n[2])/(2*sum(x))
p.t = (2*n[6]+n[3]+n[5])/(2*sum(x))
p = c(p.c,p.i,p.t)
return(p)
}
allele.l = function(x,p){
l = ( x[1]*log(2*p[1] - p[1]^2) + x[2]*log(p[2]^2 + 2*p[2]*p[3]) +
2*x[3]*log(p[3]) + 2*x[4]*log(1-p[1]) )
return(l)
}
allele.iy = function(n,p){
iy.hat=matrix(0,2,2)
iy.hat[1,1] = ((2*n[1]+n[2]+n[3])/(p[1]^2) +
(2*n[6]+n[3]+n[5])/(p[3]^2))
iy.hat[2,2] = ((2*n[4]+n[5]+n[2])/(p[2]^2) +
(2*n[6]+n[3]+n[5])/(p[3]^2))
iy.hat[1,2] = iy.hat[2,1] = (2*n[6]+n[3]+n[5])/(p[3]^2)
return(iy.hat)
}
allele.l.2prime = function(x,p){
l.2prime = matrix(0,2,2)
l.2prime[1,1] = ( (-x[1]*(2-2*p[1])^2)/((2*p[1]-p[1]^2)^2) -
2*x[1]/(2*p[1]-p[1]^2) -
(4*x[2])/((-2*p[1]-p[2]+2)^2) -
2*x[3]/(p[3]^2)
-2*x[4]/(1-p[1])^2
)
l.2prime[2,2] = ( (-4*x[2]*p[3]^2)/((p[2]^2 + 2*p[2]*p[3])^2) -
2*x[2]/(p[2]^2 + 2*p[2]*p[3]) -
2*x[3]/(p[3]^2)
)
l.2prime[1,2] = ((-2*x[2])/((-2*p[1]-p[2]+2)^2) -
2*x[3]/(p[3]^2))
l.2prime[2,1] = l.2prime[1,2]
return(l.2prime)
}
## MAIN
l.old = allele.l(x,p)
for(i in 1:itr){
alpha = alpha.default
n = allele.e(x,p)
p.em = allele.m(x,n)
p.new = (p[1:2] - alpha*solve(allele.l.2prime(x,p))%*%
allele.iy(n,p)%*%(p.em[1:2]-p[1:2]))
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
# REDUCE ALPHA UNTIL A CORRECT STEP IS REACHED
while(p.new < 0 || p.new > 1 || l.new < l.old){
alpha = alpha/2
p.new = (p[1:2] - alpha*solve(allele.l.2prime(x,p))%*%
allele.iy(n,p)%*%(p.em[1:2]-p[1:2]))
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
}
p = p.new
prob.values[,i+1] = p
l.old = l.new
}
## OUTPUT
p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
## PLOT OF CONVERGENCE
pb = seq(0.001,0.4,length=100)
c = outer(pb,pb,function(a,b){1-a-b})
pbs = matrix(0,3,10000)
pbs[1,] = rep(pb,100)
pbs[2,] = rep(pb,each=100)
pbs[3,] = as.vector(c)
z = matrix(apply(pbs,2,allele.l,x=x),100,100)
contour(pb,pb,z,nlevels=20)
for(i in 1:itr){
segments(prob.values[1,i],prob.values[2,i],prob.values[1,i+1],
prob.values[2,i+1],lty=2)
}
Output:
> ## OUTPUT
> p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
[1] 0.03606708 0.19580543 0.76812749
Code:
## INITIAL VALUES
x = c(85, 196, 341, 578)
n = rep(0,6)
p = rep(1/3,3)
itr = 20
m = matrix(0,2,2)
b = matrix(0,2,2)
prob.values = matrix(0,3,itr+1)
prob.values[,1] = p
alpha.default = 2
## FUNCTIONS
allele.e = function(x,p){
n.cc = (x[1]*(p[1]^2))/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ci = (2*x[1]*p[1]*p[2])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
n.ct = (2*x[1]*p[1]*p[3])/((p[1]^2)+2*p[1]*p[2]+2*p[1]*p[3])
x2hat = x[2]+x[4]*((p[2]^2)+2*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
x3hat = x[3]+x[4]*(p[3]*p[3])/((p[2]^2)+2*p[2]*p[3]+p[3]*p[3])
n.ii = (x2hat*(p[2]^2))/((p[2]^2)+2*p[2]*p[3])
n.it = (2*x2hat*p[2]*p[3])/((p[2]^2)+2*p[2]*p[3])
n = c(n.cc,n.ci,n.ct,n.ii,n.it,x3hat)
return(n)
}
allele.l = function(x,p){
l = ( x[1]*log(2*p[1] - p[1]^2) + x[2]*log(p[2]^2 + 2*p[2]*p[3]) +
2*x[3]*log(p[3]) + 2*x[4]*log(1-p[1]) )
return(l)
}
Q.prime = function(n,p){
da = (2*n[1]+n[2]+n[3])/(p[1]) - (2*n[6]+n[3]+n[5])/(p[3])
db = (2*n[4]+n[5]+n[2])/(p[2]) - (2*n[6]+n[3]+n[5])/(p[3])
dQ = c(da,db)
return(dQ)
}
Q.2prime = function(n,p){
da2 = -(2*n[1]+n[2]+n[3])/(p[1]^2) - (2*n[6]+n[3]+n[5])/(p[3]^2)
db2 = -(2*n[4]+n[5]+n[2])/(p[2]^2) - (2*n[6]+n[3]+n[5])/(p[3]^2)
dab = -(2*n[6]+n[3]+n[5])/(p[3]^2)
d2Q = matrix(c(da2,dab,dab,db2), nrow=2, byrow=TRUE)
return(d2Q)
}
## MAIN
l.old = allele.l(x,p)
for(i in 1:itr){
alpha = alpha.default
n = allele.e(x,p)
m = Q.2prime(n,p) - b
p.new = p[1:2] - alpha*solve(m)%*%Q.prime(n,p)
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
# REDUCE ALPHA UNTIL A CORRECT STEP IS REACHED
while(p.new < 0 || p.new > 1 || l.new < l.old){
alpha = alpha/2
p.new = p[1:2] - alpha*solve(m)%*%Q.prime(n,p)
p.new[3] = 1 - p.new[1] - p.new[2]
if(p.new > 0 && p.new < 1){l.new = allele.l(x,p.new)}
}
at = p.new[1:2]-p[1:2]
n = allele.e(x,p.new)
bt = Q.prime(n,p)-Q.prime(n,p.new)
vt = bt - b%*%at
ct = as.numeric(1/(t(vt)%*%at))
b = b + ct*vt%*%t(vt)
p = p.new
prob.values[,i+1] = p
l.old = l.new
}
## OUTPUT
p # FINAL ESTIMATE FOR ALLELE PROBABILITIES (p.c, p.i, p.t)
## PLOT OF CONVERGENCE
pb = seq(0.001,0.4,length=100)
c = outer(pb,pb,function(a,b){1-a-b})
pbs = matrix(0,3,10000)
pbs[1,] = rep(pb,100)
pbs[2,] = rep(pb,each=100)
pbs[3,] = as.vector(c)
z = matrix(apply(pbs,2,allele.l,x=x),100,100)
contour(pb,pb,z,nlevels=20)
for(i in 1:itr){
segments(prob.values[1,i],prob.values[2,i],prob.values[1,i+1],
prob.values[2,i+1],lty=2)
}
Accoding to the figures of the above methods, we can tell the Aitken accerlerated EM is the most efficient.
Forward function:
Given for all . Then for any , we compute
Backward function:
Given for all . Then for any , we compute
The log-likelihood is :
So we can take and as laten variables. The proposed fomular is the according MLE when fixing and .
Output:
$pi
[1] 0.5 0.5
$p
[,1] [,2]
[1,] 0.5 0.5
[2,] 0.5 0.5
$e
[,1] [,2]
[1,] 0.45 0.55
[2,] 0.45 0.55
Code
x = as.matrix(read.table("coin.dat",header=TRUE))+1
n = 200
pi = c(0.5,0.5)
e = matrix(rep(0.5,4),ncol=2)
p = matrix(rep(0.5,4),ncol=2)
cal_alpha = function(pi,e,p,x)
{
alpha_new = matrix(rep(0,n*2),ncol=2)
alpha_new[1,1] = pi[1]*e[1,x[1]]
alpha_new[1,2] = pi[2]*e[2,x[1]]
for (i in 2:n)
{
alpha_new[i,1] = alpha_new[i-1,1]*p[1,1]*e[1,x[i]] + alpha_new[i-1,2]*p[2,1]*e[1,x[i]]
alpha_new[i,2] = alpha_new[i-1,1]*p[1,2]*e[2,x[i]] + alpha_new[i-1,2]*p[2,2]*e[2,x[i]]
}
return(alpha_new)
}
cal_beta = function(pi,e,p,x)
{
beta_new = matrix(rep(0,n*2),ncol=2)
beta_new[n,1] = 1
beta_new[n,2] = 1
for (i in (n-1):1)
{
beta_new[i,1] = p[1,1]*e[1,x[i+1]]*beta_new[i+1,1] + p[1,2]*e[2,x[i+1]]*beta_new[i+1,2]
beta_new[i,2] = p[2,1]*e[1,x[i+1]]*beta_new[i+1,1] + p[2,2]*e[2,x[i+1]]*beta_new[i+1,2]
}
return(beta_new)
}
estep = function(x,alpha,beta,p,e){
Nh = c(0,0)
Nhh = matrix(c(0,0,0,0),ncol=2)
Nho = matrix(c(0,0,0,0),ncol=2)
Nh = c(alpha[1,1]*beta[1,1],alpha[1,2]*beta[1,2])
for(i in 1:2)
for(j in 1:2)
for(ii in 1:(n-1))
Nhh[i,j] = Nhh[i,j]+alpha[ii,i]*p[i,j]*e[j,x[ii+1]]*beta[ii+1,j]
for(i in 1:2)
for(j in 1:2){
for(ii in 1:(n))
if(x[ii] == j)
{
Nho[i,j] = Nho[i,j]+alpha[ii,i]*beta[ii,i]
}
}
return(list(Nh=Nh,Nhh=Nhh,Nho=Nho))
}
mstep = function(Nh,Nhh,Nho){
new_pi = Nh/sum(Nh)
new_p = Nhh/rowSums(Nhh)
new_e = Nho/rowSums(Nho)
return(list(pi=new_pi,p=new_p,e=new_e))
}
iternum = 10
for(iter in 1:iternum)
{
alpha = cal_alpha(pi,e,p,x)
beta = cal_beta(pi,e,p,x)
estep_list = estep(x,alpha,beta,p,e)
mstep_list = mstep(estep_list$Nh,estep_list$Nhh,estep_list$Nho)
pi = mstep_list$pi
p = mstep_list$p
e = mstep_list$e
print(mstep_list)
}