140.778 Adv Stat Comp
Additional comments


1 November

  1. As Rui Feng mentioned, you can use the functions upper.tri and lower.tri in R to refer to the upper and lower triangles of a matrix. Consider the following code
    x <- matrix(rnorm(80),ncol=8)
    # the following are equivalent
    x[lower.tri(x)] <- 0    
    x[row(x) > col(x)] <- 0    
    
  2. A slightly extended version of the Fisher's exact test example appears in K Lange (1999) Numerical analysis for statisticians. Section 21.7.

    Note that in my fisher() function, the line

    a <- lapply(a, sample)
    
    can be replaced by
    a[[1]] <- sample(a[[1]])
    
    since we don't need to permute both the row and column indices...permuting one or the other will suffice.


8 November

I should have mentioned that what I call the “parametric bootstrap” is often called “Monte Carlo simulation.”


13 November

To clarify the discussion on the QR vs the LU decomposition (I thank Rui Feng for this): In the QR decomposition, X = QR where Q is orthonormal (Q'Q = I) and R is upper triangular. In the LU decomposition, X = LU where L is lower triangular and U is upper triangular.

If there were a lower-triangular, orthonormal Q, so that both the QR and LU decompositions were satisfied, then it would follow that Q was a diagonal matrix with entries ±1. Clearly this would not work generally. (It would require that X be triangular, right?)


11 December

Here is my R code for MCMC in the T cell assay example.
mcmc.npem <-
function(data,start=c(0.5,1.5,2.5,3.5,10,5,1),n=c(24,24,24,22),
         nstep = 1000,tol=1e-10,prnt=FALSE)
{

  # npem package: does EM for these data
  # I use it here in order to calculate the log likelihood
  #      (npem.ll)
  require(npem)
  
  gp <- length(n)
  N <- sum(n)
  output <- matrix(nrow=nstep+1,ncol=length(start)+1)
  output[1,] <- c(start,npem.ll(data,start))
  wh <- rep(1:4,n)
  
  for(i in 1:nstep) {

    # sample k's
    lam <- rep(start[1:gp],n)
    maxk <- qpois(1-tol*100,lam)
    k <- apply(cbind(data,lam,maxk),1,
               function(x,absig,i) {
                 a <- dpois(0:x[3],x[2]+tol)*
                   dnorm(x[1],absig[1]+absig[2]*(0:x[3]),absig[3])
                 sample(0:x[3],1,prob=a)
               }, start[-(1:gp)],i)

    # sample lambdas
    start[1:gp] <- rgamma(gp,tapply(k,wh,sum)+1,1/(n+1))
    
    # lin reg
    a <- summary(lm(data ~ k))
    betahat <- a$coef[,1]
    sig <- a$sigma
    Vhat <- a$cov

    # simulate sigma
    sig <- sig*sqrt((N-2)/rchisq(1,N-2))

    # simulate a,b
    beta <- rnorm(2) %*% chol(Vhat * sig) + betahat
    
    start[-(1:gp)] <- c(beta,sig)

    output[i+1,] <- c(start,npem.ll(data,start))

    if(prnt) print(round(output[i+1,],2))

  }
  output
}

13 December

Here is my R code for the reversible jump MCMC example.
# simulate data from an inhomogenouse Poisson process
#     whose rate function is a step function
#
# pos = location of steps (last value is the length
#       of the interval considered and first value is 0)
# h   = heights in each interval
#
sim.dat <-
function(pos=c(0,4,5,6,10),h=c(30,10,15,5))
{
  # lengths of intervals
  len <- diff(pos)

  # Poisson counts
  n <- rpois(length(len),len * h)

  left <- pos[-length(pos)]
  right <- pos[-1]

  # locations within intervals
  runif(sum(n),rep(left,n),rep(right,n))
}


# calculate the log likelihood for a set of data
loglik <-
function(data, pos=c(0,3,5,6,10), h=c(40,10,25,5))
{
  #     I ran into trouble in the following if
  #     there were no observed points between
  #     some of the steps
  #
  # sum(log(h)*tapply(data,cut(data,pos),length)) - sum(h*diff(pos))

  n <- tapply(data,cut(data,pos),length)
  n[is.na(n)] <- 0
  sum(log(h)*n) - sum(h*diff(pos))
}

# prior = (lambda, alpha, beta, kmax)
green.mcmc <-
function(data, prior=c(1,0.2,0.01,30),
         pos=c(0,10), h=length(data)/pos[2],
         nstep=44000)
{
  save.pos <- save.h <- vector("list",nstep+1)
  save.pos[[1]] <- pos
  save.h[[1]] <- h

  save.loglik <- 1:(nstep+1)
  save.steptype <- 1:nstep
  save.accept <- 1:nstep


  # calculate jump probabilities
  jump.prob <- matrix(ncol=4,nrow=prior[4]+1)
  p <- dpois(0:prior[4],prior[1])/ppois(prior[4]+1,prior[1])
  bk <- c(p[-1]/p[-length(p)],0)
  bk[bk > 1] <- 1
  dk <- c(0,p[-length(p)]/p[-1])
  dk[dk > 1] <- 1
  mx <- max(bk+dk)
  bk <- bk/mx*0.9
  dk <- dk/mx*0.9
  jump.prob[,3] <- bk
  jump.prob[,4] <- dk
  jump.prob[1,2] <- 0
  jump.prob[1,1] <- 1-bk[1]-dk[1]

  # Steve Knox kindly identified the following error and fix
  #jump.prob[-1,1] <- jump.prob[-1,2] <-
  #  1-jump.prob[-1,3]-jump.prob[-1,4]
  jump.prob[-1,1] <- jump.prob[-1,2] <-
    (1-jump.prob[-1,3]-jump.prob[-1,4])/2

  # calculate starting loglik
  curloglik <- save.loglik[1] <- loglik(data,pos,h)

  for(i in (1:nstep + 1)) {

    # determine what type of jump to make

    save.steptype[i-1] <- wh <- sample(1:4,1,prob=jump.prob[length(h),])
    if(wh==1) {
      step <- ht.move(data,pos,h,curloglik,prior)
      save.pos[[i]] <- pos
      save.h[[i]] <- h <- step[[1]]
      save.loglik[i] <- curloglik <- step[[2]]
      save.accept[i-1] <- step[[3]]
    }
    else if(wh==2) {
      step <- pos.move(data,pos,h,curloglik)
      save.pos[[i]] <- pos <- step[[1]]
      save.h[[i]] <- h
      save.loglik[i] <- curloglik <- step[[2]]
      save.accept[i-1] <- step[[3]]
    }
    else if(wh==3) {
      step <- birth.step(data,pos,h,curloglik,prior,jump.prob)
      save.pos[[i]] <- pos <- step[[1]]
      save.h[[i]] <- h <- step[[2]]
      save.loglik[i] <- curloglik <- step[[3]]
      save.accept[i-1] <- step[[4]]
    }
    else {
      step <- death.step(data,pos,h,curloglik,prior,jump.prob)
      save.pos[[i]] <- pos <- step[[1]]
      save.h[[i]] <- h <- step[[2]]
      save.loglik[i] <- curloglik <- step[[3]]
      save.accept[i-1] <- step[[4]]
    }

#    cat(i," ", length(pos)-1, "\n")

  }
  list(pos=save.pos,h=save.h,loglik=save.loglik,
       steptype=save.steptype,accept=save.accept)

}


ht.move <-
function(data,pos,h,curloglik,prior)
{
  j <- sample(1:length(h),1)

  newh <- h
  newh[j] <- h[j]*exp(runif(1,-0.5,0.5))

  newloglik <- loglik(data,pos,newh)
  lr <- newloglik - curloglik

  ratio <- exp(lr + prior[2]*(log(newh[j])-log(h[j])) - prior[3]*(newh[j]-h[j]))

  if(runif(1,0,1) < ratio)
    return(list(newh,newloglik,1))
  else
    return(list(h,curloglik,0))

}


pos.move <-
function(data,pos,h,curloglik)
{
  if(length(pos)==3) j <- 2
  else j <- sample(2:(length(pos)-1),1)
  newpos <- pos

  left <- pos[j-1]
  right <- pos[j+1]
  newpos[j] <- runif(1,left,right)

  newloglik <- loglik(data,newpos,h)
  lr <-  newloglik - curloglik

  ratio <- exp(lr) * (right-newpos[j])*(newpos[j]-left)/
    (right-pos[j])/(pos[j]-left)

  if(runif(1,0,1) < ratio)
    return(list(newpos,newloglik,1))
  else
    return(list(pos,curloglik,0))
}


birth.step <-
function(data,pos,h,curloglik,prior,jump.prob)
{
  # new pos
  newpos <- runif(1,0,pos[length(pos)])
  j <- sum(pos < newpos)
  left <- pos[j]
  right <- pos[j+1]

  # get new heights
  u <- runif(1,0,1)
  r <- (1-u)/u
  newh.left <- exp(log(h[j]) - (right-newpos)*log(r)/(right-left))
  newh.right <- newh.left * r

  # ratio
  # recall that prior = (lambda, alpha, beta, maxk)
  k <- length(pos) - 2
  L <- max(pos)

  p.k <- log(dpois(k + 0:1,prior[1]))

  # Steve Knox kindly identified the following error and fix
  #prior.logratio <- p.k[2] - p.k[1] +  log(2*(k+1)*(2*k+3)) - 2*log(L) +
  #  log(newpos-left) + log(right-newpos) - log(right-left) +
  #    prior[2]*log(prior[3]) - lgamma(prior[2]) +
  #      (prior[2]-1) * log(newh.left*newh.right/h[j]) -
  #        prior[3]*(newh.left+newh.right - h[j])
  prior.logratio <- p.k[2] - p.k[1] +  log(2*(k+1)*(2*k+3)) - 2*log(L) +
    log(newpos-left) + log(right-newpos) - log(right-left) +
      prior[2]*log(prior[3]) - lgamma(prior[2]) +
        (prior[2]-1) * log(newh.left*newh.right/h[j]) + # <<< - to +
          prior[3]*(newh.left+newh.right - h[j])

  proposal.ratio <- jump.prob[k+2,4]*L/jump.prob[k+1,3]/(k+1)
  jacobian <- (newh.left + newh.right)^2/h[j]

  # form new parameters
  newpos <- sort(c(pos,newpos))

  if(j < 2) newh <- c(newh.left,newh.right)
  else newh <- c(h[1:(j-1)],newh.left,newh.right)
  if(j <= length(h)-1) newh <- c(newh,h[(j+1):length(h)])

  newloglik <- loglik(data,newpos,newh)

  lr <- newloglik - curloglik

  ratio <- exp(lr + prior.logratio) * proposal.ratio * jacobian

#  print(c(exp(lr),exp(prior.logratio),proposal.ratio,jacobian,ratio))

  if(runif(1,0,1) < ratio)
    return(list(newpos,newh,newloglik,1))
  else
    return(list(pos,h,curloglik,0))

}



death.step <-
function(data,pos,h,curloglik,prior,jump.prob)
{
  # pos to drop
  if(length(pos)==3) j <- 2
  else j <- sample(2:(length(pos)-1),1)

  left <- pos[j-1]
  right <- pos[j+1]

  # get new height
  h.left <- h[j-1]
  h.right <- h[j]
  newh <- exp(((right-pos[j])*log(h.right) + (pos[j]-left)*log(h.left))/(right-left))

  # ratio
  # recall that prior = (lambda, alpha, beta, maxk)
  k <- length(pos) - 3
  L <- max(pos)
  p.k <- log(dpois(k + 0:1,prior[1]))
  prior.logratio <- p.k[1] - p.k[2] -  log(2*(k+1)*(2*k+3)) + 2*log(L) -
    log(pos[j]-left) - log(right-pos[j]) + log(right-left) -
      prior[2]*log(prior[3]) + lgamma(prior[2]) -
        (prior[2]-1) * log(h.left*h.right/newh) -
          prior[3]*(h.left+h.right - newh)
  proposal.ratio <- (k+1)*jump.prob[k+1,3]/jump.prob[k+2,4]/L
  jacobian <- newh/(h.left + h.right)^2

  # form new parameters
  newpos <- pos[-j]

  if(j < 3) newh <- newh
  else newh <- c(h[1:(j-2)],newh)
  if(j <= length(h)-1) newh <- c(newh,h[(j+1):length(h)])

  newloglik <- loglik(data,newpos,newh)

  lr <- newloglik - curloglik

  ratio <- exp(lr + prior.logratio) * proposal.ratio * jacobian

#  print(c(exp(lr),exp(prior.logratio),proposal.ratio,jacobian,ratio))

  if(runif(1,0,1) < ratio)
    return(list(newpos,newh,newloglik,1))
  else
    return(list(pos,h,curloglik,0))

}


kbroman at jhsph.edu
http://www.biostat.wisc.edu/~kbroman

Last modified: Wed Apr 15 23:02:36 2009