## 140.778 Adv Stat Comp Code for Assignment 2

```# simulate x data sim.x <- function(n=50,rho=0.5) { x <- matrix(runif(n*3),ncol=3) v <- chol(matrix(c(1,rho,rho,rho,1,rho,rho,rho,1),ncol=3)) x <- t(t(x %*% t(v))*c(5,1,20) + c(10,5,20)) x <- as.data.frame(x) names(x) <- c("x1","x2","x3") x } # simulate y data with censoring sim.data <- function(x=sim.x(),beta=c(10,0.5,1,-0.5),sigma=2,censor=10) { y <- as.numeric(as.matrix(cbind(1,x)) %*% beta) + rnorm(nrow(x),0,sigma) y[y > censor] <- censor data.frame(x,y=y) } # calculate negative log likelihood negloglik <- function(theta,d,censor=max(d\$y)) { y <- d\$y X <- as.matrix(cbind(1,d[,-ncol(d)])) beta <- theta[-length(theta)] sigma <- theta[length(theta)] loglik <- y o <- y < censor loglik[o] <- log(dnorm(y[o],X[o,] %*% beta, sigma)) loglik[!o] <- log(1-pnorm(censor, X[!o,] %*% beta, sigma)) -sum(loglik) } # LS fit ignoring censoring fit1 <- function(d) { a <- summary(lm(y ~ . , data=d)) b <- a\$coef d <- a\$sigma # calculate SEs and correlation V <- a\$cov * d^2 se <- sqrt(diag(V)) rho <- V / rep(se,length(se)) / rep(se,rep(length(se),length(se))) list(theta=c(b[,1],d),se=se,cor=rho) } # fit using the nlm function fit2 <- function(d,...) { start <- fit1(d)[[1]] o <- nlm(negloglik,start,d=d,censor=max(d\$y),hessian=T,...) # calculate SEs and correlation V <- solve(o\$hessian) se <- sqrt(diag(V)) rho <- V / rep(se,length(se)) / rep(se,rep(length(se),length(se))) list(theta=o\$est,se=se,cor=rho,o) } # fit using the EM algorithm fit3 <- function(d, start=fit1(d)\$theta, censor=max(d\$y), tol1=1e-8, tol2=tol1*100, maxit=1000, prnt=FALSE) { y <- d\$y o <- (y < censor) X <- as.matrix(cbind(1,d[,-ncol(d)])) H <- function(v) dnorm(v)/(1-pnorm(v)) theta.old <- start p <- length(start) n <- length(y) for(i in 1:maxit) { # E step w <- y # Expected observation wsq <- y^2 # Expected observation^2 mu <- X[!o,] %*% theta.old[-p] w[!o] <- mu + theta.old[p] * H((censor - mu)/theta.old[p]) wsq[!o] <- mu^2 + theta.old[p]^2 + theta.old[p] * (censor + mu) * H((censor-mu)/theta.old[p]) # M step mu <- X %*% theta.old[-p] theta <- c(lm(w ~ -1 + X)\$coef, sqrt((sum(wsq) - 2*sum(mu*w) + sum(mu^2))/n)) if(prnt) { nll <- negloglik(theta,d,censor) print(c(i,nll,theta)) } # check convergence if(all(abs(theta - theta.old) < tol1 * (abs(theta.old)+tol2))) break; theta.old <- theta } theta } ```

kbroman at jhsph.edu
http://www.biostat.wisc.edu/~kbroman

Last modified: Wed Apr 15 23:10:12 2009