BSTA 670 – Statistical Computing

BSTA 670 – Statistical Computing Lecture 16: Some R Simulations

############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # m1 is mean of normal distribution for group 1 # m2 is mean of normal distribution for group 2 # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test m1<-5 m2<-7 s<-5 n<-100 M<-10000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m1,s) # Generate simulated data for group 1 x2<-rnorm(n,m2,s) # Generate simulated data gor group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and obtain p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) # # Theoretical power of test power.t.test(n=n,delta=m1-m2,sd=s,sig.level=a,type="two.sample",alternative="two.sided") Sim_ttest_power1.s

############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate actual # level of a two-sample unpaired t-test with equal sample sizes for the mean. ############################################################################## # Initialize parameters # m is mean of normal distributions # s is standard deviation of normal distributions # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m,s) # Generage data for group 1 x2<-rnorm(n,m,s) # Generate data for group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Add rejection indicator to results vector } print(paste("Estimated Level of Test: ", mean(reject))) Sim_ttest_alpha.s

############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_ttest_power_curve.s

############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a Wilcoxon test (two-sample unpaired) for the medians, # for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-wilcox.test(x1,x2)$p.value # Perform Wilcoxon test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_wilcoxon_power_curve.s

############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate confidence # interval coverage of asymptotic normal confidence interval for the mean. # The results of the simulated confidence intervals are plotted, with a line # denoting the actual mean. ############################################################################## # Initialize parameters # m is mean of normal distribution # s is standard deviation of normal distribution # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-50 a<-0.95 ############################################################################### # Create vectors to receive results covers<-NULL lower<-NULL upper<-NULL yplot<-NULL xplot<-NULL # Simulation loop for(i in 1:M) { x<-rnorm(n,m,s) # Generate data ci<-t.test(x, conf.level=a) # Obtain confidence interval loweri<-ci$conf.int[1] # Extract lower confidence limit upperi<-ci$conf.int[2] # Extract upper confidence limit coveri<-ifelse(m>=loweri & m<=upperi,1,0) # determine if CI includes real mean covers<-c(covers,coveri) # add coverage indicator, coveri, to results vector lower<-c(lower,loweri) # add lower limit, loweri, to results vector upper<-c(upper,upperi) # add upper limit, upperi, to results vector xplot<-c(xplot,c(loweri,upperi,NA)) # update plotting vector for x yplot<-c(yplot, c(i, i, i)) # update plotting vector for y } Sim_coverage_normal.s Continued on next slide

print(paste("Estimated Coverage Probability: ", mean(covers))) plot(xplot,yplot,type='l', xlab="Confidence Interval",ylab="Simulation Number") abline(v=m) Sim_coverage_normal.s Continued from previous slide

Survival_exponential.s ############################################################################## # R program demonstrating generation of survival data with fitting of # parametric survival regression and Cox regression models to verify # relative rates ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution n<-200 m1<-0.5 m2<-0.6 cm<-1.0 ############################################################################### Continued on next slide

Survival_exponential.s # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) print(paste("Mean of Group 1 survival time: ", mean(y1))) print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit parametric survival regression out<-survreg(Surv(ycen, censored)~x, dist="exponential") print(out) # print(paste("Relative rate from simulation parameters: ", m2/m1)) print(paste("Estimated relative rate from parametric regression: ", exp(out$coefficients[2]))) # # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) print(out) print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) Continued from previous slide

############################################################################################################################################################ # R program demonstrating simulation to obtain power for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m1<-0.5 m2<-0.6 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_power.s Continued on next slide

# Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_power.s Continued from previous slide

############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m<-0.5 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_level.s Continued on next slide

# Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m) y2<-rexp(n, rate=1/m) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_level.s Continued from previous slide

############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters, WITH TRUNCATION # NOTE: Set tm1=tm2==0.0000001 (something near 0) to get no truncation. # In this case get "a" for level. ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test # tm1 is truncation mean for group 1 # tm2 is truncation mean for group 2 # ntrunc1 is number obs truncated from group 1 # ntrunc2 is number obs truncated from group 2 # ptrunc1 is proportion truncated from group 1 # ptrunc2 is proportion truncated from group 2 n<-1000 m1<-0.5 m2<-0.5 cm<-1.0 tm1<-0.10 tm2<-0.10 M<-50 a<-0.05 Continued on next slide Sim_survival_exponential_truncated_level.s

############################################################################################################################################################## # Create vector to receive results reject<-NULL ntrunc1<-NULL ntrunc2<-NULL ptrunc1<-NULL ptrunc2<-NULL # Create group indicator x<-c( rep(0,n) , rep(1,n) ) Continued from previous slide Sim_survival_exponential_truncated_level.s

# Simulation loop for(i in 1:M) { # Create vector to receive results y<-NULL trunc<-NULL cen<-NULL obs1<-0 ntrunci<-0 while(obs1<n) { # Generate group 1 complete survival times. yi<-rexp(1, rate=1/m1) # Generate truncation time trunci<-rexp(1, rate=1/tm1) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs1<-obs1+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc1<-c(ntrunc1,ntrunci) ptrunc1<-c(ptrunc1, ntrunci/(n+ntrunci) ) Continued from previous slide Sim_survival_exponential_truncated_level.s

obs2<-0 ntrunci<-0 while(obs2<n) { # Generate group 2 complete survival times. yi<-rexp(1, rate=1/m2) # Generate truncation time trunci<-rexp(1, rate=1/tm2) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs2<-obs2+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc2<-c(ntrunc2,ntrunci) ptrunc2<-c(ptrunc2, ntrunci/(n+ntrunci) ) if(i==1) print(paste("Number truncated from group 2 while simulating: ", notused)) if(i==1) print(paste("Mean of Group 1 survival time: ", mean(y[1:n]))) if(i==1) print(paste("Mean of Group 2 survival time: ", mean(y[(n+1):(2*n)]))) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } Sim_survival_exponential_truncated_level.s Continued from previous slide

print(paste("Estimated Power of Test: ", mean(reject))) print(paste("Average number truncated from group 1 while simulating: ", mean(ntrunc1))) print(paste("Average number truncated from group 2 while simulating: ", mean(ntrunc2))) print(paste("Average proportion truncated from group 1 while simulating: ", mean(ptrunc1))) print(paste("Average proportion truncated from group 2 while simulating: ", mean(ptrunc2))) Continued from previous slide Sim_survival_exponential_truncated_level.s

BSTA 670 – Statistical Computing