1 / 18

BSTA 670 – Statistical Computing

BSTA 670 – Statistical Computing. Lecture 14: Some R Simulations. ############################################################################## # R program demonstrating use of Monte Carlo simulation to estimate the power

rigg
Download Presentation

BSTA 670 – Statistical Computing

An Image/Link below is provided (as is) to download presentation Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author. Content is provided to you AS IS for your information and personal use only. Download presentation by click this link. While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server. During download, if you can't get a presentation, the file might be deleted by the publisher.

E N D

Presentation Transcript


  1. BSTA 670 – Statistical Computing Lecture 14: Some R Simulations

  2. ############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # m1 is mean of normal distribution for group 1 # m2 is mean of normal distribution for group 2 # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test m1<-5 m2<-7 s<-5 n<-100 M<-10000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m1,s) # Generate simulated data for group 1 x2<-rnorm(n,m2,s) # Generate simulated data gor group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and obtain p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) # # Theoretical power of test power.t.test(n=n,delta=m1-m2,sd=s,sig.level=a,type="two.sample",alternative="two.sided") Sim_ttest_power1.s

  3. ############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate actual # level of a two-sample unpaired t-test with equal sample sizes for the mean. ############################################################################## # Initialize parameters # m is mean of normal distributions # s is standard deviation of normal distributions # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m,s) # Generage data for group 1 x2<-rnorm(n,m,s) # Generate data for group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Add rejection indicator to results vector } print(paste("Estimated Level of Test: ", mean(reject))) Sim_ttest_alpha.s

  4. ############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_ttest_power_curve.s

  5. ############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a Wilcoxon test (two-sample unpaired) for the medians, # for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-wilcox.test(x1,x2)$p.value # Perform Wilcoxon test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_wilcoxon_power_curve.s

  6. ############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate confidence # interval coverage of asymptotic normal confidence interval for the mean. # The results of the simulated confidence intervals are plotted, with a line # denoting the actual mean. ############################################################################## # Initialize parameters # m is mean of normal distribution # s is standard deviation of normal distribution # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-50 a<-0.95 ############################################################################### # Create vectors to receive results covers<-NULL lower<-NULL upper<-NULL yplot<-NULL xplot<-NULL # Simulation loop for(i in 1:M) { x<-rnorm(n,m,s) # Generate data ci<-t.test(x, conf.level=a) # Obtain confidence interval loweri<-ci$conf.int[1] # Extract lower confidence limit upperi<-ci$conf.int[2] # Extract upper confidence limit coveri<-ifelse(m>=loweri & m<=upperi,1,0) # determine if CI includes real mean covers<-c(covers,coveri) # add coverage indicator, coveri, to results vector lower<-c(lower,loweri) # add lower limit, loweri, to results vector upper<-c(upper,upperi) # add upper limit, upperi, to results vector xplot<-c(xplot,c(loweri,upperi,NA)) # update plotting vector for x yplot<-c(yplot, c(i, i, i)) # update plotting vector for y } Sim_coverage_normal.s Continued on next slide

  7. print(paste("Estimated Coverage Probability: ", mean(covers))) plot(xplot,yplot,type='l', xlab="Confidence Interval",ylab="Simulation Number") abline(v=m) Sim_coverage_normal.s Continued from previous slide

  8. Survival_exponential.s ############################################################################## # R program demonstrating generation of survival data with fitting of # parametric survival regression and Cox regression models to verify # relative rates ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution n<-200 m1<-0.5 m2<-0.6 cm<-1.0 ############################################################################### Continued on next slide

  9. Survival_exponential.s # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) print(paste("Mean of Group 1 survival time: ", mean(y1))) print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit parametric survival regression out<-survreg(Surv(ycen, censored)~x, dist="exponential") print(out) # print(paste("Relative rate from simulation parameters: ", m2/m1)) print(paste("Estimated relative rate from parametric regression: ", exp(out$coefficients[2]))) # # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) print(out) print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) Continued from previous slide

  10. ############################################################################################################################################################ # R program demonstrating simulation to obtain power for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m1<-0.5 m2<-0.6 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_power.s Continued on next slide

  11. # Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_power.s Continued from previous slide

  12. ############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m<-0.5 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_level.s Continued on next slide

  13. # Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m) y2<-rexp(n, rate=1/m) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_level.s Continued from previous slide

  14. ############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters, WITH TRUNCATION # NOTE: Set tm1=tm2==0.0000001 (something near 0) to get no truncation. # In this case get "a" for level. ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test # tm1 is truncation mean for group 1 # tm2 is truncation mean for group 2 # ntrunc1 is number obs truncated from group 1 # ntrunc2 is number obs truncated from group 2 # ptrunc1 is proportion truncated from group 1 # ptrunc2 is proportion truncated from group 2 n<-1000 m1<-0.5 m2<-0.5 cm<-1.0 tm1<-0.10 tm2<-0.10 M<-50 a<-0.05 Continued on next slide Sim_survival_exponential_truncated_level.s

  15. ############################################################################################################################################################## # Create vector to receive results reject<-NULL ntrunc1<-NULL ntrunc2<-NULL ptrunc1<-NULL ptrunc2<-NULL # Create group indicator x<-c( rep(0,n) , rep(1,n) ) Continued from previous slide Sim_survival_exponential_truncated_level.s

  16. # Simulation loop for(i in 1:M) { # Create vector to receive results y<-NULL trunc<-NULL cen<-NULL obs1<-0 ntrunci<-0 while(obs1<n) { # Generate group 1 complete survival times. yi<-rexp(1, rate=1/m1) # Generate truncation time trunci<-rexp(1, rate=1/tm1) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs1<-obs1+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc1<-c(ntrunc1,ntrunci) ptrunc1<-c(ptrunc1, ntrunci/(n+ntrunci) ) Continued from previous slide Sim_survival_exponential_truncated_level.s

  17. obs2<-0 ntrunci<-0 while(obs2<n) { # Generate group 2 complete survival times. yi<-rexp(1, rate=1/m2) # Generate truncation time trunci<-rexp(1, rate=1/tm2) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs2<-obs2+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc2<-c(ntrunc2,ntrunci) ptrunc2<-c(ptrunc2, ntrunci/(n+ntrunci) ) if(i==1) print(paste("Number truncated from group 2 while simulating: ", notused)) if(i==1) print(paste("Mean of Group 1 survival time: ", mean(y[1:n]))) if(i==1) print(paste("Mean of Group 2 survival time: ", mean(y[(n+1):(2*n)]))) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } Sim_survival_exponential_truncated_level.s Continued from previous slide

  18. print(paste("Estimated Power of Test: ", mean(reject))) print(paste("Average number truncated from group 1 while simulating: ", mean(ntrunc1))) print(paste("Average number truncated from group 2 while simulating: ", mean(ntrunc2))) print(paste("Average proportion truncated from group 1 while simulating: ", mean(ptrunc1))) print(paste("Average proportion truncated from group 2 while simulating: ", mean(ptrunc2))) Continued from previous slide Sim_survival_exponential_truncated_level.s

More Related