This is the code for reproducing tables and plots in the main text. We have saved intermediate results (predicted CATE) under “Simulations/intermediate_result_100simulations”.
Please set up your working directory where the folder “Simulations” is saved using the following code by replacing “Your directory of ‘Simulations’ folder” with your own directory:
setwd("Your directory of 'Simulations' folder") # use straight quotes; curly quotes are invalid R syntax
your_directory <- getwd() # setwd() returns the *previous* directory, so capture the new one with getwd()
Please install the following packages if you haven’t:
#install.packages("tableone")
#install.packages("tidyr")
Please copy this code to an R file and run it on a server. We have commented the code out because it requires a server to run.
This section provides code to run R-T, R-X, B-T, B-X and CSF for 100 simulations when data are generated from Weibull models at the median failure time under scenario 1 to scenario 3. If the time of interest is the 3rd quartile of failure times (tt=22 in scenario 1 and scenario 2; tt=20 in scenario 3), please change "tt" in the code below (see comments in the code below). We will also prepare training and test data for running D-T and D-X in Python.
Balanced design:
###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "random",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting1_dataframe.RData"))
###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "random",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting2_dataframe.RData"))
###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-14 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-14 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "random",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting3_dataframe.RData"))
Dependent design:
###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "dependent",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
#
# load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting1_dataframe.RData"))
###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "dependent",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1) # removed stray "#" that split the call ("mtry = #(10/3)")
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting2_dataframe.RData"))
###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-14 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-14 # assign a time point of interest
# simdata <- data_gen_censor(n=1000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "dependent",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting3_dataframe.RData"))
Unbalanced design with 4000 samples in training data:
###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced_scenario1_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=4000, p=10, PH=TRUE, censor = "30%", setting = 1, ex = "unbalanced",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
#
# load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced_scenario1_",(s-1)*5+i,".RData")) # match the file name used when the test data was saved ("unbalanced", not "unbalanced4000n")
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting1_dataframe.RData"))
###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-15 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-15 # assign a time point of interest
# simdata <- data_gen_censor(n=4000, p=10, PH=TRUE, censor = "30%", setting = 2, ex = "unbalanced",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting2_dataframe.RData"))
###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf) # provides causal_survival_forest(), called below but never loaded
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")
# generate test data
#tt<-14 # assign the time point of interest before generating test data ("tt" was used here without being defined)
#mydata <- data_gen_censor(n=10000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData")) # NOTE(review): s and i are undefined at this point; use a fixed index or move this inside the loop
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-14 # assign a time point of interest
# simdata <- data_gen_censor(n=4000, p=10, PH=TRUE, censor = "30%", setting = 3, ex = "unbalanced",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
# p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
# csa_diff<-data.frame(diff=p.prob$predictions) # extract the predictions vector; data.frame(diff=p.prob) would embed the whole grf prediction object
# weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData")) # removed undefined "ratio" from save(), which would error
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData")) # load the result file (contains "diff"); the simdata file only contains "dat"
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
# dat$sim=i+(s-1)*5
# dat_all<-rbind(dat_all,dat)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations
# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting3_dataframe.RData"))
D-T and D-X are run using Python. Please run D-T and D-X in Python as the following:
Install the following Python modules if you haven’t: pyreadr, tensorflow, pandas, matplotlib, sklearn, numpy, multiprocessing.
These modules can be installed using command line as the following if you use Anaconda: conda activate venv. This step is to open the virtual environment of the Python version that you install. venv is the name of your virtual environment. If you run Python on server, you can open your virtual environment by typing the command line: source ~/bin/activate/venv . After entering your virtual environment, use the following command line to install Python modules: pip install module_name. If you install Python3, please type: pip3 install module_name. Every time you would like to run the .py files, please enter your virtual environment where these modules are installed.
Enter your virtual environment where these modules are installed.
Please put DNNSurv_Tlearner_100runs_simulation.py, DNNSurv_Xlearner_100runs_simulation.py, DNNSurve_TXlearner_fun_util.py, training and test data prepared above into the same folder (Please create a new folder if you would like to.).
The names of test data are listed here:
Balanced design and scenario 1: testDat10000_weib_mediantt_balanced_setting1_dataframe.RData; Balanced design and scenario 2: testDat10000_weib_mediantt_balanced_setting2_dataframe.RData; Balanced design and scenario 3: testDat10000_weib_mediantt_balanced_setting3_dataframe.RData. Dependent design and scenario1: testDat10000_weib_mediantt_dependent_setting1_dataframe.RData; Dependent design and scenario2: testDat10000_weib_mediantt_dependent_setting2_dataframe.RData; Dependent design and scenario3: testDat10000_weib_mediantt_dependent_setting3_dataframe.RData. Unbalanced design (4000 samples) and scenario 1: testDat10000_weib_mediantt_unbalanced4000n_setting1_dataframe.RData; Unbalanced design (4000 samples) and scenario 2: testDat10000_weib_mediantt_unbalanced4000n_setting2_dataframe.RData; Unbalanced design (4000 samples) and scenario 3: testDat10000_weib_mediantt_unbalanced4000n_setting3_dataframe.RData.
The names of test data are listed here:
Balanced design and scenario 1: testDat10000_weib_mediantt_balanced_setting1_dataframe.RData; Balanced design and scenario 2: testDat10000_weib_mediantt_balanced_setting2_dataframe.RData; Balanced design and scenario 3: testDat10000_weib_mediantt_balanced_setting3_dataframe.RData. Dependent design and scenario1: testDat10000_weib_mediantt_dependent_setting1_dataframe.RData; Dependent design and scenario2: testDat10000_weib_mediantt_dependent_setting2_dataframe.RData; Dependent design and scenario3: testDat10000_weib_mediantt_dependent_setting3_dataframe.RData. Unbalanced design (4000 samples) and scenario 1: testDat10000_weib_mediantt_unbalanced4000n_setting1_dataframe.RData; Unbalanced design (4000 samples) and scenario 2: testDat10000_weib_mediantt_unbalanced4000n_setting2_dataframe.RData; Unbalanced design (4000 samples) and scenario 3: testDat10000_weib_mediantt_unbalanced4000n_setting3_dataframe.RData.
Set up the folder directory in line 2 of DNNSurv_Tlearner_1run_simulation.py and DNNSurv_Xlearner_1run_simulation.py.
In line 24, change training data name for different simulation design and scenarios.
In line 25, change test data name for different simulation design and scenarios.
In line 27, please give a name to the output result file.
Submit DNNSurv_Tlearner_100runs_simulation.py and DNNSurv_Xlearner_100runs_simulation.py to Python using command line: python DNNSurv_Xlearner_100runs_simulation.py. You can add a & at the end if you are using server for nohup option.
For 100 runs, please visit “Example_code_simulation_1run.Rmd” for submitting a single run of simulation files “DNNSurv_Tlearner_1run_simulation.py” and “DNNSurv_Xlearner_1run_simulation.py”.
In the output result file, the first 10000 rows are predicted CATE; the second 10000 rows are true CATE.
For each simulation design and scenario, combine D-T and D-X with other methods that are run in R as the following code. Here we give an example of combining all simulation results under balanced design:
###----------------------------------- scenario 1 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # load() takes the file path only; it restores `diff_all` into the workspace
# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario1.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"
# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario1.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random1.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random1.diff.all)[length(colnames(random1.diff.all))] <- "deepsurv_X_diff"
###----------------------------------- scenario 2 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # load() takes the file path only; it restores `diff_all` into the workspace
# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario2.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"
# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario2.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random2.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random2.diff.all)[length(colnames(random2.diff.all))] <- "deepsurv_X_diff"
###----------------------------------- scenario 3 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # load() takes the file path only; it restores `diff_all` into the workspace
# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario3.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"
# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario3.csv" # please change the name of output file to what you assign in .py file line 27.
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random3.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random3.diff.all)[length(colnames(random3.diff.all))] <- "deepsurv_X_diff"
#save(random1.diff.all,random2.diff.all,random3.diff.all,file = paste0(your_directory,"/weib_balanced_mediantt.RData"))
Balanced design:
###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")
# generate test data
#tt<-1.05 # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=1,sigma1=2,r=0.15,mu=0, censor = "30%", setting = 1, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-1.05 # assign a time point of interest
# simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=1,sigma1=2,r=0.15,mu=0, censor = "30%", setting = 1, ex = "random",time.interest=1.05)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # note: `ratio` is not defined in this loop, so it is not saved
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData"))
###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")
# generate test data
#tt<-1.1 # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=0.5,sigma1=1,r=0.22,mu=0, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-1.1 # assign a time point of interest
# simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=0.5,sigma1=1,r=0.22,mu=0, censor = "30%", setting = 2, ex = "random",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # note: `ratio` is not defined in this loop, so it is not saved
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData"))
###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")
# generate test data
#tt<-1.29 # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=0.5,sigma1=1,r=0.2,mu=0, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
# B<-5
# for (i in 1:B){
# tryCatch({
# set.seed(100871+i+s*15213)
# tt<-1.29 # assign a time point of interest
# simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=0.5,sigma1=1,r=0.2,mu=0, censor = "30%", setting = 3, ex = "random",time.interest=tt)
# dat <- simdata$data
# rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
# baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
# logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
# save prediction results
# diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
# save(diff,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # note: `ratio` is not defined in this loop, so it is not saved
# save simulation data in order to run D-T and D-X in Python
# save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
# }
# ,error=function(e){
# print(paste("Error at i=",(s-1)*5+i))
# })
# }
#}
#rm(list=ls())
#diff_all<-NULL
#for (s in 1:20){
# for (i in 1:5){
# tryCatch({
# load(paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
# diff$sim=i+(s-1)*5
# diff_all<-rbind(diff_all,diff)
# },error=function(e){
# print(paste("Error at s,i",s,i))
# })
# }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData"))
This section provides codes to reproduce tables and plots in main text of simulations by loading the intermediate results.
This figure shows binned bias and RMSE when data is generated from Weibull models under balanced design and scenario 1. Because the size of the intermediate results are too large, we have saved summarized biases and RMSEs from 100 simulations in “Simulations/intermediate_result_100simulations.RData”.
The following codes are examples for loading the predicted CATE on test datasets in 100 runs of the simulations. The codes are commented out because the result files are too large to be loaded for the manuscript submission. Please jump to the next chunk for loading summarized biases and RMSEs.
Load predicted CATE from R-T, R-X, B-T, B-X, D-T, D-X and CSF when data is generated from Weibull models under balanced design and scenario1:
#load(paste0(your_directory,"/weib_balanced_mediantt.RData"))
#random1.diff.all$ID<-random2.diff.all$ID<-random3.diff.all$ID<-rep(1:10000,100)
#random<-list(random1.diff.all,random2.diff.all,random3.diff.all)
Calculate binned bias and RMSE:
#RMSE_rsf_T<-RMSE_rsf_X<-RMSE_baft_T<-RMSE_baft_X<-RMSE_weib<-RMSE_dnnsurv_T<-RMSE_dnnsurv_X<-RMSE_CSF<-list(rep(NA,100),rep(NA,100),rep(NA,100))
#bias_rsf_T<-bias_rsf_X<-bias_baft_T<-bias_baft_X<-bias_weib<-bias_dnnsurv_T<-bias_dnnsurv_X<-bias_CSF<-list(rep(NA,100),rep(NA,100),rep(NA,100))
#for (s in 1:3){ # random, setting 1 to 3
# for (i in 1:100){ # 100 runs of results
# dat<-random[[s]][which(random[[s]]$sim==i),]
# dat<-dat[order(dat$true_diff),]
# quant<-quantile(dat$true_diff,probs=seq(0,1,length.out=51))
# bias_rsf_T[[s]][i]<-0
# bias_rsf_X[[s]][i]<-0
# bias_baft_T[[s]][i]<-0
# bias_baft_X[[s]][i]<-0
# bias_weib[[s]][i]<-0
# bias_dnnsurv_T[[s]][i]<-0
# bias_dnnsurv_X[[s]][i]<-0
# bias_CSF[[s]][i]<-0
# RMSE_rsf_T[[s]][i]<-0
# RMSE_rsf_X[[s]][i]<-0
# RMSE_baft_T[[s]][i]<-0
# RMSE_baft_X[[s]][i]<-0
# RMSE_weib[[s]][i]<-0
# RMSE_dnnsurv_T[[s]][i]<-0
# RMSE_dnnsurv_X[[s]][i]<-0
# RMSE_CSF[[s]][i]<-0
# for (j in 1:50){
# datq<-dat[which(dat$true_diff>=quant[j]&dat$true_diff<=quant[j+1]),]
# bias_rsf_T[[s]][i]<-bias_rsf_T[[s]][i]+mean(datq$rsf_T_diff-datq$true_diff)
# bias_rsf_X[[s]][i]<-bias_rsf_X[[s]][i]+mean(datq$rsf_X_diff-datq$true_diff)
# bias_baft_T[[s]][i]<-bias_baft_T[[s]][i]+mean(datq$baft_T_diff-datq$true_diff)
# bias_baft_X[[s]][i]<-bias_baft_X[[s]][i]+mean(datq$baft_X_diff-datq$true_diff)
# bias_weib[[s]][i]<-bias_weib[[s]][i]+mean(datq$weib_diff-datq$true_diff)
# bias_dnnsurv_T[[s]][i]<-bias_dnnsurv_T[[s]][i]+mean(datq$deepsurv_T_diff-datq$true_diff)
# bias_dnnsurv_X[[s]][i]<-bias_dnnsurv_X[[s]][i]+mean(datq$deepsurv_X_diff-datq$true_diff)
# bias_CSF[[s]][i]<-bias_CSF[[s]][i]+mean(datq$CSF_diff-datq$true_diff)
# RMSE_rsf_T[[s]][i]<-RMSE_rsf_T[[s]][i]+sqrt(mean((datq$rsf_T_diff-datq$true_diff)^2))
# RMSE_rsf_X[[s]][i]<-RMSE_rsf_X[[s]][i]+sqrt(mean((datq$rsf_X_diff-datq$true_diff)^2))
# RMSE_baft_T[[s]][i]<-RMSE_baft_T[[s]][i]+sqrt(mean((datq$baft_T_diff-datq$true_diff)^2))
# RMSE_baft_X[[s]][i]<-RMSE_baft_X[[s]][i]+sqrt(mean((datq$baft_X_diff-datq$true_diff)^2))
# RMSE_weib[[s]][i]<-RMSE_weib[[s]][i]+sqrt(mean((datq$weib_diff-datq$true_diff)^2))
# RMSE_dnnsurv_T[[s]][i]<-RMSE_dnnsurv_T[[s]][i]+sqrt(mean((datq$deepsurv_T_diff-datq$true_diff)^2))
# RMSE_dnnsurv_X[[s]][i]<-RMSE_dnnsurv_X[[s]][i]+sqrt(mean((datq$deepsurv_X_diff-datq$true_diff)^2))
# RMSE_CSF[[s]][i]<-RMSE_CSF[[s]][i]+sqrt(mean((datq$CSF_diff-datq$true_diff)^2))
# }
# bias_rsf_T[[s]][i]=bias_rsf_T[[s]][i]/50
# bias_rsf_X[[s]][i]=bias_rsf_X[[s]][i]/50
# bias_baft_T[[s]][i]=bias_baft_T[[s]][i]/50
# bias_baft_X[[s]][i]=bias_baft_X[[s]][i]/50
# bias_weib[[s]][i]=bias_weib[[s]][i]/50
# bias_dnnsurv_T[[s]][i]=bias_dnnsurv_T[[s]][i]/50
# bias_dnnsurv_X[[s]][i]=bias_dnnsurv_X[[s]][i]/50
# bias_CSF[[s]][i]=bias_CSF[[s]][i]/50
# RMSE_rsf_T[[s]][i]=RMSE_rsf_T[[s]][i]/50
# RMSE_rsf_X[[s]][i]=RMSE_rsf_X[[s]][i]/50
# RMSE_baft_T[[s]][i]=RMSE_baft_T[[s]][i]/50
# RMSE_baft_X[[s]][i]=RMSE_baft_X[[s]][i]/50
# RMSE_weib[[s]][i]=RMSE_weib[[s]][i]/50
# RMSE_dnnsurv_T[[s]][i]=RMSE_dnnsurv_T[[s]][i]/50
# RMSE_dnnsurv_X[[s]][i]=RMSE_dnnsurv_X[[s]][i]/50
# RMSE_CSF[[s]][i]=RMSE_CSF[[s]][i]/50
# }
#}
#save(bias_rsf_T,bias_rsf_X,bias_baft_T,bias_baft_X,bias_weib,bias_dnnsurv_T,bias_dnnsurv_X,bias_CSF,RMSE_rsf_T,RMSE_rsf_X,RMSE_baft_T,RMSE_baft_X,RMSE_weib,RMSE_dnnsurv_T,RMSE_dnnsurv_X,RMSE_CSF,file =paste0(your_directory,"/simulation_weib_bias_RMSE_summary.RData"))
Plot Figure 1:
# Load the pre-computed binned bias/RMSE summaries from 100 simulations and
# set up a 2 x 3 panel layout for Figure 1 (top row: bias; bottom row: RMSE).
load(paste0(your_directory, "/intermediate_result_100simulations.RData"))
par(mfrow = c(2, 3), mar = c(2.4, 4.8, 2, 1.2), cex.lab = 2, cex.axis = 2)
# Bias panels (Figure 1, top row): one boxplot panel per scenario, eight
# methods per panel (R-T, R-X, B-T, B-X, D-T, D-X, CSF, Weib). The dark dot
# marks the mean across the 100 simulations; the dashed red line marks zero bias.
for (m in 1:3) {
  summ <- simulation_weib_bias_RMSE_summary
  bias_by_method <- list(
    summ$bias_rsf_T[[m]], summ$bias_rsf_X[[m]],
    summ$bias_baft_T[[m]], summ$bias_baft_X[[m]],
    summ$bias_dnnsurv_T[[m]], summ$bias_dnnsurv_X[[m]],
    summ$bias_CSF[[m]], summ$bias_weib[[m]]
  )
  if (m == 1) {
    # Only the leftmost panel carries the y-axis label.
    boxplot(bias_by_method, xaxt = "n", ylim = c(-0.12, 0.12),
            cex.axis = 2, ylab = "Bias")
  } else {
    boxplot(bias_by_method, xaxt = "n", ylim = c(-0.12, 0.12), cex.axis = 2)
  }
  abline(h = 0, col = "red", lty = 2, lwd = 2)
  for (k in seq_len(7)) {
    points(x = k, y = mean(bias_by_method[[k]]),
           col = "darkslategrey", pch = 19, cex = 1.2)
  }
  # The Weibull ("true model") summaries may contain NA in some simulations,
  # so its mean is taken with na.rm = TRUE (unlike the other seven methods).
  points(x = 8, y = mean(bias_by_method[[8]], na.rm = TRUE),
         col = "darkslategrey", pch = 19, cex = 1.2)
  title(paste0("Scenario ", m), cex.main = 2)
}
# RMSE panels (Figure 1, bottom row): one boxplot panel per scenario. Unlike
# the bias panels, these show x-axis method labels and draw no reference line
# or title. The dark dot marks the mean across the 100 simulations.
for (m in 1:3) {
  summ <- simulation_weib_bias_RMSE_summary
  rmse_by_method <- list(
    summ$RMSE_rsf_T[[m]], summ$RMSE_rsf_X[[m]],
    summ$RMSE_baft_T[[m]], summ$RMSE_baft_X[[m]],
    summ$RMSE_dnnsurv_T[[m]], summ$RMSE_dnnsurv_X[[m]],
    summ$RMSE_CSF[[m]], summ$RMSE_weib[[m]]
  )
  method_labels <- c("R-T", "R-X", "B-T", "B-X", "D-T", "D-X", "CSF", "Weib")
  if (m == 1) {
    # Only the leftmost panel carries the y-axis label.
    boxplot(rmse_by_method, ylim = c(0, 0.25), cex.axis = 2,
            ylab = "Binned RMSE", names = method_labels)
  } else {
    boxplot(rmse_by_method, ylim = c(0, 0.25), cex.axis = 2,
            names = method_labels)
  }
  for (k in seq_len(7)) {
    points(x = k, y = mean(rmse_by_method[[k]]),
           col = "darkslategrey", pch = 19, cex = 1.2)
  }
  # The Weibull summaries may contain NA in some simulations, so its mean is
  # taken with na.rm = TRUE (unlike the other seven methods).
  points(x = 8, y = mean(rmse_by_method[[8]], na.rm = TRUE),
         col = "darkslategrey", pch = 19, cex = 1.2)
}
This section reproduces prediction accuracy of each method (Table 1).
The following codes require loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 1 (upper panel of Table 1): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section in the main text.)
#random1.diff.all1 <- random1.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random2.diff.all1 <- random2.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random3.diff.all1 <- random3.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random1<-list(random1.diff.all1,random2.diff.all1,random3.diff.all1)
### random setting 1
#library(tableone)
#bimatrix.random1<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
#bimatrix.random1[,i]<-ifelse(random1[[1]][,i]>=0,1,0)
#}
#colnames(bimatrix.random1)<-colnames(random1[[1]])[1:9]
#bimatrix.random1$sim<-random[[1]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
# dat<-bimatrix.random1[which(bimatrix.random1$sim==i),]
# for (j in 1:8){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }} # in simu 8, 11, 41, 63, 83, predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S1.RData"))
library(tidyr)
library(tableone)
# Build Table 1 (scenario 1, upper panel): mean (SD) of classification-style
# accuracy metrics (acc, ppv, npv, sens, spec, F-measure) for the eight
# methods over 100 simulations, stratified by method via tableone.
#
# Fix: the original indexed the summary list with the misspelled element
# `$sepc`; the generation code above saves the object as `spec`, and `$sepc`
# would return NULL (making data.frame()/gather() fail). Prefer `spec` and
# fall back to `sepc` in case the stored intermediate file used that name.
spec_mat <- simulation_weib_accuracy_summary_S1$spec
if (is.null(spec_mat)) spec_mat <- simulation_weib_accuracy_summary_S1$sepc
# Reshape one 100 x 8 metric matrix (one column per method) to long format.
to_long <- function(metric_mat) {
  gather(data.frame(metric_mat), method, value, RSF_T:Weib, factor_key = TRUE)
}
acc_long.random1 <- to_long(simulation_weib_accuracy_summary_S1$acc)
ppv_long.random1 <- to_long(simulation_weib_accuracy_summary_S1$ppv)
npv_long.random1 <- to_long(simulation_weib_accuracy_summary_S1$npv)
sens_long.random1 <- to_long(simulation_weib_accuracy_summary_S1$sens)
spec_long.random1 <- to_long(spec_mat)
fmeasure_long.random1 <- to_long(simulation_weib_accuracy_summary_S1$fmeasure)
# Column 1 is the method factor; column 2 of each long frame is the metric value.
statmat.random1 <- data.frame(cbind(acc_long.random1,
                                    ppv_long.random1[, 2],
                                    npv_long.random1[, 2],
                                    sens_long.random1[, 2],
                                    spec_long.random1[, 2],
                                    fmeasure_long.random1[, 2]))
names(statmat.random1)[2:7] <- c("acc", "ppv", "npv", "sens", "spec", "fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random1[[metric]] <- statmat.random1[[metric]] * 100
}
table_one.random1 <- CreateTableOne(vars = c("acc", "ppv", "npv", "sens",
                                             "spec", "fmeasure"),
                                    strata = "method", data = statmat.random1)
table_one_matrix.random1 <- print(table_one.random1)
## Stratified by method
## RSF_T RSF_X BAFT_T BAFT_X
## n 100 100 100 100
## acc (mean (SD)) 90.11 (1.81) 90.58 (1.85) 85.99 (2.03) 88.55 (1.91)
## ppv (mean (SD)) 91.78 (3.21) 92.33 (3.30) 90.67 (2.32) 92.18 (2.50)
## npv (mean (SD)) 87.17 (5.99) 87.56 (6.19) 76.13 (4.54) 80.93 (4.81)
## sens (mean (SD)) 94.56 (3.24) 94.62 (3.38) 89.26 (2.96) 91.51 (2.94)
## spec (mean (SD)) 79.74 (9.21) 81.16 (9.29) 78.36 (6.22) 81.64 (6.66)
## fmeasure (mean (SD)) 93.05 (1.22) 93.36 (1.27) 89.90 (1.52) 91.79 (1.39)
## Stratified by method
## DNNSurv_T DNNSurv_X CSF Weib
## n 100 100 100 100
## acc (mean (SD)) 92.39 (1.71) 93.44 (1.67) 82.02 (7.15) 96.37 (1.62)
## ppv (mean (SD)) 94.96 (3.02) 95.52 (2.94) 81.18 (7.56) 97.36 (2.33)
## npv (mean (SD)) 87.62 (5.77) 89.63 (5.43) 95.55 (6.05) 94.68 (4.90)
## sens (mean (SD)) 94.30 (3.25) 95.25 (3.04) 98.38 (2.67) 97.55 (2.44)
## spec (mean (SD)) 87.92 (8.00) 89.20 (7.63) 43.86 (27.72) 93.62 (5.89)
## fmeasure (mean (SD)) 94.54 (1.22) 95.31 (1.20) 88.67 (3.84) 97.41 (1.15)
## Stratified by method
## p test
## n
## acc (mean (SD)) <0.001
## ppv (mean (SD)) <0.001
## npv (mean (SD)) <0.001
## sens (mean (SD)) <0.001
## spec (mean (SD)) <0.001
## fmeasure (mean (SD)) <0.001
The following codes require loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 2 (middle panel of Table 1): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section in the main text.)
#library(tableone)
#bimatrix.random2<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
# bimatrix.random2[,i]<-ifelse(random1[[2]][,i]>=0,1,0)
#}
#colnames(bimatrix.random2)<-colnames(random1[[2]])[1:9]
#bimatrix.random2$sim<-random[[2]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
# dat<-bimatrix.random2[which(bimatrix.random2$sim==i),]
# for (j in 1:8){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }} # simulation 74 and 75: a lot of missing in weibull_diff, all other with numbers >0; in 33 simu, predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S2.RData"))
library(tidyr)
library(tableone)
# Reshape each accuracy measure (100 simulations x 8 methods, Weibull data,
# scenario 2) from wide to long format; the method becomes a factor column.
acc_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$acc), method, value, RSF_T:Weib, factor_key=TRUE)
ppv_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$ppv), method, value, RSF_T:Weib, factor_key=TRUE)
npv_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$npv), method, value, RSF_T:Weib, factor_key=TRUE)
sens_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$sens), method, value, RSF_T:Weib, factor_key=TRUE)
# BUG FIX: the original read `$sepc` (typo). `$sepc` does not partially match
# `spec`, so it returns NULL and gather() fails on an empty data frame.
spec_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$spec), method, value, RSF_T:Weib, factor_key=TRUE)
fmeasure_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$fmeasure), method, value, RSF_T:Weib, factor_key=TRUE)
# One row per simulation x method; one column per accuracy measure.
statmat.random2<-data.frame(cbind(acc_long.random2,ppv_long.random2[,2],npv_long.random2[,2],sens_long.random2[,2],spec_long.random2[,2],fmeasure_long.random2[,2]))
names(statmat.random2)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random2[[metric]] <- statmat.random2[[metric]] * 100
}
# Mean (SD) of each accuracy measure, stratified by method
# (middle panel of Table 1).
table_one.random2<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random2)
table_one_matrix.random2<-print(table_one.random2)
## Stratified by method
## RSF_T RSF_X BAFT_T BAFT_X
## n 100 100 100 100
## acc (mean (SD)) 79.68 (4.01) 80.50 (4.15) 72.15 (3.78) 75.29 (3.91)
## ppv (mean (SD)) 86.55 (4.87) 86.97 (5.11) 84.69 (3.50) 86.18 (4.01)
## npv (mean (SD)) 66.96 (9.64) 68.80 (10.19) 51.73 (5.27) 56.37 (6.08)
## sens (mean (SD)) 85.31 (8.21) 86.15 (8.26) 74.42 (5.21) 78.00 (5.61)
## spec (mean (SD)) 65.89 (16.12) 66.65 (16.80) 66.60 (9.65) 68.66 (11.45)
## fmeasure (mean (SD)) 85.50 (3.47) 86.12 (3.51) 79.08 (3.20) 81.70 (3.19)
## Stratified by method
## DNNSurv_T DNNSurv_X CSF Weib
## n 100 100 100 100
## acc (mean (SD)) 82.10 (4.50) 82.76 (4.68) 76.31 (5.49) 91.04 (3.53)
## ppv (mean (SD)) 89.60 (5.28) 89.63 (5.78) 78.84 (8.21) 94.07 (4.47)
## npv (mean (SD)) 70.15 (11.31) 72.14 (11.67) 82.95 (14.92) 86.84 (10.55)
## sens (mean (SD)) 85.38 (8.42) 86.57 (8.68) 94.02 (9.68) 93.69 (6.01)
## spec (mean (SD)) 74.07 (16.28) 73.41 (18.48) 32.90 (31.65) 84.53 (12.63)
## fmeasure (mean (SD)) 87.00 (3.74) 87.57 (3.78) 84.89 (4.15) 93.64 (2.65)
## Stratified by method
## p test
## n
## acc (mean (SD)) <0.001
## ppv (mean (SD)) <0.001
## npv (mean (SD)) <0.001
## sens (mean (SD)) <0.001
## spec (mean (SD)) <0.001
## fmeasure (mean (SD)) <0.001
The following code requires loading the predictions on the test dataset from 100 runs of the simulations. Since that result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 3 (lower panel of Table 1): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)
#library(tableone)
#bimatrix.random3<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
# bimatrix.random3[,i]<-ifelse(random1[[3]][,i]>=0,1,0)
#}
#colnames(bimatrix.random3)<-colnames(random1[[3]])[1:9]
#bimatrix.random3$sim<-random[[3]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
# dat<-bimatrix.random3[which(bimatrix.random3$sim==i),]
# for (j in 1:8){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }} # in many simulations, CATE predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S3.RData"))
library(tidyr)
library(tableone)
# Reshape each accuracy measure (100 simulations x 8 methods, Weibull data,
# scenario 3) from wide to long format; the method becomes a factor column.
acc_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$acc), method, value, RSF_T:Weib, factor_key=TRUE)
ppv_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$ppv), method, value, RSF_T:Weib, factor_key=TRUE)
npv_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$npv), method, value, RSF_T:Weib, factor_key=TRUE)
sens_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$sens), method, value, RSF_T:Weib, factor_key=TRUE)
# BUG FIX: the original read `$sepc` (typo). `$sepc` does not partially match
# `spec`, so it returns NULL and gather() fails on an empty data frame.
spec_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$spec), method, value, RSF_T:Weib, factor_key=TRUE)
fmeasure_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$fmeasure), method, value, RSF_T:Weib, factor_key=TRUE)
# One row per simulation x method; one column per accuracy measure.
statmat.random3<-data.frame(cbind(acc_long.random3,ppv_long.random3[,2],npv_long.random3[,2],sens_long.random3[,2],spec_long.random3[,2],fmeasure_long.random3[,2]))
names(statmat.random3)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random3[[metric]] <- statmat.random3[[metric]] * 100
}
# Mean (SD) of each accuracy measure, stratified by method
# (lower panel of Table 1).
table_one.random3<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random3)
table_one_matrix.random3<-print(table_one.random3)
## Stratified by method
## RSF_T RSF_X BAFT_T BAFT_X
## n 100 100 100 100
## acc (mean (SD)) 83.47 (2.16) 83.74 (2.20) 79.21 (2.83) 81.30 (2.57)
## ppv (mean (SD)) 86.61 (3.66) 86.42 (3.86) 87.52 (2.46) 88.06 (2.57)
## npv (mean (SD)) 76.62 (7.85) 78.17 (7.93) 62.53 (5.16) 66.87 (5.52)
## sens (mean (SD)) 91.33 (4.92) 92.08 (4.77) 82.76 (4.15) 85.53 (4.31)
## spec (mean (SD)) 63.91 (12.66) 62.95 (13.52) 70.37 (7.06) 70.77 (7.78)
## fmeasure (mean (SD)) 88.72 (1.59) 88.97 (1.50) 84.99 (2.28) 86.67 (2.06)
## Stratified by method
## DNNSurv_T DNNSurv_X CSF Weib
## n 100 100 100 100
## acc (mean (SD)) 86.73 (2.58) 86.77 (2.19) 74.33 (3.36) 92.12 (1.89)
## ppv (mean (SD)) 91.22 (3.19) 90.34 (3.70) 74.18 (3.96) 94.86 (2.53)
## npv (mean (SD)) 77.89 (8.26) 79.66 (7.75) 94.78 (8.29) 86.67 (6.79)
## sens (mean (SD)) 90.35 (5.21) 91.58 (4.73) 98.97 (3.09) 94.20 (3.63)
## spec (mean (SD)) 77.73 (9.68) 74.77 (11.60) 13.00 (17.12) 86.96 (7.14)
## fmeasure (mean (SD)) 90.62 (2.05) 90.78 (1.60) 84.66 (1.57) 94.45 (1.41)
## Stratified by method
## p test
## n
## acc (mean (SD)) <0.001
## ppv (mean (SD)) <0.001
## npv (mean (SD)) <0.001
## sens (mean (SD)) <0.001
## spec (mean (SD)) <0.001
## fmeasure (mean (SD)) <0.001
This figure shows binned bias and RMSE when data are generated from standard logistic models under the balanced design and scenario 1. Because the intermediate results are too large, we have saved the summarized biases and RMSEs from 100 simulations in “Simulations/intermediate_result_100simulations.RData”.
The following codes are examples for loading the predicted CATE on test datasets in 100 runs of the simulations. The codes are commented out because the result files are too large to be loaded for the manuscript submission. Please jump to the next chunk for loading summarized biases and RMSEs. We have saved intermediate results from 100 simulations in “Simulations/intermediate_result_100simulations.RData”.
Load predicted CATE from R-T, R-X, B-T, B-X, D-T, D-X and CSF when data is generated from standard logistic models under balanced design and scenario1:
#load(paste0(your_directory,"/logis_balanced_mediantt.RData"))
#random1.diff$ID<-rep(1:10000,100)
#random2.diff$ID<-rep(1:10000,100)
#random3.diff$ID<-rep(1:10000,100)
#random<-list(random1.diff,random2.diff,random3.diff)
Calculate binned bias and RMSE:
#RMSE_rsf_T<-RMSE_rsf_X<-RMSE_baft_T<-RMSE_baft_X<-RMSE_slogistic<-RMSE_weib<-RMSE_dnnsurv_T<-RMSE_dnnsurv_X<-list(rep(NA,100),rep(NA,100),rep(NA,100))
#bias_rsf_T<-bias_rsf_X<-bias_baft_T<-bias_baft_X<-bias_slogistic<-bias_weib<-bias_dnnsurv_T<-bias_dnnsurv_X<-list(rep(NA,100),rep(NA,100),rep(NA,100))
# random, setting 1 to setting 3
#for (s in 1:3){
# for (i in 1:100){ # 100 runs of results
# dat<-random[[s]][which(random[[s]]$sim==i),]
# dat<-dat[order(dat$true_diff),]
# quant<-quantile(dat$true_diff,probs=seq(0,1,length.out=51))
# bias_rsf_T[[s]][i]<-0
# bias_rsf_X[[s]][i]<-0
# bias_baft_T[[s]][i]<-0
# bias_baft_X[[s]][i]<-0
# bias_slogistic[[s]][i]<-0
# bias_weib[[s]][i]<-0
# bias_dnnsurv_T[[s]][i]<-0
# bias_dnnsurv_X[[s]][i]<-0
# RMSE_rsf_T[[s]][i]<-0
# RMSE_rsf_X[[s]][i]<-0
# RMSE_baft_T[[s]][i]<-0
# RMSE_baft_X[[s]][i]<-0
# RMSE_slogistic[[s]][i]<-0
# RMSE_weib[[s]][i]<-0
# RMSE_dnnsurv_T[[s]][i]<-0
# RMSE_dnnsurv_X[[s]][i]<-0
# for (j in 1:50){
# datq<-dat[which(dat$true_diff>=quant[j]&dat$true_diff<=quant[j+1]),]
# bias_rsf_T[[s]][i]<-bias_rsf_T[[s]][i]+mean(datq$rsf_T-datq$true_diff)
# bias_rsf_X[[s]][i]<-bias_rsf_X[[s]][i]+mean(datq$rsf_X_diff0-datq$true_diff)
# bias_baft_T[[s]][i]<-bias_baft_T[[s]][i]+mean(datq$baft_T-datq$true_diff)
# bias_baft_X[[s]][i]<-bias_baft_X[[s]][i]+mean(datq$baft_X_diff0-datq$true_diff)
# bias_slogistic[[s]][i]<-bias_slogistic[[s]][i]+mean(datq$slogis-datq$true_diff)
# bias_weib[[s]][i]<-bias_weib[[s]][i]+mean(datq$weib-datq$true_diff)
# bias_dnnsurv_T[[s]][i]<-bias_dnnsurv_T[[s]][i]+mean(datq$deepsurv_T-datq$true_diff)
# bias_dnnsurv_X[[s]][i]<-bias_dnnsurv_X[[s]][i]+mean(datq$deepsurv_X-datq$true_diff)
# RMSE_rsf_T[[s]][i]<-RMSE_rsf_T[[s]][i]+sqrt(mean((datq$rsf_T-datq$true_diff)^2))
# RMSE_rsf_X[[s]][i]<-RMSE_rsf_X[[s]][i]+sqrt(mean((datq$rsf_X_diff0-datq$true_diff)^2))
# RMSE_baft_T[[s]][i]<-RMSE_baft_T[[s]][i]+sqrt(mean((datq$baft_T-datq$true_diff)^2))
# RMSE_baft_X[[s]][i]<-RMSE_baft_X[[s]][i]+sqrt(mean((datq$baft_X_diff0-datq$true_diff)^2))
# RMSE_slogistic[[s]][i]<-RMSE_slogistic[[s]][i]+sqrt(mean((datq$slogis-datq$true_diff)^2))
# RMSE_weib[[s]][i]<-RMSE_weib[[s]][i]+sqrt(mean((datq$weib-datq$true_diff)^2))
# RMSE_dnnsurv_T[[s]][i]<-RMSE_dnnsurv_T[[s]][i]+sqrt(mean((datq$deepsurv_T-datq$true_diff)^2))
# RMSE_dnnsurv_X[[s]][i]<-RMSE_dnnsurv_X[[s]][i]+sqrt(mean((datq$deepsurv_X-datq$true_diff)^2))
# }
# bias_rsf_T[[s]][i]=bias_rsf_T[[s]][i]/50
# bias_rsf_X[[s]][i]=bias_rsf_X[[s]][i]/50
# bias_baft_T[[s]][i]=bias_baft_T[[s]][i]/50
# bias_baft_X[[s]][i]=bias_baft_X[[s]][i]/50
# bias_slogistic[[s]][i]=bias_slogistic[[s]][i]/50
# bias_weib[[s]][i]=bias_weib[[s]][i]/50
# bias_dnnsurv_T[[s]][i]=bias_dnnsurv_T[[s]][i]/50
# bias_dnnsurv_X[[s]][i]=bias_dnnsurv_X[[s]][i]/50
# RMSE_rsf_T[[s]][i]=RMSE_rsf_T[[s]][i]/50
# RMSE_rsf_X[[s]][i]=RMSE_rsf_X[[s]][i]/50
# RMSE_baft_T[[s]][i]=RMSE_baft_T[[s]][i]/50
# RMSE_baft_X[[s]][i]=RMSE_baft_X[[s]][i]/50
# RMSE_slogistic[[s]][i]=RMSE_slogistic[[s]][i]/50
# RMSE_weib[[s]][i]=RMSE_weib[[s]][i]/50
# RMSE_dnnsurv_T[[s]][i]=RMSE_dnnsurv_T[[s]][i]/50
# RMSE_dnnsurv_X[[s]][i]=RMSE_dnnsurv_X[[s]][i]/50
# }
#}
#save(bias_rsf_T,bias_rsf_X,bias_baft_T,bias_baft_X,bias_slogistic,bias_dnnsurv_T,bias_dnnsurv_X,bias_CSF,RMSE_rsf_T,RMSE_rsf_X,RMSE_baft_T,RMSE_baft_X,RMSE_slogistic,RMSE_dnnsurv_T,RMSE_dnnsurv_X,RMSE_CSF,file =paste0(your_directory,"/simulation_logis_bias_RMSE_summary.RData"))
Plot Figure 2:
# Graphics setup for Figure 2: a 2 x 3 panel grid (bias on the top row,
# RMSE on the bottom row), tight margins, enlarged axis and label text.
par(
  mfrow = c(2, 3),
  mar = c(2.4, 4.8, 2, 1.2),
  cex.lab = 2,
  cex.axis = 2
)
# Figure 2, top row: boxplots of binned bias across the 100 simulations for
# each method, one panel per scenario (m = 1..3). Box positions 1-7 are
# R-T, R-X, B-T, B-X, D-T, D-X, Logistic. The x-axis is suppressed (xaxt="n");
# method labels are drawn by the RMSE panels in the bottom row of the grid.
for(m in 1:3){
if (m==1){
# First panel carries the shared y-axis label "Bias".
boxplot(simulation_logis_bias_RMSE_summary$bias_rsf_T[[m]],simulation_logis_bias_RMSE_summary$bias_rsf_X[[m]],simulation_logis_bias_RMSE_summary$bias_baft_T[[m]],simulation_logis_bias_RMSE_summary$bias_baft_X[[m]],simulation_logis_bias_RMSE_summary$bias_dnnsurv_T[[m]],simulation_logis_bias_RMSE_summary$bias_dnnsurv_X[[m]],simulation_logis_bias_RMSE_summary$bias_slogistic[[m]],xaxt="n",ylim=c(-0.12,0.12),cex.axis=2,ylab="Bias") #bias_rsf_X_xtype1_dependent[[m]],bias_rsf_X_xtype2_dependent[[m]],bias_rsf_X_xtype3_dependent[[m]],bias_rsf_X_xtype4_dependent[[m]],bias_baft_X_xtype1_dependent[[m]],bias_baft_X_xtype2_dependent[[m]],bias_baft_X_xtype3_dependent[[m]],bias_baft_X_xtype4_dependent[[m]]
# Dashed red reference line at zero bias.
abline(h=0,col="red",lty=2,lwd=2)
# Overlay the mean binned bias of each method as a filled point.
points(x=1,y=mean(simulation_logis_bias_RMSE_summary$bias_rsf_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=2,y=mean(simulation_logis_bias_RMSE_summary$bias_rsf_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=3,y=mean(simulation_logis_bias_RMSE_summary$bias_baft_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=4,y=mean(simulation_logis_bias_RMSE_summary$bias_baft_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=5,y=mean(simulation_logis_bias_RMSE_summary$bias_dnnsurv_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=6,y=mean(simulation_logis_bias_RMSE_summary$bias_dnnsurv_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
# na.rm=TRUE: some simulations may lack a logistic-model estimate.
points(x=7,y=mean(simulation_logis_bias_RMSE_summary$bias_slogistic[[m]],na.rm=T),col="darkslategrey",pch=19,cex=1.2)
title(paste0("Scenario ",m),cex.main=2)
}else{
# Remaining panels: identical plot without the y-axis label.
boxplot(simulation_logis_bias_RMSE_summary$bias_rsf_T[[m]],simulation_logis_bias_RMSE_summary$bias_rsf_X[[m]],simulation_logis_bias_RMSE_summary$bias_baft_T[[m]],simulation_logis_bias_RMSE_summary$bias_baft_X[[m]],simulation_logis_bias_RMSE_summary$bias_dnnsurv_T[[m]],simulation_logis_bias_RMSE_summary$bias_dnnsurv_X[[m]],simulation_logis_bias_RMSE_summary$bias_slogistic[[m]],xaxt="n",ylim=c(-0.12,0.12),cex.axis=2) #bias_rsf_X_xtype1_dependent[[m]],bias_rsf_X_xtype2_dependent[[m]],bias_rsf_X_xtype3_dependent[[m]],bias_rsf_X_xtype4_dependent[[m]],bias_baft_X_xtype1_dependent[[m]],bias_baft_X_xtype2_dependent[[m]],bias_baft_X_xtype3_dependent[[m]],bias_baft_X_xtype4_dependent[[m]]
abline(h=0,col="red",lty=2,lwd=2)
points(x=1,y=mean(simulation_logis_bias_RMSE_summary$bias_rsf_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=2,y=mean(simulation_logis_bias_RMSE_summary$bias_rsf_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=3,y=mean(simulation_logis_bias_RMSE_summary$bias_baft_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=4,y=mean(simulation_logis_bias_RMSE_summary$bias_baft_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=5,y=mean(simulation_logis_bias_RMSE_summary$bias_dnnsurv_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=6,y=mean(simulation_logis_bias_RMSE_summary$bias_dnnsurv_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=7,y=mean(simulation_logis_bias_RMSE_summary$bias_slogistic[[m]],na.rm=T),col="darkslategrey",pch=19,cex=1.2)
title(paste0("Scenario ",m),cex.main=2)
}
}
# Figure 2, bottom row: boxplots of binned RMSE across the 100 simulations,
# one panel per scenario (m = 1..3), in the same method order as the bias row.
# These panels draw the method names on the x-axis for the whole column.
for(m in 1:3){
if (m==1){
# First panel carries the shared y-axis label "Binned RMSE".
boxplot(simulation_logis_bias_RMSE_summary$RMSE_rsf_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_rsf_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_baft_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_baft_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_slogistic[[m]],ylim=c(0,0.25),cex.axis=2,ylab="Binned RMSE",names = c("R-T","R-X","B-T","B-X","D-T","D-X","Logistic"))#RMSE_rsf_X_xtype1_dependent[[m]],RMSE_rsf_X_xtype2_dependent[[m]],RMSE_rsf_X_xtype3_dependent[[m]],RMSE_rsf_X_xtype4_dependent[[m]],RMSE_baft_X_xtype1_dependent[[m]],RMSE_baft_X_xtype2_dependent[[m]],RMSE_baft_X_xtype3_dependent[[m]],RMSE_baft_X_xtype4_dependent[[m]]
# Overlay the mean binned RMSE of each method as a filled point.
points(x=1,y=mean(simulation_logis_bias_RMSE_summary$RMSE_rsf_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=2,y=mean(simulation_logis_bias_RMSE_summary$RMSE_rsf_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=3,y=mean(simulation_logis_bias_RMSE_summary$RMSE_baft_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=4,y=mean(simulation_logis_bias_RMSE_summary$RMSE_baft_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=5,y=mean(simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=6,y=mean(simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
# na.rm=TRUE: some simulations may lack a logistic-model estimate.
points(x=7,y=mean(simulation_logis_bias_RMSE_summary$RMSE_slogistic[[m]],na.rm=T),col="darkslategrey",pch=19,cex=1.2)
}else{
# Remaining panels: identical plot without the y-axis label.
boxplot(simulation_logis_bias_RMSE_summary$RMSE_rsf_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_rsf_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_baft_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_baft_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_T[[m]],simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_X[[m]],simulation_logis_bias_RMSE_summary$RMSE_slogistic[[m]],ylim=c(0,0.25),cex.axis=2,names = c("R-T","R-X","B-T","B-X","D-T","D-X","Logistic"))#RMSE_rsf_X_xtype1_dependent[[m]],RMSE_rsf_X_xtype2_dependent[[m]],RMSE_rsf_X_xtype3_dependent[[m]],RMSE_rsf_X_xtype4_dependent[[m]],RMSE_baft_X_xtype1_dependent[[m]],RMSE_baft_X_xtype2_dependent[[m]],RMSE_baft_X_xtype3_dependent[[m]],RMSE_baft_X_xtype4_dependent[[m]]
points(x=1,y=mean(simulation_logis_bias_RMSE_summary$RMSE_rsf_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=2,y=mean(simulation_logis_bias_RMSE_summary$RMSE_rsf_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=3,y=mean(simulation_logis_bias_RMSE_summary$RMSE_baft_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=4,y=mean(simulation_logis_bias_RMSE_summary$RMSE_baft_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=5,y=mean(simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_T[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=6,y=mean(simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_X[[m]]),col="darkslategrey",pch=19,cex=1.2)
points(x=7,y=mean(simulation_logis_bias_RMSE_summary$RMSE_slogistic[[m]],na.rm=T),col="darkslategrey",pch=19,cex=1.2)
}
}
This section reproduces prediction accuracy of each method (Table 2). Please run the last section (Figure 2 section) first.
The following code requires loading the predictions on the test dataset from 100 runs of the simulations. Since that result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 1 (upper panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)
#random1.diff <- random1.diff[,c(2:5,8:9,1,6,7)]
#random2.diff <- random2.diff[,c(2:3,5:6,8:9,1,4,7)]
#random3.diff <- random3.diff[,c(2:3,5:6,8:9,1,4,7)]
#random<-list(random1.diff,random2.diff,random3.diff)
#library(tableone)
#bimatrix.random1<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
# bimatrix.random1[,i]<-ifelse(random[[1]][,i]>=0,1,0)
#}
#colnames(bimatrix.random1)<-colnames(random[[1]])[1:8]
#bimatrix.random1$sim<-random[[1]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
# dat<-bimatrix.random1[which(bimatrix.random1$sim==i),]
# for (j in 1:7){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }}
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S1.RData"))
library(tidyr)
library(tableone)
# Reshape each accuracy measure (100 simulations x 7 methods, logistic data,
# scenario 1) from wide to long format; the method becomes a factor column.
acc_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$sens), method, value, RSF_T:Logis, factor_key=TRUE)
# BUG FIX: the original read `$sepc` (typo). `$sepc` does not partially match
# `spec`, so it returns NULL and gather() fails on an empty data frame.
spec_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$spec), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# One row per simulation x method; one column per accuracy measure.
statmat.random1<-data.frame(cbind(acc_long.random1,ppv_long.random1[,2],npv_long.random1[,2],sens_long.random1[,2],spec_long.random1[,2],fmeasure_long.random1[,2]))
names(statmat.random1)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random1[[metric]] <- statmat.random1[[metric]] * 100
}
# Mean (SD) of each accuracy measure, stratified by method
# (upper panel of Table 2).
table_one.random1<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random1)
table_one_matrix.random1<-print(table_one.random1)
## Stratified by method
## RSF_T RSF_X0 BAFT_T BAFT_X0
## n 100 100 100 100
## acc (mean (SD)) 79.04 (4.40) 79.75 (4.53) 78.63 (3.65) 80.65 (3.73)
## ppv (mean (SD)) 84.06 (5.25) 84.67 (5.51) 85.21 (4.18) 86.58 (4.57)
## npv (mean (SD)) 72.20 (7.75) 73.24 (7.91) 69.28 (5.86) 72.34 (6.28)
## sens (mean (SD)) 83.84 (7.56) 84.38 (7.68) 81.03 (5.81) 83.07 (5.98)
## spec (mean (SD)) 70.49 (12.90) 71.51 (13.37) 74.34 (9.16) 76.33 (9.99)
## fmeasure (mean (SD)) 83.58 (3.85) 84.13 (3.94) 82.87 (3.18) 84.56 (3.18)
## Stratified by method
## DNNSurv_T DNNSurv_X Logis p test
## n 100 100 100
## acc (mean (SD)) 83.23 (4.83) 83.75 (5.06) 91.56 (4.28) <0.001
## ppv (mean (SD)) 87.62 (5.84) 87.72 (5.81) 94.32 (5.20) <0.001
## npv (mean (SD)) 78.02 (8.68) 79.26 (9.47) 88.74 (8.56) <0.001
## sens (mean (SD)) 86.81 (7.58) 87.59 (7.71) 92.87 (6.22) <0.001
## spec (mean (SD)) 76.85 (13.63) 76.91 (13.55) 89.22 (10.73) <0.001
## fmeasure (mean (SD)) 86.83 (4.03) 87.28 (4.16) 93.34 (3.44) <0.001
The following code requires loading the predictions on the test dataset from 100 runs of the simulations. Since that result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 2 (middle panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)
#library(tableone)
#bimatrix.random2<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
# bimatrix.random2[,i]<-ifelse(random[[2]][,i]>=0,1,0)
#}
#colnames(bimatrix.random2)<-colnames(random[[2]])[1:8]
#bimatrix.random2$sim<-random[[2]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
# dat<-bimatrix.random2[which(bimatrix.random2$sim==i),]
# for (j in 1:7){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }}
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S2.RData"))
library(tidyr)
library(tableone)
# Reshape each accuracy measure (100 simulations x 7 methods, logistic data,
# scenario 2) from wide to long format; the method becomes a factor column.
acc_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$sens), method, value, RSF_T:Logis, factor_key=TRUE)
# BUG FIX: the original read `$sepc` (typo). `$sepc` does not partially match
# `spec`, so it returns NULL and gather() fails on an empty data frame.
spec_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$spec), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# One row per simulation x method; one column per accuracy measure.
statmat.random2<-data.frame(cbind(acc_long.random2,ppv_long.random2[,2],npv_long.random2[,2],sens_long.random2[,2],spec_long.random2[,2],fmeasure_long.random2[,2]))
names(statmat.random2)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random2[[metric]] <- statmat.random2[[metric]] * 100
}
# Mean (SD) of each accuracy measure, stratified by method
# (middle panel of Table 2).
table_one.random2<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random2)
table_one_matrix.random2<-print(table_one.random2)
## Stratified by method
## RSF_T RSF_X0 BAFT_T BAFT_X0
## n 100 100 100 100
## acc (mean (SD)) 80.98 (3.54) 82.05 (3.67) 79.21 (3.18) 81.97 (3.20)
## ppv (mean (SD)) 81.45 (6.34) 82.54 (6.59) 78.84 (4.39) 81.66 (4.81)
## npv (mean (SD)) 82.19 (6.00) 83.39 (6.36) 80.12 (4.06) 82.99 (4.39)
## sens (mean (SD)) 81.13 (8.74) 82.28 (9.04) 79.80 (5.53) 82.61 (5.83)
## spec (mean (SD)) 80.84 (9.18) 81.83 (9.36) 78.64 (5.97) 81.35 (6.40)
## fmeasure (mean (SD)) 80.74 (3.92) 81.83 (4.07) 79.13 (3.27) 81.91 (3.25)
## Stratified by method
## DNNSurv_T DNNSurv_X Logis p test
## n 100 100 100
## acc (mean (SD)) 83.13 (4.29) 83.99 (4.20) 91.90 (3.73) <0.001
## ppv (mean (SD)) 83.31 (6.89) 84.34 (6.95) 92.68 (6.23) <0.001
## npv (mean (SD)) 84.71 (6.63) 85.70 (7.26) 92.41 (5.94) <0.001
## sens (mean (SD)) 83.79 (9.05) 84.53 (9.83) 91.58 (7.47) <0.001
## spec (mean (SD)) 82.48 (9.48) 83.46 (9.58) 92.21 (7.25) <0.001
## fmeasure (mean (SD)) 83.00 (4.59) 83.79 (4.68) 91.75 (3.98) <0.001
The following code requires loading the predictions on the test dataset from 100 runs of the simulations. Since that result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.
Scenario 3 (lower panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)
#library(tableone)
#bimatrix.random3<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
# bimatrix.random3[,i]<-ifelse(random[[3]][,i]>=0,1,0)
#}
#colnames(bimatrix.random3)<-colnames(random[[3]])[1:8]
#bimatrix.random3$sim<-random[[3]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
# dat<-bimatrix.random3[which(bimatrix.random3$sim==i),]
# for (j in 1:7){
# tryCatch({
# metrics<-table(dat[,j],dat$true_diff)
# tn<-metrics[1,1]
# tp<-metrics[2,2]
# fn<-metrics[1,2]
# fp<-metrics[2,1]
# acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
# ppv[i,j]<-tp/(tp+fp)
# npv[i,j]<-tn/(tn+fn)
# sens[i,j]<-tp/(tp+fn)
# spec[i,j]<-tn/(tn+fp)
# fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
# ,error=function(e){
# print(paste("Error at i=",i,j))
# })
# }}
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S3.RData"))
library(tidyr)
library(tableone)
# Reshape each accuracy measure (100 simulations x 7 methods, logistic data,
# scenario 3) from wide to long format; the method becomes a factor column.
acc_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$sens), method, value, RSF_T:Logis, factor_key=TRUE)
# BUG FIX: the original read `$sepc` (typo). `$sepc` does not partially match
# `spec`, so it returns NULL and gather() fails on an empty data frame.
spec_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$spec), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# One row per simulation x method; one column per accuracy measure.
statmat.random3<-data.frame(cbind(acc_long.random3,ppv_long.random3[,2],npv_long.random3[,2],sens_long.random3[,2],spec_long.random3[,2],fmeasure_long.random3[,2]))
names(statmat.random3)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Convert proportions to percentages for presentation.
for (metric in c("acc", "ppv", "npv", "sens", "spec", "fmeasure")) {
  statmat.random3[[metric]] <- statmat.random3[[metric]] * 100
}
# Mean (SD) of each accuracy measure, stratified by method
# (lower panel of Table 2).
table_one.random3<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random3)
table_one_matrix.random3<-print(table_one.random3)
## Stratified by method
## RSF_T RSF_X0 BAFT_T BAFT_X0
## n 100 100 100 100
## acc (mean (SD)) 82.12 (2.28) 82.15 (2.35) 83.39 (2.39) 84.16 (2.33)
## ppv (mean (SD)) 83.61 (4.36) 83.48 (4.50) 85.86 (3.16) 86.11 (3.35)
## npv (mean (SD)) 81.37 (5.63) 81.83 (6.00) 80.45 (3.99) 82.00 (4.24)
## sens (mean (SD)) 86.78 (6.08) 87.12 (6.41) 85.69 (4.05) 86.99 (4.29)
## spec (mean (SD)) 75.66 (8.82) 75.26 (9.26) 80.19 (5.46) 80.22 (5.93)
## fmeasure (mean (SD)) 84.89 (2.12) 84.95 (2.20) 85.68 (2.16) 86.42 (2.10)
## Stratified by method
## DNNSurv_T DNNSurv_X Logis p test
## n 100 100 100
## acc (mean (SD)) 85.63 (2.72) 85.68 (2.60) 92.60 (2.00) <0.001
## ppv (mean (SD)) 86.64 (4.23) 86.76 (4.54) 93.98 (3.25) <0.001
## npv (mean (SD)) 85.25 (5.46) 85.37 (5.57) 91.26 (4.12) <0.001
## sens (mean (SD)) 89.47 (5.24) 89.49 (5.36) 93.43 (3.65) <0.001
## spec (mean (SD)) 80.32 (7.73) 80.40 (8.34) 91.44 (5.10) <0.001
## fmeasure (mean (SD)) 87.83 (2.36) 87.87 (2.22) 93.61 (1.76) <0.001