This is the code for reproducing tables and plots in the main text. We have saved intermediate results (predicted CATE) under “Simulations/intermediate_result_100simulations”.

Please set up your working directory to wherever the folder "Simulations" is saved, using the following code and replacing "Your directory of 'Simulations' folder" with your own directory:
# setwd() returns the *previous* working directory, so assigning its result
# would store the wrong path for the paste0(your_directory, ...) calls below.
# Store the path first, then change the working directory.
your_directory <- "Your directory of 'Simulations' folder"
setwd(your_directory)

Please install the following packages if you haven’t:

#install.packages("tableone")
#install.packages("tidyr")

1 Run simulations

Please copy these codes to an R file and use server to run codes under this section. We have commented the codes out as it requires server.

1.1 Weibull model case

This section provides code to run R-T, R-X, B-T, B-X and CSF for 100 simulations when data are generated from Weibull models at the median failure time under scenario 1 to scenario 3. If the time of interest is the 3rd quartile of failure times (tt=22 in scenario 1 and scenario 2; tt=20 in scenario 3), please change "tt" in the code below (see comments in the code below). We will also prepare training and test data for running D-T and D-X in Python.

Balanced design:

###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # NOTE(review): s, i and tt are undefined at this point (outside the loop) — set tt first and confirm the intended file-name suffix

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "random",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting1_dataframe.RData")) 


###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting2_dataframe.RData")) 

###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
 
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-14  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_balanced_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_balanced_setting3_dataframe.RData")) 

Dependent design:

###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "dependent",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
#    
#    load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario1_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting1_dataframe.RData")) 

###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("aft_bart_np_SurvivalProb.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "dependent",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting2_dataframe.RData")) 

###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "dependent",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
 
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-14  # assign a time point of interest
#      simdata <- data_gen_censor(n=1000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "dependent",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#     baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_dependent_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_dependent_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_dependent_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_dependent_setting3_dataframe.RData")) 

Unbalanced design with 4000 samples in training data:

###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced_scenario1_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=4000, p=10,  PH=TRUE, censor = "30%", setting = 1, ex = "unbalanced",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
#    
#    load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario1.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario1_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting1_dataframe.RData")) 

###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-15  # assign a time point of interest
#      simdata <- data_gen_censor(n=4000, p=10,  PH=TRUE, censor = "30%", setting = 2, ex = "unbalanced",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#     cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData")) # removed undefined object 'ratio' from save()
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData")) # load the result file, which contains 'diff' (the simdata file only contains 'dat')
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario2.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario2_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting2_dataframe.RData")) 

###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#library(grf)    # needed for causal_survival_forest()
#source("simulation_design_survival_Weibull.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("weibull_true.R")

# generate test data
#mydata <- data_gen_censor(n=10000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "unbalanced",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
 
# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-14  # assign a time point of interest
#      simdata <- data_gen_censor(n=4000, p=10,  PH=TRUE, censor = "30%", setting = 3, ex = "unbalanced",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      cs.forest.prob <- causal_survival_forest(dat[,c(1:10)], dat$Time, dat$Treatment, dat$Event,target = "survival.probability",horizon = tt,num.trees = 500,mtry = (10/3),min.node.size = 15,num.threads = 1)
#      p.prob = predict(cs.forest.prob, mydata$data[,c(1:10)])
#      csa_diff<-data.frame(diff=p.prob)
#      weib.true<-weibull_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,csf_diff=csa_diff,weib_diff=weib.true$diff,true_diff=mydata$true.diff)
#      save(diff,ratio,file=paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#dat_all <- NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData")) # the "diff" object was saved in the result_ files (see the save() call above), not the simdata_ files
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
    
#    load(paste0(your_directory,"/simdata_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
#    dat$sim=i+(s-1)*5
#    dat_all<-rbind(dat_all,dat)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_weib_unbalanced4000n_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # simulation result file from 100 simulations
#save(dat_all,file=paste0(your_directory,"/dat_all_weib_unbalanced4000n_mediantt_n1000p10_test10000_scenario3.RData")) # save training data from 100 simulations

# prepare test data for Python
#load(paste0(your_directory,"/test10000_weib_mediantt_unbalanced4000n_scenario3_",(s-1)*5+i,".RData"))
#testdat <- cbind(mydata$data, mydata$true.diff,mydata$true1, mydata$true0)
#colnames(testdat) <- c("V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","Treatment","Time","Event","true.diff","true1","true0")
#save(testdat,file = paste0(your_directory,"/testDat10000_weib_mediantt_unbalanced4000n_setting3_dataframe.RData")) 

D-T and D-X are run using Python. Please run D-T and D-X in Python as the following:

  1. Install the following Python modules if you haven’t: pyreadr, tensorflow, pandas, matplotlib, sklearn, numpy, multiprocessing. These modules can be installed using the command line as follows if you use Anaconda: conda activate venv. This step opens the virtual environment of the Python version that you installed; venv is the name of your virtual environment. If you run Python on a server, you can activate your virtual environment by typing the command line: source ~/venv/bin/activate . After entering your virtual environment, use the following command line to install Python modules: pip install module_name. If you installed Python3, please type: pip3 install module_name. Every time you would like to run the .py files, please enter the virtual environment where these modules are installed.

  2. Enter your virtual environment where these modules are installed.

  3. Please put DNNSurv_Tlearner_100runs_simulation.py, DNNSurv_Xlearner_100runs_simulation.py, DNNSurve_TXlearner_fun_util.py, training and test data prepared above into the same folder (Please create a new folder if you would like to.).

The names of test data are listed here:
Balanced design and scenario 1: testDat10000_weib_mediantt_balanced_setting1_dataframe.RData; Balanced design and scenario 2: testDat10000_weib_mediantt_balanced_setting2_dataframe.RData; Balanced design and scenario 3: testDat10000_weib_mediantt_balanced_setting3_dataframe.RData. Dependent design and scenario1: testDat10000_weib_mediantt_dependent_setting1_dataframe.RData; Dependent design and scenario2: testDat10000_weib_mediantt_dependent_setting2_dataframe.RData; Dependent design and scenario3: testDat10000_weib_mediantt_dependent_setting3_dataframe.RData. Unbalanced design (4000 samples) and scenario 1: testDat10000_weib_mediantt_unbalanced4000n_setting1_dataframe.RData; Unbalanced design (4000 samples) and scenario 2: testDat10000_weib_mediantt_unbalanced4000n_setting2_dataframe.RData; Unbalanced design (4000 samples) and scenario 3: testDat10000_weib_mediantt_unbalanced4000n_setting3_dataframe.RData.


  1. Set up the folder directory in line 2 of DNNSurv_Tlearner_1run_simulation.py and DNNSurv_Xlearner_1run_simulation.py.

  2. In line 24, change training data name for different simulation design and scenarios.

  3. In line 25, change test data name for different simulation design and scenarios.

  4. In line 27, please give a name to the output result file.

  5. Submit DNNSurv_Tlearner_100runs_simulation.py and DNNSurv_Xlearner_100runs_simulation.py to Python using command line: python DNNSurv_Xlearner_100runs_simulation.py. You can add a & at the end if you are using server for nohup option.
    For 100 runs, please visit “Example_code_simulation_1run.Rmd” for submitting a single run of simulation files “DNNSurv_Tlearner_1run_simulation.py” and “DNNSurv_Xlearner_1run_simulation.py”.
    In the output result file, the first 10000 rows are predicted CATE; the second 10000 rows are true CATE.

  6. For each simulation design and scenario, combine D-T and D-X with other methods that are run in R as the following code. Here we give an example of combining all simulation results under balanced design:

###----------------------------------- scenario 1 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData")) # load() takes the file path directly; it restores diff_all into the workspace

# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario1.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"

# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario1.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random1.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random1.diff.all)[length(colnames(random1.diff.all))] <- "deepsurv_X_diff"

###----------------------------------- scenario 2 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData")) # load() takes the file path directly; it restores diff_all into the workspace

# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario2.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"

# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario2.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random2.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random2.diff.all)[length(colnames(random2.diff.all))] <- "deepsurv_X_diff"

###----------------------------------- scenario 3 -------------------------------------###
# load R-T, R-X, B-T, B-X, CSF results
#load(paste0(your_directory,"/result_weib_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData")) # load() takes the file path directly; it restores diff_all into the workspace

# load D-T results
#your_D_T_result_file_name <- "Tlearner_result_weib_balanced_mediantt_scenario3.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-T output result file")
#Tlearner <- read_csv(paste0(your_directory ,your_D_T_result_file_name), col_names = FALSE)
#Tlearner1<-t(Tlearner)
#colnames(Tlearner1) <- c("deepsurv_T_diff","true_diff")
#diff_all1 <- cbind(diff_all,Tlearner1[,1])
#colnames(diff_all1)[length(colnames(diff_all1))] <- "deepsurv_T_diff"

# load D-X results
#your_D_X_result_file_name <- "Xlearner_result_weib_balanced_mediantt_scenario3.csv" # please change the name of output file to what you assign in .py file line 27. 
#library(readr)
#your_directory <- setwd("The directory contain your D-X output result file")
#Xlearner <- read_csv(paste0(your_directory ,your_D_X_result_file_name), col_names = FALSE)
#Xlearner1<-t(Xlearner)
#colnames(Xlearner1) <- c("deepsurv_X_diff","true_diff")
#random3.diff.all <- cbind(diff_all1,Xlearner1[,1])
#colnames(random3.diff.all)[length(colnames(random3.diff.all))] <- "deepsurv_X_diff"

#save(random1.diff.all,random2.diff.all,random3.diff.all,file = paste0(your_directory,"/weib_balanced_mediantt.RData"))

1.2 Standard logistic model case

Balanced design:

###-------------------------------------------- scenario 1 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")

# generate test data
#tt<-1.05  # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=1,sigma1=2,r=0.15,mu=0, censor = "30%", setting = 1, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-1.05  # assign a time point of interest
#      simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=1,sigma1=2,r=0.15,mu=0, censor = "30%", setting = 1, ex = "random",time.interest=1.05)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=1)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
#      save(diff,ratio,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_logis_mediantt_balanced_scenario1_",(s-1)*5+i,".RData")) # the "diff" object was saved in the result_ files (see the save() call above), not the simdata_ files
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario1.RData"))

###-------------------------------------------- scenario 2 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")

# generate test data
#tt<-1.1  # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=0.5,sigma1=1,r=0.22,mu=0, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-1.1  # assign a time point of interest
#      simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=0.5,sigma1=1,r=0.22,mu=0, censor = "30%", setting = 2, ex = "random",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=2)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
#      save(diff,ratio,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_logis_mediantt_balanced_scenario2_",(s-1)*5+i,".RData")) # the "diff" object was saved in the result_ files (see the save() call above), not the simdata_ files
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario2.RData"))

###-------------------------------------------- scenario 3 ----------------------------------------------###
#library(survival)
#library(BART)
#library(randomForestSRC)
#library(tidyverse)
#library(gbm)
#library(randomForest)
#library(pec)
#library(AFTrees)
#library(doMC)
#source("simulation_design_survival_StandardLogistic.R")
#source("rsf_TXlearners.R")
#source("baft_TXlearner_fun_util.R")
#source("baft_TXlearners.R")
#source("StandardLogistic_TrueModel.R")

# generate test data
#tt<-1.29 # assign a time point of interest
#mydata <- data_gen_censor_slogistic(n=10000, p=10,sigma0=0.5,sigma1=1,r=0.2,mu=0, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#save(mydata,file=paste0(your_directory,"/test10000_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))

# run simulation
#nCores=20 # register 20 cores
#registerDoMC(nCores)
#foreach (s=1:20) %dopar% {
#  B<-5
#  for (i in 1:B){
#    tryCatch({
#      set.seed(100871+i+s*15213)
#      tt<-1.29  # assign a time point of interest
#      simdata <- data_gen_censor_slogistic(n=1000, p=10,sigma0=0.5,sigma1=1,r=0.2,mu=0, censor = "30%", setting = 3, ex = "random",time.interest=tt)
#      dat <- simdata$data
#      rsf_T<-rsf_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      rsf_X<-rsf_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      baft_T<-baft_HTE_T(data=dat,testdat=mydata$data,time.interest=tt)
#      baft_X<-baft_HTE_X(data=dat,testdat=mydata$data,ntrees=1000,time.interest=tt)
#      logis.true<-slogistic_true(data=dat,testdat=mydata$data,time.interest=tt,setting=3)
      
      # save prediction results
#      diff<-data.frame(rsf_T_diff=rsf_T$diff,rsf_X_diff=rsf_X$diff,baft_T_diff=baft_T$diff,baft_X_diff=baft_X$diff,logis_diff=logis.true$diff,true_diff=mydata$true.diff)
#      save(diff,ratio,file=paste0(your_directory,"/result_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
      
      # save simulation data in order to run D-T and D-X in Python
#      save(dat,file=paste0(your_directory,"/simdata_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData"))
#    }
#    ,error=function(e){
#      print(paste("Error at i=",(s-1)*5+i))
#    })
#  }
#}
#rm(list=ls())

#diff_all<-NULL
#for (s in 1:20){
#  for (i in 1:5){
#    tryCatch({
#    load(paste0(your_directory,"/result_logis_mediantt_balanced_scenario3_",(s-1)*5+i,".RData")) # the "diff" object was saved in the result_ files (see the save() call above), not the simdata_ files
#    diff$sim=i+(s-1)*5
#    diff_all<-rbind(diff_all,diff)
#    },error=function(e){
#      print(paste("Error at s,i",s,i))
#    })
#  }
#}
#save(diff_all,file=paste0(your_directory,"/result_logis_balanced_mediantt_RTRXBTBX_n1000p10_test10000_scenario3.RData"))

2 Tables and plots in main text

This section provides codes to reproduce tables and plots in main text of simulations by loading the intermediate results.

2.1 Figure 1

This figure shows binned bias and RMSE when data is generated from Weibull models under balanced design and scenario 1. Because the size of the intermediate results is too large, we have saved summarized biases and RMSEs from 100 simulations in “Simulations/intermediate_result_100simulations.RData”.

The following codes are examples for loading the predicted CATE on test datasets in 100 runs of the simulations. The codes are commented out because the result files are too large to be loaded for the manuscript submission. Please jump to the next chunk for loading summarized biases and RMSEs.

Load predicted CATE from R-T, R-X, B-T, B-X, D-T, D-X and CSF when data is generated from Weibull models under balanced design and scenario1:

#load(paste0(your_directory,"/weib_balanced_mediantt.RData"))

#random1.diff.all$ID<-random2.diff.all$ID<-random3.diff.all$ID<-rep(1:10000,100)
#random<-list(random1.diff.all,random2.diff.all,random3.diff.all)

Calculate binned bias and RMSE:

#RMSE_rsf_T<-RMSE_rsf_X<-RMSE_baft_T<-RMSE_baft_X<-RMSE_weib<-RMSE_dnnsurv_T<-RMSE_dnnsurv_X<-RMSE_CSF<-list(rep(NA,100),rep(NA,100),rep(NA,100))
#bias_rsf_T<-bias_rsf_X<-bias_baft_T<-bias_baft_X<-bias_weib<-bias_dnnsurv_T<-bias_dnnsurv_X<-bias_CSF<-list(rep(NA,100),rep(NA,100),rep(NA,100))

#for (s in 1:3){ # random, setting 1 to 3
#  for (i in 1:100){ # 100 runs of results
#    dat<-random[[s]][which(random[[s]]$sim==i),]
#    dat<-dat[order(dat$true_diff),]
    
#    quant<-quantile(dat$true_diff,probs=seq(0,1,length.out=51)) 
    
#    bias_rsf_T[[s]][i]<-0
#    bias_rsf_X[[s]][i]<-0
#    bias_baft_T[[s]][i]<-0
#    bias_baft_X[[s]][i]<-0
#    bias_weib[[s]][i]<-0
#    bias_dnnsurv_T[[s]][i]<-0
#    bias_dnnsurv_X[[s]][i]<-0
#    bias_CSF[[s]][i]<-0
#    RMSE_rsf_T[[s]][i]<-0
#    RMSE_rsf_X[[s]][i]<-0
#    RMSE_baft_T[[s]][i]<-0
#    RMSE_baft_X[[s]][i]<-0
#    RMSE_weib[[s]][i]<-0
#    RMSE_dnnsurv_T[[s]][i]<-0
#    RMSE_dnnsurv_X[[s]][i]<-0
#    RMSE_CSF[[s]][i]<-0
    
#    for (j in 1:50){
#      datq<-dat[which(dat$true_diff>=quant[j]&dat$true_diff<=quant[j+1]),]
#      bias_rsf_T[[s]][i]<-bias_rsf_T[[s]][i]+mean(datq$rsf_T_diff-datq$true_diff)
#      bias_rsf_X[[s]][i]<-bias_rsf_X[[s]][i]+mean(datq$rsf_X_diff-datq$true_diff)
#      bias_baft_T[[s]][i]<-bias_baft_T[[s]][i]+mean(datq$baft_T_diff-datq$true_diff)
#      bias_baft_X[[s]][i]<-bias_baft_X[[s]][i]+mean(datq$baft_X_diff-datq$true_diff)
#      bias_weib[[s]][i]<-bias_weib[[s]][i]+mean(datq$weib_diff-datq$true_diff)
#      bias_dnnsurv_T[[s]][i]<-bias_dnnsurv_T[[s]][i]+mean(datq$deepsurv_T_diff-datq$true_diff)
#      bias_dnnsurv_X[[s]][i]<-bias_dnnsurv_X[[s]][i]+mean(datq$deepsurv_X_diff-datq$true_diff)
#      bias_CSF[[s]][i]<-bias_CSF[[s]][i]+mean(datq$CSF_diff-datq$true_diff)
#      RMSE_rsf_T[[s]][i]<-RMSE_rsf_T[[s]][i]+sqrt(mean((datq$rsf_T_diff-datq$true_diff)^2))
#      RMSE_rsf_X[[s]][i]<-RMSE_rsf_X[[s]][i]+sqrt(mean((datq$rsf_X_diff-datq$true_diff)^2))
#      RMSE_baft_T[[s]][i]<-RMSE_baft_T[[s]][i]+sqrt(mean((datq$baft_T_diff-datq$true_diff)^2))
#      RMSE_baft_X[[s]][i]<-RMSE_baft_X[[s]][i]+sqrt(mean((datq$baft_X_diff-datq$true_diff)^2))
#      RMSE_weib[[s]][i]<-RMSE_weib[[s]][i]+sqrt(mean((datq$weib_diff-datq$true_diff)^2))
#      RMSE_dnnsurv_T[[s]][i]<-RMSE_dnnsurv_T[[s]][i]+sqrt(mean((datq$deepsurv_T_diff-datq$true_diff)^2))
#      RMSE_dnnsurv_X[[s]][i]<-RMSE_dnnsurv_X[[s]][i]+sqrt(mean((datq$deepsurv_X_diff-datq$true_diff)^2))
#      RMSE_CSF[[s]][i]<-RMSE_CSF[[s]][i]+sqrt(mean((datq$CSF_diff-datq$true_diff)^2))
#    }
#    bias_rsf_T[[s]][i]=bias_rsf_T[[s]][i]/50
#    bias_rsf_X[[s]][i]=bias_rsf_X[[s]][i]/50
#    bias_baft_T[[s]][i]=bias_baft_T[[s]][i]/50
#    bias_baft_X[[s]][i]=bias_baft_X[[s]][i]/50
#    bias_weib[[s]][i]=bias_weib[[s]][i]/50
#    bias_dnnsurv_T[[s]][i]=bias_dnnsurv_T[[s]][i]/50
#    bias_dnnsurv_X[[s]][i]=bias_dnnsurv_X[[s]][i]/50
#    bias_CSF[[s]][i]=bias_CSF[[s]][i]/50
#    RMSE_rsf_T[[s]][i]=RMSE_rsf_T[[s]][i]/50
#    RMSE_rsf_X[[s]][i]=RMSE_rsf_X[[s]][i]/50
#    RMSE_baft_T[[s]][i]=RMSE_baft_T[[s]][i]/50
#    RMSE_baft_X[[s]][i]=RMSE_baft_X[[s]][i]/50
#    RMSE_weib[[s]][i]=RMSE_weib[[s]][i]/50
#    RMSE_dnnsurv_T[[s]][i]=RMSE_dnnsurv_T[[s]][i]/50
#    RMSE_dnnsurv_X[[s]][i]=RMSE_dnnsurv_X[[s]][i]/50
#    RMSE_CSF[[s]][i]=RMSE_CSF[[s]][i]/50
#  }
#}
#save(bias_rsf_T,bias_rsf_X,bias_baft_T,bias_baft_X,bias_weib,bias_dnnsurv_T,bias_dnnsurv_X,bias_CSF,RMSE_rsf_T,RMSE_rsf_X,RMSE_baft_T,RMSE_baft_X,RMSE_weib,RMSE_dnnsurv_T,RMSE_dnnsurv_X,RMSE_CSF,file =paste0(your_directory,"/simulation_weib_bias_RMSE_summary.RData"))

Plot Figure 1:

# Figure 1: binned bias (top row) and binned RMSE (bottom row) of the
# predicted CATE for each method, Weibull data under balanced design,
# scenarios 1-3, summarized over 100 simulations.
load(paste0(your_directory, "/intermediate_result_100simulations.RData"))

res <- simulation_weib_bias_RMSE_summary
# Method order on the x-axis: R-T, R-X, B-T, B-X, D-T, D-X, CSF, Weibull truth.
method_keys <- c("rsf_T", "rsf_X", "baft_T", "baft_X",
                 "dnnsurv_T", "dnnsurv_X", "CSF", "weib")

# Draw one panel: grouped boxplots of res[[paste0(metric, "_", key)]][[m]]
# for every method, plus a filled point at each group mean. `hline`, if
# given, draws a dashed red reference line before the mean points.
# Extra arguments in `...` are forwarded to boxplot().
draw_panel <- function(metric, m, hline = NULL, ...) {
  vals <- lapply(method_keys, function(k) res[[paste0(metric, "_", k)]][[m]])
  do.call(boxplot, c(vals, list(...)))
  if (!is.null(hline)) {
    abline(h = hline, col = "red", lty = 2, lwd = 2)
  }
  for (j in seq_along(vals)) {
    # na.rm = TRUE only for the last (Weibull) column, matching the original
    # code, which used na.rm = T solely for the bias_weib / RMSE_weib means.
    points(x = j, y = mean(vals[[j]], na.rm = (j == length(vals))),
           col = "darkslategrey", pch = 19, cex = 1.2)
  }
}

par(mfrow = c(2, 3))
par(mar = c(2.4, 4.8, 2, 1.2))
par(cex.lab = 2)
par(cex.axis = 2)

# Top row: binned bias per scenario (y-axis label only on the first panel).
for (m in 1:3) {
  if (m == 1) {
    draw_panel("bias", m, hline = 0, xaxt = "n", ylim = c(-0.12, 0.12),
               cex.axis = 2, ylab = "Bias")
  } else {
    draw_panel("bias", m, hline = 0, xaxt = "n", ylim = c(-0.12, 0.12),
               cex.axis = 2)
  }
  title(paste0("Scenario ", m), cex.main = 2)
}

# Bottom row: binned RMSE per scenario with method labels on the x-axis.
method_labels <- c("R-T", "R-X", "B-T", "B-X", "D-T", "D-X", "CSF", "Weib")
for (m in 1:3) {
  if (m == 1) {
    draw_panel("RMSE", m, ylim = c(0, 0.25), cex.axis = 2,
               ylab = "Binned RMSE", names = method_labels)
  } else {
    draw_panel("RMSE", m, ylim = c(0, 0.25), cex.axis = 2,
               names = method_labels)
  }
}

2.2 Table 1

This section reproduces prediction accuracy of each method (Table 1).

The following code requires loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures. Please jump to the next chunk to run the summary table.

Scenario 1 (upper panel of Table 1): (The message “Error at i=” below means the simulation which CSF predicted all CATE > 0. We have mentioned this in the Results section in the main text.)

#random1.diff.all1 <- random1.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random2.diff.all1 <- random2.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random3.diff.all1 <- random3.diff.all[,c(2,4,7,9,11,12,16,13,14)]
#random1<-list(random1.diff.all1,random2.diff.all1,random3.diff.all1)

### random setting 1
#library(tableone)
#bimatrix.random1<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
  #bimatrix.random1[,i]<-ifelse(random1[[1]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random1)<-colnames(random1[[1]])[1:9]
#bimatrix.random1$sim<-random[[1]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
#  dat<-bimatrix.random1[which(bimatrix.random1$sim==i),]
#  for (j in 1:8){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#     ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }} # in simu 8, 11, 41, 63, 83, predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S1.RData"))
library(tidyr)
library(tableone)

# Reshape each accuracy metric matrix (100 simulations x 8 methods) from wide
# to long format so CreateTableOne can stratify by method.
# NOTE(review): the saved summary object appears to store specificity under the
# misspelled key "sepc"; fall back to the corrected key "spec" so this chunk
# keeps working if the intermediate file is ever re-saved with the right name.
spec_weib_S1 <- simulation_weib_accuracy_summary_S1$sepc
if (is.null(spec_weib_S1)) spec_weib_S1 <- simulation_weib_accuracy_summary_S1$spec
acc_long.random1<-gather(data.frame(simulation_weib_accuracy_summary_S1$acc), method, value, RSF_T:Weib, factor_key=TRUE)
ppv_long.random1<-gather(data.frame(simulation_weib_accuracy_summary_S1$ppv), method, value, RSF_T:Weib, factor_key=TRUE)
npv_long.random1<-gather(data.frame(simulation_weib_accuracy_summary_S1$npv), method, value, RSF_T:Weib, factor_key=TRUE)
sens_long.random1<-gather(data.frame(simulation_weib_accuracy_summary_S1$sens), method, value, RSF_T:Weib, factor_key=TRUE)
spec_long.random1<-gather(data.frame(spec_weib_S1), method, value, RSF_T:Weib, factor_key=TRUE)
fmeasure_long.random1<-gather(data.frame(simulation_weib_accuracy_summary_S1$fmeasure), method, value, RSF_T:Weib, factor_key=TRUE)
# Bind the value columns side by side; column 1 is the shared method factor.
statmat.random1<-data.frame(cbind(acc_long.random1,ppv_long.random1[,2],npv_long.random1[,2],sens_long.random1[,2],spec_long.random1[,2],fmeasure_long.random1[,2]))
names(statmat.random1)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Express all six metrics as percentages.
statmat.random1[2:7] <- statmat.random1[2:7] * 100
table_one.random1<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random1)
table_one_matrix.random1<-print(table_one.random1)
##                       Stratified by method
##                        RSF_T        RSF_X        BAFT_T       BAFT_X      
##   n                      100          100          100          100       
##   acc (mean (SD))      90.11 (1.81) 90.58 (1.85) 85.99 (2.03) 88.55 (1.91)
##   ppv (mean (SD))      91.78 (3.21) 92.33 (3.30) 90.67 (2.32) 92.18 (2.50)
##   npv (mean (SD))      87.17 (5.99) 87.56 (6.19) 76.13 (4.54) 80.93 (4.81)
##   sens (mean (SD))     94.56 (3.24) 94.62 (3.38) 89.26 (2.96) 91.51 (2.94)
##   spec (mean (SD))     79.74 (9.21) 81.16 (9.29) 78.36 (6.22) 81.64 (6.66)
##   fmeasure (mean (SD)) 93.05 (1.22) 93.36 (1.27) 89.90 (1.52) 91.79 (1.39)
##                       Stratified by method
##                        DNNSurv_T    DNNSurv_X    CSF           Weib        
##   n                      100          100          100           100       
##   acc (mean (SD))      92.39 (1.71) 93.44 (1.67) 82.02 (7.15)  96.37 (1.62)
##   ppv (mean (SD))      94.96 (3.02) 95.52 (2.94) 81.18 (7.56)  97.36 (2.33)
##   npv (mean (SD))      87.62 (5.77) 89.63 (5.43) 95.55 (6.05)  94.68 (4.90)
##   sens (mean (SD))     94.30 (3.25) 95.25 (3.04) 98.38 (2.67)  97.55 (2.44)
##   spec (mean (SD))     87.92 (8.00) 89.20 (7.63) 43.86 (27.72) 93.62 (5.89)
##   fmeasure (mean (SD)) 94.54 (1.22) 95.31 (1.20) 88.67 (3.84)  97.41 (1.15)
##                       Stratified by method
##                        p      test
##   n                               
##   acc (mean (SD))      <0.001     
##   ppv (mean (SD))      <0.001     
##   npv (mean (SD))      <0.001     
##   sens (mean (SD))     <0.001     
##   spec (mean (SD))     <0.001     
##   fmeasure (mean (SD)) <0.001

The following code requires loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures instead. Please jump to the next chunk to run the summary table.

Scenario 2 (middle panel of Table 1): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)

#library(tableone)
#bimatrix.random2<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
#  bimatrix.random2[,i]<-ifelse(random1[[2]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random2)<-colnames(random1[[2]])[1:9]
#bimatrix.random2$sim<-random[[2]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
#  dat<-bimatrix.random2[which(bimatrix.random2$sim==i),]
#  for (j in 1:8){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#      ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }}  # simulation 74 and 75: a lot of missing in weibull_diff, all other with numbers >0; in 33 simu, predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S2.RData"))
library(tidyr)
library(tableone)

# Reshape each accuracy metric matrix (100 simulations x 8 methods) from wide
# to long format so CreateTableOne can stratify by method.
# NOTE(review): the saved summary object appears to store specificity under the
# misspelled key "sepc"; fall back to the corrected key "spec" so this chunk
# keeps working if the intermediate file is ever re-saved with the right name.
spec_weib_S2 <- simulation_weib_accuracy_summary_S2$sepc
if (is.null(spec_weib_S2)) spec_weib_S2 <- simulation_weib_accuracy_summary_S2$spec
acc_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$acc), method, value, RSF_T:Weib, factor_key=TRUE)
ppv_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$ppv), method, value, RSF_T:Weib, factor_key=TRUE)
npv_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$npv), method, value, RSF_T:Weib, factor_key=TRUE)
sens_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$sens), method, value, RSF_T:Weib, factor_key=TRUE)
spec_long.random2<-gather(data.frame(spec_weib_S2), method, value, RSF_T:Weib, factor_key=TRUE)
fmeasure_long.random2<-gather(data.frame(simulation_weib_accuracy_summary_S2$fmeasure), method, value, RSF_T:Weib, factor_key=TRUE)
# Bind the value columns side by side; column 1 is the shared method factor.
statmat.random2<-data.frame(cbind(acc_long.random2,ppv_long.random2[,2],npv_long.random2[,2],sens_long.random2[,2],spec_long.random2[,2],fmeasure_long.random2[,2]))
names(statmat.random2)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Express all six metrics as percentages.
statmat.random2[2:7] <- statmat.random2[2:7] * 100
table_one.random2<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random2)
table_one_matrix.random2<-print(table_one.random2)
##                       Stratified by method
##                        RSF_T         RSF_X         BAFT_T       BAFT_X       
##   n                      100           100           100          100        
##   acc (mean (SD))      79.68 (4.01)  80.50 (4.15)  72.15 (3.78) 75.29 (3.91) 
##   ppv (mean (SD))      86.55 (4.87)  86.97 (5.11)  84.69 (3.50) 86.18 (4.01) 
##   npv (mean (SD))      66.96 (9.64)  68.80 (10.19) 51.73 (5.27) 56.37 (6.08) 
##   sens (mean (SD))     85.31 (8.21)  86.15 (8.26)  74.42 (5.21) 78.00 (5.61) 
##   spec (mean (SD))     65.89 (16.12) 66.65 (16.80) 66.60 (9.65) 68.66 (11.45)
##   fmeasure (mean (SD)) 85.50 (3.47)  86.12 (3.51)  79.08 (3.20) 81.70 (3.19) 
##                       Stratified by method
##                        DNNSurv_T     DNNSurv_X     CSF           Weib         
##   n                      100           100           100           100        
##   acc (mean (SD))      82.10 (4.50)  82.76 (4.68)  76.31 (5.49)  91.04 (3.53) 
##   ppv (mean (SD))      89.60 (5.28)  89.63 (5.78)  78.84 (8.21)  94.07 (4.47) 
##   npv (mean (SD))      70.15 (11.31) 72.14 (11.67) 82.95 (14.92) 86.84 (10.55)
##   sens (mean (SD))     85.38 (8.42)  86.57 (8.68)  94.02 (9.68)  93.69 (6.01) 
##   spec (mean (SD))     74.07 (16.28) 73.41 (18.48) 32.90 (31.65) 84.53 (12.63)
##   fmeasure (mean (SD)) 87.00 (3.74)  87.57 (3.78)  84.89 (4.15)  93.64 (2.65) 
##                       Stratified by method
##                        p      test
##   n                               
##   acc (mean (SD))      <0.001     
##   ppv (mean (SD))      <0.001     
##   npv (mean (SD))      <0.001     
##   sens (mean (SD))     <0.001     
##   spec (mean (SD))     <0.001     
##   fmeasure (mean (SD)) <0.001

The following code requires loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures instead. Please jump to the next chunk to run the summary table.

Scenario 3 (lower panel of Table 1): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)

#library(tableone)
#bimatrix.random3<-data.frame(matrix(rep(NA,10000*100*9),ncol=9))
#for (i in 1:9){
#  bimatrix.random3[,i]<-ifelse(random1[[3]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random3)<-colnames(random1[[3]])[1:9]
#bimatrix.random3$sim<-random[[3]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*8),ncol=8)
#for (i in 1:100){
#  dat<-bimatrix.random3[which(bimatrix.random3$sim==i),]
#  for (j in 1:8){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#      ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }} # in many simulations, CATE predictions from CSF all >=0.
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X","BAFT_T","BAFT_X","DNNSurv_T","DNNSurv_X","CSF","Weib")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_weib_accuracy_summary_S3.RData"))
library(tidyr)
library(tableone)

# Reshape each accuracy metric matrix (100 simulations x 8 methods) from wide
# to long format so CreateTableOne can stratify by method.
# NOTE(review): the saved summary object appears to store specificity under the
# misspelled key "sepc"; fall back to the corrected key "spec" so this chunk
# keeps working if the intermediate file is ever re-saved with the right name.
spec_weib_S3 <- simulation_weib_accuracy_summary_S3$sepc
if (is.null(spec_weib_S3)) spec_weib_S3 <- simulation_weib_accuracy_summary_S3$spec
acc_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$acc), method, value, RSF_T:Weib, factor_key=TRUE)
ppv_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$ppv), method, value, RSF_T:Weib, factor_key=TRUE)
npv_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$npv), method, value, RSF_T:Weib, factor_key=TRUE)
sens_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$sens), method, value, RSF_T:Weib, factor_key=TRUE)
spec_long.random3<-gather(data.frame(spec_weib_S3), method, value, RSF_T:Weib, factor_key=TRUE)
fmeasure_long.random3<-gather(data.frame(simulation_weib_accuracy_summary_S3$fmeasure), method, value, RSF_T:Weib, factor_key=TRUE)
# Bind the value columns side by side; column 1 is the shared method factor.
statmat.random3<-data.frame(cbind(acc_long.random3,ppv_long.random3[,2],npv_long.random3[,2],sens_long.random3[,2],spec_long.random3[,2],fmeasure_long.random3[,2]))
names(statmat.random3)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Express all six metrics as percentages.
statmat.random3[2:7] <- statmat.random3[2:7] * 100
table_one.random3<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random3)
table_one_matrix.random3<-print(table_one.random3)
##                       Stratified by method
##                        RSF_T         RSF_X         BAFT_T       BAFT_X      
##   n                      100           100           100          100       
##   acc (mean (SD))      83.47 (2.16)  83.74 (2.20)  79.21 (2.83) 81.30 (2.57)
##   ppv (mean (SD))      86.61 (3.66)  86.42 (3.86)  87.52 (2.46) 88.06 (2.57)
##   npv (mean (SD))      76.62 (7.85)  78.17 (7.93)  62.53 (5.16) 66.87 (5.52)
##   sens (mean (SD))     91.33 (4.92)  92.08 (4.77)  82.76 (4.15) 85.53 (4.31)
##   spec (mean (SD))     63.91 (12.66) 62.95 (13.52) 70.37 (7.06) 70.77 (7.78)
##   fmeasure (mean (SD)) 88.72 (1.59)  88.97 (1.50)  84.99 (2.28) 86.67 (2.06)
##                       Stratified by method
##                        DNNSurv_T    DNNSurv_X     CSF           Weib        
##   n                      100          100           100           100       
##   acc (mean (SD))      86.73 (2.58) 86.77 (2.19)  74.33 (3.36)  92.12 (1.89)
##   ppv (mean (SD))      91.22 (3.19) 90.34 (3.70)  74.18 (3.96)  94.86 (2.53)
##   npv (mean (SD))      77.89 (8.26) 79.66 (7.75)  94.78 (8.29)  86.67 (6.79)
##   sens (mean (SD))     90.35 (5.21) 91.58 (4.73)  98.97 (3.09)  94.20 (3.63)
##   spec (mean (SD))     77.73 (9.68) 74.77 (11.60) 13.00 (17.12) 86.96 (7.14)
##   fmeasure (mean (SD)) 90.62 (2.05) 90.78 (1.60)  84.66 (1.57)  94.45 (1.41)
##                       Stratified by method
##                        p      test
##   n                               
##   acc (mean (SD))      <0.001     
##   ppv (mean (SD))      <0.001     
##   npv (mean (SD))      <0.001     
##   sens (mean (SD))     <0.001     
##   spec (mean (SD))     <0.001     
##   fmeasure (mean (SD)) <0.001

2.3 Figure 2

This figure shows the binned bias and RMSE when data are generated from standard logistic models under the balanced design and scenario 1. Because the intermediate results are too large, we have saved the summarized biases and RMSEs from the 100 simulations in “Simulations/intermediate_result_100simulations.RData”.

The following codes are examples for loading the predicted CATE on test datasets in 100 runs of the simulations. The codes are commented out because the result files are too large to be loaded for the manuscript submission. Please jump to the next chunk for loading summarized biases and RMSEs. We have saved intermediate results from 100 simulations in “Simulations/intermediate_result_100simulations.RData”.

Load predicted CATE from R-T, R-X, B-T, B-X, D-T, D-X and CSF when data is generated from standard logistic models under balanced design and scenario1:

#load(paste0(your_directory,"/logis_balanced_mediantt.RData"))

#random1.diff$ID<-rep(1:10000,100)
#random2.diff$ID<-rep(1:10000,100)
#random3.diff$ID<-rep(1:10000,100)
#random<-list(random1.diff,random2.diff,random3.diff)

Calculate binned bias and RMSE:

#RMSE_rsf_T<-RMSE_rsf_X<-RMSE_baft_T<-RMSE_baft_X<-RMSE_slogistic<-RMSE_weib<-RMSE_dnnsurv_T<-RMSE_dnnsurv_X<-list(rep(NA,100),rep(NA,100),rep(NA,100))
#bias_rsf_T<-bias_rsf_X<-bias_baft_T<-bias_baft_X<-bias_slogistic<-bias_weib<-bias_dnnsurv_T<-bias_dnnsurv_X<-list(rep(NA,100),rep(NA,100),rep(NA,100))
# random, setting 1 to setting 3
#for (s in 1:3){
#  for (i in 1:100){ # 100 runs of results
#    dat<-random[[s]][which(random[[s]]$sim==i),]
#    dat<-dat[order(dat$true_diff),]
    
#    quant<-quantile(dat$true_diff,probs=seq(0,1,length.out=51)) 
    
#    bias_rsf_T[[s]][i]<-0
#    bias_rsf_X[[s]][i]<-0
#    bias_baft_T[[s]][i]<-0
#    bias_baft_X[[s]][i]<-0
#    bias_slogistic[[s]][i]<-0
#    bias_weib[[s]][i]<-0
#    bias_dnnsurv_T[[s]][i]<-0
#    bias_dnnsurv_X[[s]][i]<-0
#    RMSE_rsf_T[[s]][i]<-0
#    RMSE_rsf_X[[s]][i]<-0
#    RMSE_baft_T[[s]][i]<-0
#    RMSE_baft_X[[s]][i]<-0
#    RMSE_slogistic[[s]][i]<-0
#    RMSE_weib[[s]][i]<-0
#    RMSE_dnnsurv_T[[s]][i]<-0
#    RMSE_dnnsurv_X[[s]][i]<-0
    
#    for (j in 1:50){
#      datq<-dat[which(dat$true_diff>=quant[j]&dat$true_diff<=quant[j+1]),]
#      bias_rsf_T[[s]][i]<-bias_rsf_T[[s]][i]+mean(datq$rsf_T-datq$true_diff)
#      bias_rsf_X[[s]][i]<-bias_rsf_X[[s]][i]+mean(datq$rsf_X_diff0-datq$true_diff)
#      bias_baft_T[[s]][i]<-bias_baft_T[[s]][i]+mean(datq$baft_T-datq$true_diff)
#      bias_baft_X[[s]][i]<-bias_baft_X[[s]][i]+mean(datq$baft_X_diff0-datq$true_diff)
#      bias_slogistic[[s]][i]<-bias_slogistic[[s]][i]+mean(datq$slogis-datq$true_diff)
#      bias_weib[[s]][i]<-bias_weib[[s]][i]+mean(datq$weib-datq$true_diff)
#      bias_dnnsurv_T[[s]][i]<-bias_dnnsurv_T[[s]][i]+mean(datq$deepsurv_T-datq$true_diff)
#      bias_dnnsurv_X[[s]][i]<-bias_dnnsurv_X[[s]][i]+mean(datq$deepsurv_X-datq$true_diff)
#      RMSE_rsf_T[[s]][i]<-RMSE_rsf_T[[s]][i]+sqrt(mean((datq$rsf_T-datq$true_diff)^2))
#      RMSE_rsf_X[[s]][i]<-RMSE_rsf_X[[s]][i]+sqrt(mean((datq$rsf_X_diff0-datq$true_diff)^2))
#      RMSE_baft_T[[s]][i]<-RMSE_baft_T[[s]][i]+sqrt(mean((datq$baft_T-datq$true_diff)^2))
#      RMSE_baft_X[[s]][i]<-RMSE_baft_X[[s]][i]+sqrt(mean((datq$baft_X_diff0-datq$true_diff)^2))
#      RMSE_slogistic[[s]][i]<-RMSE_slogistic[[s]][i]+sqrt(mean((datq$slogis-datq$true_diff)^2))
#      RMSE_weib[[s]][i]<-RMSE_weib[[s]][i]+sqrt(mean((datq$weib-datq$true_diff)^2))
#      RMSE_dnnsurv_T[[s]][i]<-RMSE_dnnsurv_T[[s]][i]+sqrt(mean((datq$deepsurv_T-datq$true_diff)^2))
#      RMSE_dnnsurv_X[[s]][i]<-RMSE_dnnsurv_X[[s]][i]+sqrt(mean((datq$deepsurv_X-datq$true_diff)^2))
#    }
#    bias_rsf_T[[s]][i]=bias_rsf_T[[s]][i]/50
#    bias_rsf_X[[s]][i]=bias_rsf_X[[s]][i]/50
#    bias_baft_T[[s]][i]=bias_baft_T[[s]][i]/50
#    bias_baft_X[[s]][i]=bias_baft_X[[s]][i]/50
#    bias_slogistic[[s]][i]=bias_slogistic[[s]][i]/50
#    bias_weib[[s]][i]=bias_weib[[s]][i]/50
#    bias_dnnsurv_T[[s]][i]=bias_dnnsurv_T[[s]][i]/50
#    bias_dnnsurv_X[[s]][i]=bias_dnnsurv_X[[s]][i]/50
#    RMSE_rsf_T[[s]][i]=RMSE_rsf_T[[s]][i]/50
#    RMSE_rsf_X[[s]][i]=RMSE_rsf_X[[s]][i]/50
#    RMSE_baft_T[[s]][i]=RMSE_baft_T[[s]][i]/50
#    RMSE_baft_X[[s]][i]=RMSE_baft_X[[s]][i]/50
#    RMSE_slogistic[[s]][i]=RMSE_slogistic[[s]][i]/50
#    RMSE_weib[[s]][i]=RMSE_weib[[s]][i]/50
#    RMSE_dnnsurv_T[[s]][i]=RMSE_dnnsurv_T[[s]][i]/50
#    RMSE_dnnsurv_X[[s]][i]=RMSE_dnnsurv_X[[s]][i]/50
#  }
#}
#save(bias_rsf_T,bias_rsf_X,bias_baft_T,bias_baft_X,bias_slogistic,bias_dnnsurv_T,bias_dnnsurv_X,bias_CSF,RMSE_rsf_T,RMSE_rsf_X,RMSE_baft_T,RMSE_baft_X,RMSE_slogistic,RMSE_dnnsurv_T,RMSE_dnnsurv_X,RMSE_CSF,file =paste0(your_directory,"/simulation_logis_bias_RMSE_summary.RData"))

Plot Figure 2:

par(mfrow=c(2,3))
par(mar = c(2.4, 4.8, 2, 1.2))
par(cex.lab=2)
par(cex.axis=2)
# Binned bias of each method over the 100 simulations, one panel per scenario.
# Method order on the x-axis: R-T, R-X, B-T, B-X, D-T, D-X, Logistic.
for (m in 1:3) {
  logis_bias <- list(
    simulation_logis_bias_RMSE_summary$bias_rsf_T[[m]],
    simulation_logis_bias_RMSE_summary$bias_rsf_X[[m]],
    simulation_logis_bias_RMSE_summary$bias_baft_T[[m]],
    simulation_logis_bias_RMSE_summary$bias_baft_X[[m]],
    simulation_logis_bias_RMSE_summary$bias_dnnsurv_T[[m]],
    simulation_logis_bias_RMSE_summary$bias_dnnsurv_X[[m]],
    simulation_logis_bias_RMSE_summary$bias_slogistic[[m]]
  )
  # Only the first panel carries the y-axis label, exactly as before.
  box_args <- list(logis_bias, xaxt = "n", ylim = c(-0.12, 0.12), cex.axis = 2)
  if (m == 1) box_args$ylab <- "Bias"
  do.call(boxplot, box_args)
  abline(h = 0, col = "red", lty = 2, lwd = 2)
  # Overlay each method's mean as a filled point; only the parametric
  # logistic model (position 7) used na.rm in the original, so keep that.
  for (k in 1:7) {
    points(x = k, y = mean(logis_bias[[k]], na.rm = (k == 7)),
           col = "darkslategrey", pch = 19, cex = 1.2)
  }
  title(paste0("Scenario ", m), cex.main = 2)
}

## plot bias and RMSE
# RMSE: binned RMSE of each method over the 100 simulations, one panel per
# scenario, same method order as the bias panels above.
for (m in 1:3) {
  logis_rmse <- list(
    simulation_logis_bias_RMSE_summary$RMSE_rsf_T[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_rsf_X[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_baft_T[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_baft_X[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_T[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_dnnsurv_X[[m]],
    simulation_logis_bias_RMSE_summary$RMSE_slogistic[[m]]
  )
  # Only the first panel carries the y-axis label, exactly as before.
  rmse_args <- list(logis_rmse, ylim = c(0, 0.25), cex.axis = 2,
                    names = c("R-T","R-X","B-T","B-X","D-T","D-X","Logistic"))
  if (m == 1) rmse_args$ylab <- "Binned RMSE"
  do.call(boxplot, rmse_args)
  # Overlay each method's mean; na.rm only for the logistic model
  # (position 7), matching the original calls.
  for (k in 1:7) {
    points(x = k, y = mean(logis_rmse[[k]], na.rm = (k == 7)),
           col = "darkslategrey", pch = 19, cex = 1.2)
  }
}

2.4 Table 2

This section reproduces prediction accuracy of each method (Table 2). Please run the last section (Figure 2 section) first.

The following code requires loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures instead. Please jump to the next chunk to run the summary table.

Scenario 1 (upper panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)

#random1.diff <- random1.diff[,c(2:5,8:9,1,6,7)]
#random2.diff <- random2.diff[,c(2:3,5:6,8:9,1,4,7)]
#random3.diff <- random3.diff[,c(2:3,5:6,8:9,1,4,7)]
#random<-list(random1.diff,random2.diff,random3.diff)

#library(tableone)
#bimatrix.random1<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
#  bimatrix.random1[,i]<-ifelse(random[[1]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random1)<-colnames(random[[1]])[1:8]
#bimatrix.random1$sim<-random[[1]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
#  dat<-bimatrix.random1[which(bimatrix.random1$sim==i),]
#  for (j in 1:7){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#      ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }}
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S1.RData"))
library(tidyr)
library(tableone)

# Reshape each accuracy metric matrix (100 simulations x 7 methods) from wide
# to long format so CreateTableOne can stratify by method.
# NOTE(review): the saved summary object appears to store specificity under the
# misspelled key "sepc"; fall back to the corrected key "spec" so this chunk
# keeps working if the intermediate file is ever re-saved with the right name.
spec_logis_S1 <- simulation_logis_accuracy_summary_S1$sepc
if (is.null(spec_logis_S1)) spec_logis_S1 <- simulation_logis_accuracy_summary_S1$spec
acc_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$sens), method, value, RSF_T:Logis, factor_key=TRUE)
spec_long.random1<-gather(data.frame(spec_logis_S1), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random1<-gather(data.frame(simulation_logis_accuracy_summary_S1$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# Bind the value columns side by side; column 1 is the shared method factor.
statmat.random1<-data.frame(cbind(acc_long.random1,ppv_long.random1[,2],npv_long.random1[,2],sens_long.random1[,2],spec_long.random1[,2],fmeasure_long.random1[,2]))
names(statmat.random1)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Express all six metrics as percentages.
statmat.random1[2:7] <- statmat.random1[2:7] * 100
table_one.random1<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random1)
table_one_matrix.random1<-print(table_one.random1)
##                       Stratified by method
##                        RSF_T         RSF_X0        BAFT_T       BAFT_X0     
##   n                      100           100           100          100       
##   acc (mean (SD))      79.04 (4.40)  79.75 (4.53)  78.63 (3.65) 80.65 (3.73)
##   ppv (mean (SD))      84.06 (5.25)  84.67 (5.51)  85.21 (4.18) 86.58 (4.57)
##   npv (mean (SD))      72.20 (7.75)  73.24 (7.91)  69.28 (5.86) 72.34 (6.28)
##   sens (mean (SD))     83.84 (7.56)  84.38 (7.68)  81.03 (5.81) 83.07 (5.98)
##   spec (mean (SD))     70.49 (12.90) 71.51 (13.37) 74.34 (9.16) 76.33 (9.99)
##   fmeasure (mean (SD)) 83.58 (3.85)  84.13 (3.94)  82.87 (3.18) 84.56 (3.18)
##                       Stratified by method
##                        DNNSurv_T     DNNSurv_X     Logis         p      test
##   n                      100           100           100                    
##   acc (mean (SD))      83.23 (4.83)  83.75 (5.06)  91.56 (4.28)  <0.001     
##   ppv (mean (SD))      87.62 (5.84)  87.72 (5.81)  94.32 (5.20)  <0.001     
##   npv (mean (SD))      78.02 (8.68)  79.26 (9.47)  88.74 (8.56)  <0.001     
##   sens (mean (SD))     86.81 (7.58)  87.59 (7.71)  92.87 (6.22)  <0.001     
##   spec (mean (SD))     76.85 (13.63) 76.91 (13.55) 89.22 (10.73) <0.001     
##   fmeasure (mean (SD)) 86.83 (4.03)  87.28 (4.16)  93.34 (3.44)  <0.001

The following code requires loading the predictions on the test dataset for 100 runs of the simulations. Since the result file is too large, we have summarized the accuracy measures instead. Please jump to the next chunk to run the summary table.

Scenario 2 (middle panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)

#library(tableone)
#bimatrix.random2<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
#  bimatrix.random2[,i]<-ifelse(random[[2]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random2)<-colnames(random[[2]])[1:8]
#bimatrix.random2$sim<-random[[2]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
#  dat<-bimatrix.random2[which(bimatrix.random2$sim==i),]
#  for (j in 1:7){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#      ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }} 
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S2.RData"))
library(tidyr)
library(tableone)
# Summary table for Scenario 2 (middle panel of Table 2):
# each element of `simulation_logis_accuracy_summary_S2` is a
# 100-simulation x 7-method matrix of one accuracy measure. Reshape each to
# long format, combine them, convert proportions to percentages, and print
# mean (SD) stratified by method.
acc_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$sens), method, value, RSF_T:Logis, factor_key=TRUE)
# FIX: was `$sepc` (typo). The saved object is named `spec` (see the
# save() call above), so `$sepc` returns NULL and gather() errors.
spec_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$spec), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random2<-gather(data.frame(simulation_logis_accuracy_summary_S2$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# Every *_long frame has the same row (method) order, so columns align.
statmat.random2<-data.frame(cbind(acc_long.random2,ppv_long.random2[,2],npv_long.random2[,2],sens_long.random2[,2],spec_long.random2[,2],fmeasure_long.random2[,2]))
names(statmat.random2)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Scale all six measures from proportions to percentages for display.
statmat.random2[2:7]<-statmat.random2[2:7]*100
table_one.random2<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random2)
table_one_matrix.random2<-print(table_one.random2)
##                       Stratified by method
##                        RSF_T        RSF_X0       BAFT_T       BAFT_X0     
##   n                      100          100          100          100       
##   acc (mean (SD))      80.98 (3.54) 82.05 (3.67) 79.21 (3.18) 81.97 (3.20)
##   ppv (mean (SD))      81.45 (6.34) 82.54 (6.59) 78.84 (4.39) 81.66 (4.81)
##   npv (mean (SD))      82.19 (6.00) 83.39 (6.36) 80.12 (4.06) 82.99 (4.39)
##   sens (mean (SD))     81.13 (8.74) 82.28 (9.04) 79.80 (5.53) 82.61 (5.83)
##   spec (mean (SD))     80.84 (9.18) 81.83 (9.36) 78.64 (5.97) 81.35 (6.40)
##   fmeasure (mean (SD)) 80.74 (3.92) 81.83 (4.07) 79.13 (3.27) 81.91 (3.25)
##                       Stratified by method
##                        DNNSurv_T    DNNSurv_X    Logis        p      test
##   n                      100          100          100                   
##   acc (mean (SD))      83.13 (4.29) 83.99 (4.20) 91.90 (3.73) <0.001     
##   ppv (mean (SD))      83.31 (6.89) 84.34 (6.95) 92.68 (6.23) <0.001     
##   npv (mean (SD))      84.71 (6.63) 85.70 (7.26) 92.41 (5.94) <0.001     
##   sens (mean (SD))     83.79 (9.05) 84.53 (9.83) 91.58 (7.47) <0.001     
##   spec (mean (SD))     82.48 (9.48) 83.46 (9.58) 92.21 (7.25) <0.001     
##   fmeasure (mean (SD)) 83.00 (4.59) 83.79 (4.68) 91.75 (3.98) <0.001

The following code requires loading the predictions on the test dataset for all 100 simulations. Since that result file is too large, we have pre-summarized the accuracy measures. Please jump to the next chunk to run the summary table.

Scenario 3 (lower panel of Table 2): (The message “Error at i=” below indicates a simulation in which CSF predicted all CATE > 0. We have mentioned this in the Results section of the main text.)

#library(tableone)
#bimatrix.random3<-data.frame(matrix(rep(NA,10000*100*8),ncol=8))
#for (i in 1:8){
#  bimatrix.random3[,i]<-ifelse(random[[3]][,i]>=0,1,0) 
#}
#colnames(bimatrix.random3)<-colnames(random[[3]])[1:8]
#bimatrix.random3$sim<-random[[3]]$sim
#acc<-ppv<-npv<-sens<-spec<-fmeasure<-matrix(rep(NA,100*7),ncol=7)
#for (i in 1:100){
#  dat<-bimatrix.random3[which(bimatrix.random3$sim==i),]
#  for (j in 1:7){
#    tryCatch({
#      metrics<-table(dat[,j],dat$true_diff)
#      tn<-metrics[1,1]
#      tp<-metrics[2,2]
#      fn<-metrics[1,2]
#      fp<-metrics[2,1]
#      acc[i,j]<-(tn+tp)/(tn+tp+fn+fp)
#      ppv[i,j]<-tp/(tp+fp)
#      npv[i,j]<-tn/(tn+fn)
#      sens[i,j]<-tp/(tp+fn)
#      spec[i,j]<-tn/(tn+fp)
#      fmeasure[i,j]<-2*tp/(2*tp+fp+fn)}
#      ,error=function(e){
#        print(paste("Error at i=",i,j))
#      })
#  }}
#colnames(acc)<-colnames(ppv)<-colnames(npv)<-colnames(sens)<-colnames(spec)<-colnames(fmeasure)<-c("RSF_T","RSF_X0","BAFT_T","BAFT_X0","DNNSurv_T","DNNSurv_X","Logis")
#save(acc, ppv, npv, sens,spec,fmeasure,file = paste0(your_directory,"/simulation_logis_accuracy_summary_S3.RData"))
library(tidyr)
library(tableone)
# Summary table for Scenario 3 (lower panel of Table 2):
# each element of `simulation_logis_accuracy_summary_S3` is a
# 100-simulation x 7-method matrix of one accuracy measure. Reshape each to
# long format, combine them, convert proportions to percentages, and print
# mean (SD) stratified by method.
acc_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$acc), method, value, RSF_T:Logis, factor_key=TRUE)
ppv_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$ppv), method, value, RSF_T:Logis, factor_key=TRUE)
npv_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$npv), method, value, RSF_T:Logis, factor_key=TRUE)
sens_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$sens), method, value, RSF_T:Logis, factor_key=TRUE)
# FIX: was `$sepc` (typo). The saved object is named `spec` (see the
# save() call above), so `$sepc` returns NULL and gather() errors.
spec_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$spec), method, value, RSF_T:Logis, factor_key=TRUE)
fmeasure_long.random3<-gather(data.frame(simulation_logis_accuracy_summary_S3$fmeasure), method, value, RSF_T:Logis, factor_key=TRUE)
# Every *_long frame has the same row (method) order, so columns align.
statmat.random3<-data.frame(cbind(acc_long.random3,ppv_long.random3[,2],npv_long.random3[,2],sens_long.random3[,2],spec_long.random3[,2],fmeasure_long.random3[,2]))
names(statmat.random3)[2:7]<-c("acc","ppv","npv","sens","spec","fmeasure")
# Scale all six measures from proportions to percentages for display.
statmat.random3[2:7]<-statmat.random3[2:7]*100
table_one.random3<-CreateTableOne(vars=c("acc","ppv","npv","sens","spec","fmeasure"),strata="method",data=statmat.random3)
table_one_matrix.random3<-print(table_one.random3)
##                       Stratified by method
##                        RSF_T        RSF_X0       BAFT_T       BAFT_X0     
##   n                      100          100          100          100       
##   acc (mean (SD))      82.12 (2.28) 82.15 (2.35) 83.39 (2.39) 84.16 (2.33)
##   ppv (mean (SD))      83.61 (4.36) 83.48 (4.50) 85.86 (3.16) 86.11 (3.35)
##   npv (mean (SD))      81.37 (5.63) 81.83 (6.00) 80.45 (3.99) 82.00 (4.24)
##   sens (mean (SD))     86.78 (6.08) 87.12 (6.41) 85.69 (4.05) 86.99 (4.29)
##   spec (mean (SD))     75.66 (8.82) 75.26 (9.26) 80.19 (5.46) 80.22 (5.93)
##   fmeasure (mean (SD)) 84.89 (2.12) 84.95 (2.20) 85.68 (2.16) 86.42 (2.10)
##                       Stratified by method
##                        DNNSurv_T    DNNSurv_X    Logis        p      test
##   n                      100          100          100                   
##   acc (mean (SD))      85.63 (2.72) 85.68 (2.60) 92.60 (2.00) <0.001     
##   ppv (mean (SD))      86.64 (4.23) 86.76 (4.54) 93.98 (3.25) <0.001     
##   npv (mean (SD))      85.25 (5.46) 85.37 (5.57) 91.26 (4.12) <0.001     
##   sens (mean (SD))     89.47 (5.24) 89.49 (5.36) 93.43 (3.65) <0.001     
##   spec (mean (SD))     80.32 (7.73) 80.40 (8.34) 91.44 (5.10) <0.001     
##   fmeasure (mean (SD)) 87.83 (2.36) 87.87 (2.22) 93.61 (1.76) <0.001