rm(list = ls())
library(glmnet)
library(xrnet)
library(doParallel)
library(dplyr)
library(xtune)
library(randomForest)
library(fwelnet)
library(ggplot2)
# Make sure working directory is ".../JDS2107-007/"
getwd()


sim = 1
out <- numeric()
for(sim in 1:50) {
  load(paste0("RDA/Methyl/results/analysis_w_avgext", sim, ".RData"))
  out <- c(out, rr_pmse, rr_r2, glm_pmse, glm_r2, aug_pmse, aug_r2)
}

measure = rep(rep(c("PMSE", "R2"), times = 3), 50)
method = rep(rep(c("RR", "GLM", "AUG"), each = 2), 50)
out <- data.frame(y  = out,
                  measure = measure,
                  method = method
)

out %>% group_by(measure, method) %>% summarise(x = median(y))


# Figure 4 in the main text
ggplot(out %>% filter(measure == "R2"), aes(x = method, y = y, fill = method)) + 
  geom_boxplot(outlier.size = -10) + 
  xlab("") +
  scale_x_discrete(
    limits = c("GLM", "AUG", "RR"),
    labels = c("AUG" = "Augmented Ridge",
               "GLM" = "Ridge", 
               "RR" = "Two-Level Ridge")) +
  ylab(expression(Test~~R^2)) +
  #guides(colour = guide_legend(nrow = 1)) +
  theme(legend.position = "none",
        axis.text.x = element_text(color = "black", size = 12),
        axis.text.y = element_text(color = "black", size = 12),
        legend.title = element_text(size = 0),
        legend.text = element_text(size = 14))
