rm(list = ls())

library(haven)
library(dplyr)
library(stringr)
library(readxl)
library(tableone)
library(ggalluvial)
library(ggpubr)
library(tidyr)
library(clipr)
library(anesrake)

setwd("/Volumes/GoogleDrive/My Drive/Collaborations/CPRC/County/")

w1_data0 <- read.csv("Data/updated_wave1_data.csv")[, -1]
w2_data0 <- read.csv("Data/updated_wave2_data.csv")[, -1] %>%
  mutate(vaccine_ever = ifelse(vaccine_ever == 1, 1, 0),
         test_positive_all = ifelse(is.na(test_positive) | test_positive %in% c(2, 3), 0, 1),
         info_from_w2 = ifelse(!is.na(vaccine_type) & vaccine_type == "Johnson & Johnson", jjvaccinedatetakenfrom, vaccinedatetakenfrom))


View(w1_data0 %>% filter(vaccine_first_date == vaccine_last_date) %>%
       dplyr::select(PID, vaccine_type, vaccine_first_date, vaccine_last_date))

View(w2_data0 %>% filter(vaccine_first_date == vaccine_last_date) %>%
       dplyr::select(PID, vaccine_type, vaccine_first_date, vaccine_last_date))

w1_data <- w1_data0 %>% mutate(SPA = case_when(
  spa_1 == 1 ~ '1',
  spa_2 == 1 ~ '2',
  spa_3 == 1 ~ '3',
  spa_4 == 1 ~ '4',
  spa_5 == 1 ~ '5',
  spa_6 == 1 ~ '6',
  spa_7 == 1 ~ '7',
  spa_8 == 1 ~ '8',
  spa_none == 99 ~ 'None')) %>% dplyr::select(
    PID, r001, n001, s001, 
    starts_with("rake_"),
    age_4c, age_bd, gender_3c, raceeth_5c, income_4c,
    SPA,
    starts_with("test_"),
    date_studytest, 
    vaccine_ever_v2,
    vaccine_type,
    vaccine_first_date,
    vaccine_last_date,
    wave
  ) 


w2_data <- w2_data0 %>% mutate(SPA = case_when(
  spa_1 == 1 ~ '1',
  spa_2 == 1 ~ '2',
  spa_3 == 1 ~ '3',
  spa_4 == 1 ~ '4',
  spa_5 == 1 ~ '5',
  spa_6 == 1 ~ '6',
  spa_7 == 1 ~ '7',
  spa_8 == 1 ~ '8',
  spa_none == 99 ~ 'None')) %>% dplyr::select(
    PID, r001, n001, s001, 
    starts_with("rake_"),
    age_4c, age_bd, gender_3c, raceeth_5c, income_4c,
    SPA,
    starts_with("test_"),
    date_studytest, 
    vaccine_ever_v2,
    vaccine_type,
    vaccine_first_date,
    vaccine_last_date,
    wave,
    ends_with("_w2") 
  )

# Merge the two datasets here
dat0 <- bind_rows(w1_data, w2_data) %>%
  group_by(PID) %>%
  mutate(nrows = n()) %>%
  arrange(PID, wave) 

#############################################
# Get weights here 

# Values from Jonathan:
#Race x Vac 
a1 <- c(1738001, 2029523, 258584, 789588, 1831219, 425007, 375786, 453082)
# Sex x Vac
a2 <- c(2555462, 2260234, 1490918, 1593386)
# Age x Vac
a3 <- c(866454, 1635032, 1268734, 806276, 238591, 785436, 1002778, 688886, 430864, 176949)
# Test Positive
a4 <- c(6873000, 1027000)

totN <- 7900000

targets <- list(a1 / sum(a1), a2 / sum(a2), a3 / sum(a3), a4 / sum(a4))

lapply(targets,  function(x) sum(x))

dat0 <- dat0 %>% mutate(
  # Some reason anesrake needs to have values start at 1 instead of 0
  test_positive_all_temp = test_positive_all + 1
) %>% as.data.frame()

targets <- list(a1 / sum(a1), a2 / sum(a2), a3 / sum(a3), a4 / sum(a4))

names(targets) <- c("rake_racxvac", "rake_sexxvac", "rake_agexvac", "test_positive_all_temp")
rakeout <- anesrake(targets, dat0, caseid = dat0$PID, type = "pctlim",
                    pctlim = 5,
                    verbose = TRUE)

all(rakeout$caseid == dat0$PID) # Can just append weightvec to screener
dat0$rakeWeight <- rakeout$weightvec

dat0 %>% group_by(rake_racxvac) %>%
  summarise(n = n(),
            x = sum(rakeWeight))

dat0 <- dat0 %>% dplyr::select(-starts_with("rake_"))

#######################################################

# Those who are only in one wave
dat0 %>% group_by(PID) %>%
  filter(nrows == 1) %>%
  group_by(wave) %>%
  summarise(n = n())

# Those who are in both waves
dat0 %>% group_by(PID) %>%
  filter(nrows == 2) %>%
  group_by(wave) %>%
  summarise(n = n())


dat.updated.vals <- dat0 %>%
  group_by(PID) %>%
  arrange(PID, wave) %>%
  dplyr::select(PID, wave, 
                vaccine_first_date, vaccine_last_date, vaccine_type,
                info_from_w2) %>%
  slice(n()) %>%
  rename(
    wave_updated = wave,
    vaccine_first_date_updated = vaccine_first_date,
    vaccine_last_date_updated = vaccine_last_date,
    vaccine_type_updated = vaccine_type,
    info_from_updated = info_from_w2
  )

# Baseline data (Wave 1 if you are Wave 1 or Wave 1 and 2; Wave 2 if you are wave 2 only)
dat.baseline <- dat0 %>%
  group_by(PID) %>%
  arrange(PID, wave) %>%
  dplyr::select(PID,
                age_4c, age_bd, gender_3c, raceeth_5c, income_4c,
                SPA) %>%
  rename(age_4c_baseline = age_4c,
         age_bd_baseline = age_bd,
         gender_3c_baseline = gender_3c,
         raceeth_5c_baseline = raceeth_5c,
         income_4c_baseline = income_4c,
         SPA_baseline = SPA) %>%
  slice(1) # Gets the first wave


dat <- left_join(dat0, dat.updated.vals, by = "PID")
dat <- left_join(dat, dat.baseline, by = "PID")

# Check
dat %>% dplyr::select(PID, wave, age_bd, age_bd_baseline)


p1 <- ggplot(dat %>% filter(nrows == 2 & wave == 1) %>% mutate(a = as.Date(vaccine_first_date_updated) - as.Date(vaccine_first_date)), 
             aes(x = a, fill = info_from_updated)) +
  geom_histogram() +
  geom_vline(xintercept = -14) +
  geom_vline(xintercept = 14) +
  xlab("Wave 2 - Wave 1 First Date")

p2 <- ggplot(dat %>% filter(nrows == 2 & wave == 1) %>% mutate(a = as.Date(vaccine_last_date_updated) - as.Date(vaccine_last_date)), 
             aes(x = a, fill = info_from_updated)) +
  geom_histogram() +
  geom_vline(xintercept = -14) +
  geom_vline(xintercept = 14) +
  xlab("Wave 2 - Wave 1 Last Date")

ggarrange(p1, p2, labels = c("A", "B"), common.legend = TRUE)


dat %>% filter(nrows == 2 & wave == 1) %>% group_by(vaccine_type, vaccine_type_updated) %>%
  summarise(n = n())

View(dat %>% filter(vaccine_type == "Moderna" & is.na(vaccine_type_updated)))
# Questions:
# What to do with ppl who change certain demos?
# What to with those with NO updated vaccine type?


# Create vaccine subgroups
dat <- dat %>% mutate(
  # If you are missing an updated vaccine type 
  vaccine_type_updated = ifelse(!is.na(vaccine_type) & is.na(vaccine_type_updated), vaccine_type, vaccine_type_updated),
  # Indicator for wave
  wave = factor(wave),
  # Self report positive = tested positive for COVID and only tested using rapid antigen or PCR 
  self_report_positive = ifelse(test_positive_all == 1 & test_antibody == 0 & (test_rapid == 1 | test_pcr == 1 ), "Yes", "No"),
  # If first and last date are the same, only keep first date and set last date to NA
  vaccine_last_date_updated = ifelse(vaccine_first_date_updated == vaccine_last_date_updated & !is.na(vaccine_first_date_updated), NA, vaccine_last_date_updated),
  # Create vaccination status
  vacc_status = case_when(
    # Unvaccinated
    vaccine_ever_v2 == 0 | (vaccine_type_updated != "Johnson & Johnson" & vaccine_first_date_updated >= date_studytest)  | (vaccine_type_updated == "Johnson & Johnson" & vaccine_last_date_updated >= date_studytest) ~ "Unvaccinated",
    # Partial
    !is.na(vaccine_first_date_updated) & vaccine_first_date_updated < date_studytest & (is.na(vaccine_last_date_updated) | date_studytest <= vaccine_last_date_updated) ~ "Partially",
    # Fully 
    vaccine_last_date_updated < date_studytest ~ "Fully"),
  # Days since vaccination
  days_since_vacc = as.Date(date_studytest, format = "%Y-%m-%d") - as.Date(vaccine_last_date_updated, format = "%Y-%m-%d"),
  # Vaccination groups (for fully vaccinated)
  vacc_group = case_when(
    is.na(days_since_vacc) | days_since_vacc < 0 ~ "Not Fully Vaccinated",
    days_since_vacc < 7 ~ "Less than 1wk",
    days_since_vacc < 14 ~ "1-to-2wk",
    days_since_vacc < 21 ~ "2-to-3wk",
    days_since_vacc < 30 ~ "3wk-to-1mo"),
  vacc_group = ifelse(is.na(vacc_group), ">1mo", vacc_group),
  vacc_group = factor(vacc_group, levels = c("Not Fully Vaccinated", "Less than 1wk", "1-to-2wk", "2-to-3wk", "3wk-to-1mo", ">1mo"), ordered = TRUE),
  # Get groups
  elevatedN = n001 >= 700,
  elevatedS = s001 >= 700,
  elevatedR = r001 >= 700,
  groups = case_when(
    vacc_status == "Fully" & elevatedN & (elevatedS | elevatedR) ~ "Fully Vaccinated and Infected",
    vacc_status == "Partially" & elevatedN ~ "Partially Vaccinated and Infected",
    vacc_status == "Unvaccinated" & elevatedN & (elevatedS | elevatedR) ~ "Unvaccinated and Infected",
    vacc_status == "Fully" & !elevatedN & (elevatedS | elevatedR) & self_report_positive == "No" ~ "Fully Vaccinated and Uninfected",
    vacc_status == "Partially" & !elevatedN & self_report_positive == "No" ~ "Partially Vaccinated and Uninfected",
    vacc_status== "Unvaccinated" & (!elevatedS & !elevatedR & !elevatedN) ~ "Unvaccinated and Uninfected", 
    vacc_status == "Fully" & !elevatedN & (elevatedS | elevatedR) & self_report_positive == "Yes" ~ "Fully Vaccinated and Inconclusive",    
    vacc_status == "Partially" & !elevatedN & self_report_positive == "Yes" ~ "Partially Vaccinated and Inconclusive",
    vacc_status == "Fully" & (!elevatedS & !elevatedR & !elevatedN) ~ "Nonresponder and Uninfected",
    !is.na(vaccine_type_updated) & is.na(vaccine_first_date_updated) & is.na(vaccine_last_date_updated) ~ "Inconclusive Vaccination"),
  groups = ifelse(is.na(groups), "Inconclusive", groups),
  groups = factor(groups, levels = c("Fully Vaccinated and Infected", 
                                     "Partially Vaccinated and Infected",
                                     "Unvaccinated and Infected",
                                     "Fully Vaccinated and Uninfected",
                                     "Partially Vaccinated and Uninfected",
                                     "Unvaccinated and Uninfected",
                                     "Nonresponder and Uninfected",
                                     "Fully Vaccinated and Inconclusive",
                                     "Partially Vaccinated and Inconclusive",
                                     "Inconclusive Vaccination",
                                     "Inconclusive"))
  
)

dat %>% group_by(groups) %>%
  summarise(n = n())

write_clip(dat %>% group_by(groups) %>%
             summarise(n = n()))

View(dat %>% dplyr::select(PID, wave, vacc_status, vacc_group, vaccine_ever_v2, vaccine_first_date_updated, vaccine_last_date_updated, date_studytest) %>%
       slice(1:50))

# Inconclusive group
write_clip(dat %>% filter(groups == "Inconclusive") %>% group_by(self_report_positive, vaccine_ever_v2, 
                                                           vacc_status,
                                                           elevatedN, elevatedS, elevatedR) %>%
              summarise(n = n()))


View(a <- dat %>% filter(groups == "Inconclusive") %>% dplyr::select(PID, self_report_positive, vaccine_ever_v2, 
                                                      vacc_status,
                                                      info_from_updated,
                                                      vaccine_type_updated,
                                                      vaccine_first_date_updated,
                                                      vaccine_last_date_updated,
                                                      vaccine_type,
                                                      vaccine_first_date,
                                                      vaccine_last_date,
                                                      date_studytest,
                                                      elevatedN, elevatedS, elevatedR))

write.csv(a, file = "Inconclusives.csv")
# Get demographic tables

# Simple summary statistics
dat.tableone <- dat %>% arrange(PID, wave) %>% group_by(PID) %>%
  mutate(cats = case_when(
    nrows == 1 & wave == 1 ~ "Wave 1 Only",
    nrows == 1 & wave == 2 ~ "Wave 2 Only",
    nrows == 2 ~ "Wave 1 and 2"
  )) %>%
  slice(n())

names(dat.tableone)
catvars = c("age_4c", "gender_3c", "raceeth_5c", "income_4c", "SPA",
            "vaccine_ever_v2", "test_positive_all")


T1 <- tableone::CreateTableOne(catvars, strata = "cats", data = dat.tableone, 
                         includeNA = TRUE, factorVars = catvars)

View(print(T1))


# Get tables based on RBD, S, values (baseline)

write_clip(dat %>% filter(nrows == 1 | (nrows == 2 & wave == 1)) %>% group_by(vacc_status) %>%
  summarise(n = sum(!is.na(r001))) %>%
  pivot_wider(names_from = vacc_status, values_from = n) %>%
  dplyr::select(3, 2, 1))

write_clip(dat %>% filter(nrows == 1 | (nrows == 2 & wave == 1)) %>% group_by(vacc_status) %>%
             summarise(n = sum(!is.na(n001))) %>%
             pivot_wider(names_from = vacc_status, values_from = n) %>%
             dplyr::select(3, 2, 1))

write_clip(dat %>% filter(nrows == 1 | (nrows == 2 & wave == 1)) %>% group_by(vacc_status) %>%
             summarise(n = sum(!is.na(s001))) %>%
             pivot_wider(names_from = vacc_status, values_from = n) %>%
             dplyr::select(3, 2, 1))

dat %>% filter(nrows == 1 | (nrows == 2 & wave == 1))  %>% 
  group_by(age_4c_baseline) %>%
  summarise(n = n())

# Variables: age_4c_baseline, gender_3c_baseline, raceeth_5c, test_positive_all

tmp <- dat %>% filter(nrows == 1 | (nrows == 2 & wave == 1)) %>%
  group_by(
    #age_4c_baseline,
    #gender_3c_baseline,
    #raceeth_5c_baseline,
    test_positive_all,
    vacc_status) %>%
  summarise(
    R = median(s001, na.rm = TRUE),
    R1 = quantile(s001, probs = 0.25),
    R2 = quantile(s001, probs = 0.75), 
    IQR = paste0(R, " (", R1, ", ", R2, ")")
    ) %>%
  dplyr::select(1, 2, 6) %>% 
  pivot_wider(names_from = vacc_status, 
                    values_from = IQR) %>%
  dplyr::select(1, 4, 3, 2)

clipr::write_clip(tmp)
clipr::write_clip(tmp[, -1])

# For wave 2 | wave people
write_clip(dat %>% filter(nrows == 2 & wave == 2) %>% group_by(vacc_status) %>%
             summarise(n = sum(!is.na(r001))) %>%
             pivot_wider(names_from = vacc_status, values_from = n) %>%
             dplyr::select(3, 2, 1))

write_clip(dat %>% filter(nrows == 2 & wave == 2) %>% group_by(vacc_status) %>%
             summarise(n = sum(!is.na(n001))) %>%
             pivot_wider(names_from = vacc_status, values_from = n) %>%
             dplyr::select(3, 2, 1))

write_clip(dat %>% filter(nrows == 2 & wave == 2) %>% group_by(vacc_status) %>%
             summarise(n = sum(!is.na(s001))) %>%
             pivot_wider(names_from = vacc_status, values_from = n) %>%
             dplyr::select(3, 2, 1))

dat %>% filter(nrows == 2 & wave == 2) %>% 
  group_by(test_positive_all) %>%
  summarise(n = n())

# Variables: age_4c_baseline, gender_3c_baseline, raceeth_5c, test_positive_all

tmp <- dat %>% filter(nrows == 2 & wave == 2) %>%
  group_by(
    #age_4c_baseline,
    #gender_3c_baseline,
    #raceeth_5c_baseline,
    test_positive_all,
    vacc_status) %>%
  summarise(
    R = median(n001, na.rm = TRUE),
    R1 = quantile(n001, probs = 0.25),
    R2 = quantile(n001, probs = 0.75), 
    IQR = paste0(R, " (", R1, ", ", R2, ")")
  ) %>%
  dplyr::select(1, 2, 6) %>% 
  pivot_wider(names_from = vacc_status, 
              values_from = IQR) %>%
  dplyr::select(1, 4, 3, 2)

tmp
clipr::write_clip(tmp[, -1])

############
dat.tmp <- dat %>% filter(wave == 1 | (wave == 2 & days_since_vacc >= 180)) %>%
  group_by(PID) %>%
  mutate(n = n()) %>%
  filter(n == 2)


ggplot(data = dat.tmp, 
       aes(x = wave, 
           y = r001, group = PID,
           color)) +
  geom_line() +
  geom_abline(intercept = 700, linetype = "dashed", color = "red", lwd = 0.5)


# Look at longitudinal data
names(dat)
dat.long <- dat %>% group_by(PID) %>% mutate(n = n()) %>%
  filter(n == 2) %>% dplyr::select(
    PID, 
    r001, n001, s001, 
    groups, vacc_group, 
    vaccine_type,
    vaccine_first_date, vaccine_last_date, date_studytest,
    info_from_w2,
    wave, days_since_vacc) %>% arrange(PID)


dat.long

table(dat$groups)


# Started off uninfected and vaccinated
p1 <- ggplot(data = dat.sub %>% filter(wave == 2 | (wave == 1 & str_detect(groups, "Uninfected") & str_detect(groups, "Vaccinated"))), 
             aes(x = wave, 
                 y = n001, group = PID)) +
  geom_line()

# Started off uninfected and unvaccinated
p2 <- ggplot(data = dat.sub %>% filter(wave == 2 | (wave == 1 & str_detect(groups, "Uninfected") & str_detect(groups, "Unvaccinated"))), 
             aes(x = wave, 
                 y = n001, group = PID)) +
  geom_line()
ggarrange(p1, p2)


# Look at those who started off fully vaccinated
ggplot(data = dat.sub %>% filter( wave == 2 | (wave == 1 & str_detect(groups, "Fully Vaccinated"))), 
       aes(x = wave, 
           y = r001, group = PID)) +
  geom_line()

# Look at those who started off fully vaccinated (2wk > )
ggplot(data = dat.sub %>% filter(wave == 2 | 
                                   (wave == 1 & str_detect(groups, "Fully Vaccinated") & vacc_group > "2-to-3wk")), 
       aes(x = wave, 
           y = r001, group = PID,
           color)) +
  geom_line() +
  geom_abline(intercept = 700, linetype = "dashed", color = "red", lwd = 0.5)


dat.sub2 <- dat.long %>% 
  dplyr::select(wave, PID, n001, r001, s001, 
                vaccine_type,
                vaccine_first_date, vaccine_last_date,
                date_studytest, info_from_w2,
                groups, days_since_vacc)  %>%
  group_by(PID) %>% 
  gather("n001", "r001", "s001", 
         "groups", 
         "days_since_vacc", 
         "vaccine_type",
         "vaccine_first_date", "vaccine_last_date",
         "date_studytest", "info_from_w2",
         key = variable, value = number) %>% 
  unite(combi, variable, wave) %>% 
  spread(combi, number) 





tmp <- dat.sub2 %>% group_by(groups_1, groups_2) %>% 
  summarise(n = n())

tmp

# Alluvial plot

# Started off partially vaccinated or unvaccinated
ggplot(tmp %>% filter(str_detect(groups_1, c("Partially Vaccinated", "Unvaccinated"))),
       aes(y = n, axis1 = groups_1, axis2 = groups_2)) +
  geom_alluvium(aes(fill = n, color = n)) +
  geom_stratum() +
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("Wave 1", "Wave 2"),
                   expand = c(0.1, 0.1)) +
  theme_void() %+replace%
  theme(axis.text.x = element_text(size = 14, vjust = 4),
        legend.position = "none")


### STOP

dat.sub2
tableone::CreateTableOne(vars = c("age_4c", "age_bd", "gender_3c", "raceeth_5c", "income_4c", "self_report_positive", "vacc_status"),
                         data = w1_data,
                         includeNA = TRUE,
                         factorVars = c("age_4c", "gender_3c", "raceeth_5c", "income_4c", "self_report_positive", "vacc_status"))

#w1_data <- w1_data %>% filter(is.na(inWave2) | !inWave2)
tmp <- w1_data %>% filter(PID == "DA-012097")
tmp$wave <- 2


dat %>% group_by(vacc_status) %>% 
  summarise(n = n(),
            mean = mean(r001),
            sd = sd(r001),
            median = median(r001),
            IQR = IQR(r001))

w1_data %>% group_by(groups) %>% 
  summarise(n = n(),
            mean = mean(r001),
            sd = sd(r001),
            median = median(r001),
            IQR = IQR(r001))

w1_data %>% filter(is.na(groups))  %>% 
  group_by(self_report_positive, vacc_status, elevatedN, elevatedR, elevatedS) %>%
  summarise(n = n())

w1_data %>% filter(groups == "Fully Vaccinated and Uninfected") %>% 
  group_by(self_report_positive, vacc_status, elevatedN, elevatedR, elevatedS) %>%
  summarise(n = n())


w1_data %>% filter(groups == "Inconclusive") %>% 
  group_by(self_report_positive, vacc_status, elevatedN, elevatedR, elevatedS) %>%
  summarise(n = n())


# Histograms by groups


ggplot(w1_data %>% filter(groups %in% c(
  "Fully Vaccinated and Infected",
  "Partially Vaccinated and Infected",
  "Unvaccinated and Infected",
  "Fully Vaccinated and Uninfected",
  "Partially Vaccinated and Uninfected",
  #"Unvaccinated and Uninfected",
  #"Nonresponder and Uninfected",
  #"Fully Vaccinated and Inconclusive",
  #"Partially Vaccinated and Inconclusive",
  "Inconclusive"
)), aes(x = groups, y = r001)) +
  geom_boxplot() +
  ylab("RBD") +
  xlab("")


ggplot(dat %>% filter(groups %in% c("Fully Vaccinated and Uninfected", "Fully Vaccinated and Infected")),
       aes(x = vacc_group, y = r001)) +
  facet_wrap(~groups + wave) +
  ylab("RBD") +
  xlab("") +
  geom_boxplot()

table(dat$nrows)


## Infected vs. Uninfected
a <- dat %>% filter(nrows == 2 & wave == 1 & groups == "Fully Vaccinated and Infected" &days_since_vacc > 14) %>% pull(PID)
b <- dat %>% filter(nrows == 2 & wave == 1 & groups == "Fully Vaccinated and Uninfected" & days_since_vacc > 14) %>% pull(PID)


tmp <- dat %>% filter(PID %in% c(a, b)) %>%
  mutate(`Infection Status` = case_when(
    PID %in% a ~ "Initially Infected",
    PID %in% b ~ "Initally Uninfected"
  ))

p1 <- ggplot(tmp, aes(x = wave, y = r001, group = PID, color = `Infection Status`)) +
  geom_line() +
  xlab("Wave") +
  ylab("RBD") 

p2 <- ggplot(tmp %>% group_by(`Infection Status`, wave) %>% summarise(n = mean(r001)), 
       aes(x = wave, y = n, group = `Infection Status`, color = `Infection Status`)) + 
  geom_line() +
  xlab("Wave") +
  ylab("RBD") 

ggarrange(p1, p2, common.legend = TRUE)

a <- dat %>% filter((nrows == 2 & groups == "Fully Vaccinated and Uninfected" & wave == 1) | 
                  (nrows == 2 & groups == "Fully Vaccinated and Infected" & wave == 2)) %>%
  group_by(PID) %>%
  mutate(n = n()) %>%
  filter(n == 2)


# Vaccine status conditional on infection
a1 <- dat %>% filter(nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Unvaccinated" ) %>% pull(PID)
a2 <- dat %>% filter(nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Partially" ) %>% pull(PID)
a3 <- dat %>% filter(nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Fully", days_since_vacc > 14) %>% pull(PID)
a4 <- dat %>% filter(nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Unvaccinated" ) %>% pull(PID)
a5 <- dat %>% filter(nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Partially" ) %>% pull(PID)
a6 <- dat %>% filter(nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Fully" ) %>% pull(PID)

tmp2 <- dat %>% filter(PID %in% a1)

tmp2 <- dat %>% mutate(
  tmp_group = case_when(
  nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Unvaccinated" ~ 1,
  nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Partially" ~ 2,
  nrows == 2 & wave == 1 & str_detect(groups, "Infected") & vacc_status == "Fully" ~ 3,
  nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Unvaccinated" ~ 4,
  nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Partially" ~ 5,
  nrows == 2 & wave == 2 & str_detect(groups, "Infected") & vacc_status == "Fully" ~ 6,
  ),
  tmp_group = factor(tmp_group, levels = c(1:6), labels = c("", "B", "C", "D" ,"E", "F"))
  )

tmp2 <- dat %>% filter(PID %in% a1)
p1 <- ggplot(tmp2, aes(x = wave, y = r001, group = PID, 
                       color = vacc_status)) +
  geom_line() +
  geom_point() +
  ylim(c(0, 25000)) +
  xlab("Wave") +
  ylab("RBD")

tmp2 <- dat %>% filter(PID %in% a3)
p2 <- ggplot(tmp2, aes(x = wave, y = r001, group = PID, 
                       color = vacc_status)) +
  geom_line() +
  geom_point() +
  ylim(c(0,25000))+
  xlab("Wave") +
  ylab("RBD")

ggarrange(p1, p2, common.legend = TRUE)

b <- dat %>% filter(nrows == 2 & wave == 1 & groups == "Fully Vaccinated and Uninfected" & days_since_vacc > 14) %>% pull(PID)

