# Libraries
pacman::p_load(tidyverse,survival, ggplot2, survival, tidyverse, survminer, here)

Read in raw UNOS kidney data

# Locate the raw extract relative to the project root and read it in.
# read_csv() already returns a tibble, so its result is kept as-is instead of
# being re-wrapped with tibble(); this leaves the object readr produced
# untouched so that problems(data) / spec(data) can be used to follow up on
# the parsing warning reported below.
input.data <- here("raw_kidney_aim1.csv")
data <- read_csv(input.data)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 189271 Columns: 478
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (201): hcvdr, WL_ORG, USE_WHICH_PRA, DONATION, ON_DIALYSIS, ANTIBODY_TES...
## dbl (198): NUM_PREV_TX, CURRENT_PRA, PEAK_PRA, CREAT_CLEAR, GFR, MAX_KDPI_LO...
## lgl  (79): COD_WL, COD_OSTXT_WL, C_PEPTIDE, C_PEPTIDEDATE, CITIZEN_COUNTRY, ...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Cleaning

# Collapse ABO subtype codes into the four main blood groups.
#
# abo: character vector of blood-type codes (e.g. "A1", "A2B", "AB", "O").
# Returns a factor with values among "A", "AB", "B", "O". Codes that are
# missing or not listed below become NA rather than being silently counted
# as "B", and the plain "AB" code is now recognised as group AB (it used to
# fall through to the "B" catch-all).
collapse_abo <- function(abo) {
  as.factor(case_when(
    abo %in% c("A", "A1", "A2") ~ "A",
    abo %in% c("AB", "A1B", "A2B") ~ "AB",
    abo %in% c("B") ~ "B",
    abo %in% c("O") ~ "O",
    TRUE ~ NA_character_
  ))
}

# Derive the categorical covariates used in the analysis data set.
data <- data %>%
  mutate(
    # Three-level ethnicity factor; every code other than 1 or 2 (including
    # a missing code) is grouped into "Others".
    eth_group_cat = as.factor(case_when(
      eth_group == 1 ~ "Caucasians",
      eth_group == 2 ~ "AfricanAmericans",
      TRUE ~ "Others"
    )),

    # DIAB codes 1, 5 and 998 are treated as "NoDiabetes", any other recorded
    # code as "Diabetes". A missing DIAB stays NA: `%in%` is FALSE for NA, so
    # without the explicit check missing values would be labelled "Diabetes".
    has_diab = as.factor(case_when(
      is.na(DIAB) ~ NA_character_,
      DIAB %in% c(1, 5, 998) ~ "NoDiabetes",
      TRUE ~ "Diabetes"
    )),

    # Recipient and donor blood groups share the same recoding.
    r_Blood = collapse_abo(ABO),
    don_Blood = collapse_abo(ABO_DON),

    # NA in perip_vasc_new propagates to NA here.
    PVD = as.factor(ifelse(perip_vasc_new == 1, "Yes", "No")),

    # Donor/recipient HCV status as a 0-3 coded factor; any value other than
    # the three listed (including NA) is coded 0.
    hcvdr_num = as.factor(case_when(
      hcvdr == "D+R+" ~ 3,
      hcvdr == "D+R-" ~ 2,
      hcvdr == "D-R+" ~ 1,
      TRUE ~ 0
    ))
  )

# Keep only the identifier, outcome and covariate columns needed downstream,
# then restrict to complete cases (rows with no NA in any kept column).
full.covariates <- data %>%
  select(all_of(c(
    "REGION", "CTR_CODE", "TRR_ID_CODE",
    "PTIME", "PSTATUS", "GTIME_KI", "GSTATUS_KI",
    "ptime_10yr", "pstatus_10yr", "gtime_10yr", "gstatus_10yr",
    "hcvdr", "hcvdr_num",
    "AGE", "GENDER", "PRA", "r_Blood", "KDRI", "PVD", "COLD_ISCH_KI",
    "prev_tx_num", "eth_group_cat", "genderd_num",
    "don_Blood", "AGE_DON", "first_week_dial",
    "LOS", "has_diab", "SERUM_CREAT", "BMI_CALC", "TRTREJ1YKI"
  ))) %>%
  drop_na()

Randomly sample 19,000 observations

# Draw 19,000 complete-case rows without replacement.
# The seed is fixed so that re-running the script reproduces the same sample
# (and therefore the same saved file); the value itself is arbitrary.
# slice_sample() would silently return fewer rows if not enough are
# available, so fail loudly instead, as sample_n() did.
stopifnot(nrow(full.covariates) >= 19000)
set.seed(20240101)
JDS_data <- full.covariates %>% slice_sample(n = 19000)

Save data

# Write the sampled analysis data set to a CSV file in the working directory.
write_csv(JDS_data, "JDS_Data.csv")