## Entering flow centrality simulations
## Files are in "/Users/Monnie/Dropbox/2018 Fall Leave/FlowThroughCentrality/Simulations"
library(tidyverse)

# Make ./Nodes50Deg6Pert3 the working directory so that the bare file names
# passed to read.csv() and pdf() further down are resolved inside it.
# NOTE(review): this is relative to wherever the script is launched from --
# confirm the session starts in the Simulations folder named in the header.
setwd("./Nodes50Deg6Pert3")

# Column names shared by every data set: "Network" followed by the
# interleaved pairs Cent1, Tier1, Cent2, Tier2, ..., Cent50, Tier50.
# rbind() stacks the two name vectors as rows; as.vector() then reads the
# matrix column by column, which yields the Cent/Tier alternation.
varNames <- c(
  "Network",
  as.vector(rbind(paste0("Cent", 1:50), paste0("Tier", 1:50)))
)

# Enter all data files, one for each centrality measure
# Create variable to indicate measure
# rearrange columns so that all Centralities and all Tiers are together

## Betweenness Centrality
# Skip the first 5 lines of the file and read at most 100 rows, no header row.
# NOTE(review): the file name is spelled "Betweennesss" (three s's) -- confirm
# that this matches the file on disk.
dataBW <- read.csv(
  file = "Nodes50_AvgDeg06_UW_01_Pert03_Betweennesss.csv",
  skip = 5, nrows = 100, header = FALSE
)
colnames(dataBW) <- varNames
# A single value is recycled to every row, so this does not fail when the
# file yields fewer than 100 rows (rep("BW", 100) would).
dataBW$Measure <- "BW"

# Split the wide frame into its Cent* columns and its Tier* columns.
dataBWa <- dataBW %>% dplyr::select(Network, Measure, starts_with("Cent"))
dataBWb <- dataBW %>% dplyr::select(Network, Measure, starts_with("Tier"))

# Stack each half into long form. Both calls walk columns 1..50 over the same
# rows, so row i of dataBWtier and row i of dataBWcent describe the same
# Network / tier index.
dataBWtier <- gather(dataBWb, key = "Tier", value = "Node", Tier1:Tier50)
dataBWcent <- gather(dataBWa, key = "Tier", value = "Centrality", Cent1:Cent50)

# Attach only the Centrality column. Binding the two full frames duplicates
# Network/Measure/Tier, and the names given to those duplicates depend on the
# dplyr version (Network1 in old releases, Network...5 from 1.0.0 on), so
# dropping Network1:Tier1 afterwards is not portable.
dataBWall <- bind_cols(dataBWtier, dplyr::select(dataBWcent, Centrality))

# "<label> <n>" -> Sample = n, "Tier<k>" -> Tier = k; the text parts land in
# Trash* columns, which are discarded.
dataBWall <- dataBWall %>%
  separate(Network, into = c("Trash", "Sample"), sep = " ") %>%
  separate(Tier, into = c("Trash1", "Tier"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  dplyr::select(-starts_with("Trash"))
dataBWall$Sample <- as.integer(dataBWall$Sample)
dataBWall$Tier <- as.integer(dataBWall$Tier)
dataBWall$Measure <- as.factor(dataBWall$Measure)

## Closeness Centrality
# Skip the first 5 lines of the file and read at most 100 rows, no header row.
dataCL <- read.csv(
  file = "Nodes50_AvgDeg06_UW_01_Pert03_Closeness.csv",
  skip = 5, nrows = 100, header = FALSE
)
colnames(dataCL) <- varNames
# A single value is recycled to every row, so this does not fail when the
# file yields fewer than 100 rows (rep("CL", 100) would).
dataCL$Measure <- "CL"

# Split the wide frame into its Cent* columns and its Tier* columns.
dataCLa <- dataCL %>% dplyr::select(Network, Measure, starts_with("Cent"))
dataCLb <- dataCL %>% dplyr::select(Network, Measure, starts_with("Tier"))

# Stack each half into long form. Both calls walk columns 1..50 over the same
# rows, so row i of dataCLtier and row i of dataCLcent describe the same
# Network / tier index.
dataCLtier <- gather(dataCLb, key = "Tier", value = "Node", Tier1:Tier50)
dataCLcent <- gather(dataCLa, key = "Tier", value = "Centrality", Cent1:Cent50)

# Attach only the Centrality column. Binding the two full frames duplicates
# Network/Measure/Tier, and the names given to those duplicates depend on the
# dplyr version (Network1 in old releases, Network...5 from 1.0.0 on), so
# dropping Network1:Tier1 afterwards is not portable.
dataCLall <- bind_cols(dataCLtier, dplyr::select(dataCLcent, Centrality))

# "<label> <n>" -> Sample = n, "Tier<k>" -> Tier = k; the text parts land in
# Trash* columns, which are discarded.
dataCLall <- dataCLall %>%
  separate(Network, into = c("Trash", "Sample"), sep = " ") %>%
  separate(Tier, into = c("Trash1", "Tier"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  dplyr::select(-starts_with("Trash"))
dataCLall$Sample <- as.integer(dataCLall$Sample)
dataCLall$Tier <- as.integer(dataCLall$Tier)
dataCLall$Measure <- as.factor(dataCLall$Measure)


## Flow Between Centrality
# Skip the first 5 lines of the file and read at most 100 rows, no header row.
dataFB <- read.csv(
  file = "Nodes50_AvgDeg06_UW_01_Pert03_FlowBetween.csv",
  skip = 5, nrows = 100, header = FALSE
)
colnames(dataFB) <- varNames
# A single value is recycled to every row, so this does not fail when the
# file yields fewer than 100 rows (rep("FB", 100) would).
dataFB$Measure <- "FB"

# Split the wide frame into its Cent* columns and its Tier* columns.
dataFBa <- dataFB %>% dplyr::select(Network, Measure, starts_with("Cent"))
dataFBb <- dataFB %>% dplyr::select(Network, Measure, starts_with("Tier"))

# Stack each half into long form. Both calls walk columns 1..50 over the same
# rows, so row i of dataFBtier and row i of dataFBcent describe the same
# Network / tier index.
dataFBtier <- gather(dataFBb, key = "Tier", value = "Node", Tier1:Tier50)
dataFBcent <- gather(dataFBa, key = "Tier", value = "Centrality", Cent1:Cent50)

# Attach only the Centrality column. Binding the two full frames duplicates
# Network/Measure/Tier, and the names given to those duplicates depend on the
# dplyr version (Network1 in old releases, Network...5 from 1.0.0 on), so
# dropping Network1:Tier1 afterwards is not portable.
dataFBall <- bind_cols(dataFBtier, dplyr::select(dataFBcent, Centrality))

# "<label> <n>" -> Sample = n, "Tier<k>" -> Tier = k; the text parts land in
# Trash* columns, which are discarded.
dataFBall <- dataFBall %>%
  separate(Network, into = c("Trash", "Sample"), sep = " ") %>%
  separate(Tier, into = c("Trash1", "Tier"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  dplyr::select(-starts_with("Trash"))
dataFBall$Sample <- as.integer(dataFBall$Sample)
dataFBall$Tier <- as.integer(dataFBall$Tier)
dataFBall$Measure <- as.factor(dataFBall$Measure)


## Flow Through Centrality
# Skip the first 5 lines of the file and read at most 100 rows, no header row.
dataFT <- read.csv(
  file = "Nodes50_AvgDeg06_UW_01_Pert03_Flowthrough.csv",
  skip = 5, nrows = 100, header = FALSE
)
colnames(dataFT) <- varNames
# A single value is recycled to every row, so this does not fail when the
# file yields fewer than 100 rows (rep("FT", 100) would).
dataFT$Measure <- "FT"

# Split the wide frame into its Cent* columns and its Tier* columns.
dataFTa <- dataFT %>% dplyr::select(Network, Measure, starts_with("Cent"))
dataFTb <- dataFT %>% dplyr::select(Network, Measure, starts_with("Tier"))

# Stack each half into long form. Both calls walk columns 1..50 over the same
# rows, so row i of dataFTtier and row i of dataFTcent describe the same
# Network / tier index.
dataFTtier <- gather(dataFTb, key = "Tier", value = "Node", Tier1:Tier50)
dataFTcent <- gather(dataFTa, key = "Tier", value = "Centrality", Cent1:Cent50)

# Attach only the Centrality column. Binding the two full frames duplicates
# Network/Measure/Tier, and the names given to those duplicates depend on the
# dplyr version (Network1 in old releases, Network...5 from 1.0.0 on), so
# dropping Network1:Tier1 afterwards is not portable.
dataFTall <- bind_cols(dataFTtier, dplyr::select(dataFTcent, Centrality))

# "<label> <n>" -> Sample = n, "Tier<k>" -> Tier = k; the text parts land in
# Trash* columns, which are discarded.
dataFTall <- dataFTall %>%
  separate(Network, into = c("Trash", "Sample"), sep = " ") %>%
  separate(Tier, into = c("Trash1", "Tier"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  dplyr::select(-starts_with("Trash"))
dataFTall$Sample <- as.integer(dataFTall$Sample)
dataFTall$Tier <- as.integer(dataFTall$Tier)
dataFTall$Measure <- as.factor(dataFTall$Measure)

## Stable betweenness centrality
# Skip the first 5 lines of the file and read at most 100 rows, no header row.
dataSB <- read.csv(
  file = "Nodes50_AvgDeg06_UW_01_Pert03_StableBetween.csv",
  skip = 5, nrows = 100, header = FALSE
)
colnames(dataSB) <- varNames
# A single value is recycled to every row, so this does not fail when the
# file yields fewer than 100 rows (rep("SB", 100) would).
dataSB$Measure <- "SB"

# Split the wide frame into its Cent* columns and its Tier* columns.
dataSBa <- dataSB %>% dplyr::select(Network, Measure, starts_with("Cent"))
dataSBb <- dataSB %>% dplyr::select(Network, Measure, starts_with("Tier"))

# Stack each half into long form. Both calls walk columns 1..50 over the same
# rows, so row i of dataSBtier and row i of dataSBcent describe the same
# Network / tier index.
dataSBtier <- gather(dataSBb, key = "Tier", value = "Node", Tier1:Tier50)
dataSBcent <- gather(dataSBa, key = "Tier", value = "Centrality", Cent1:Cent50)

# Attach only the Centrality column. Binding the two full frames duplicates
# Network/Measure/Tier, and the names given to those duplicates depend on the
# dplyr version (Network1 in old releases, Network...5 from 1.0.0 on), so
# dropping Network1:Tier1 afterwards is not portable.
dataSBall <- bind_cols(dataSBtier, dplyr::select(dataSBcent, Centrality))

# "<label> <n>" -> Sample = n, "Tier<k>" -> Tier = k; the text parts land in
# Trash* columns, which are discarded.
dataSBall <- dataSBall %>%
  separate(Network, into = c("Trash", "Sample"), sep = " ") %>%
  separate(Tier, into = c("Trash1", "Tier"), sep = "(?<=[A-Za-z])(?=[0-9])") %>%
  dplyr::select(-starts_with("Trash"))
dataSBall$Sample <- as.integer(dataSBall$Sample)
dataSBall$Tier <- as.integer(dataSBall$Tier)
dataSBall$Measure <- as.factor(dataSBall$Measure)

## Keep only the rows belonging to the first six tiers (Tier <= 6) of each
## measure, then stack the five subsets into one frame.
# which() discards rows where the comparison is NA, as subset() does.
dataBWfirst6 <- dataBWall[which(dataBWall$Tier <= 6), , drop = FALSE]
dataCLfirst6 <- dataCLall[which(dataCLall$Tier <= 6), , drop = FALSE]
dataFBfirst6 <- dataFBall[which(dataFBall$Tier <= 6), , drop = FALSE]
dataFTfirst6 <- dataFTall[which(dataFTall$Tier <= 6), , drop = FALSE]
dataSBfirst6 <- dataSBall[which(dataSBall$Tier <= 6), , drop = FALSE]
dataAllFirst6 <- bind_rows(
  dataBWfirst6,
  dataCLfirst6,
  dataFBfirst6,
  dataFTfirst6,
  dataSBfirst6
)
# Make sure Measure is a single factor after the five frames are combined.
dataAllFirst6[["Measure"]] <- as.factor(dataAllFirst6[["Measure"]])

##################### Start editing here ###################

## Combine all data into one data frame
## Plot Nodes for Tier values
## Plot Centralities for Tier values

## Data in long format for first 6 Tiers
# dataAllFirst6 is stacked from the data*first6 subsets, which are already in
# long form (one row per Sample / Measure / Tier with Node and Centrality).
# It therefore has no Network column and no wide Cent1:Cent6 / Tier1:Tier6
# columns to gather, merge or separate, and the previous version of this step
# could not run (missing comma in the second gather() call, and references to
# the undefined objects tier6n50deg6pert6 and tier3n50deg6pert3).
# The only thing still missing for the plot and summary further down is a
# factor that labels each Centrality value with its tier.
tier6n50deg6pert3 <- dataAllFirst6
tier6n50deg6pert3$CentLevel <- factor(
  paste0("Cent", tier6n50deg6pert3$Tier),
  levels = paste0("Cent", 1:6) # explicit levels keep Cent1..Cent6 in order
)

# Line plot of Centrality against Sample, one colour per CentLevel and one
# facet row per Measure, written to a PDF in the working directory.
pdf(file = "tier6n50deg6pert3Lines.pdf")
# A ggplot object is only drawn when it is printed. Auto-printing happens at
# the interactive prompt but not when the script is run through source(), so
# print explicitly; otherwise the PDF is written without a plot.
# finally = dev.off() closes the device even if building the plot fails.
tryCatch(
  print(
    ggplot(tier6n50deg6pert3,
           aes(x = Sample, y = Centrality, colour = CentLevel)) +
      geom_line() +
      facet_grid(Measure ~ ., scales = "free")
  ),
  finally = dev.off()
)

# Mean, standard deviation and coefficient of variation (in percent) of
# Centrality for every CentLevel x Measure combination.
groupedtier6n50deg6pert3 <- tier6n50deg6pert3 %>%
  group_by(CentLevel, Measure) %>%
  summarise(
    mean = mean(Centrality),
    sd = sd(Centrality),
    cv = 100 * (sd(Centrality) / mean(Centrality))
  ) %>%
  ungroup() # summarise() only peels off one grouping level; drop the rest

# Emit the summary as a LaTeX table. The object built above is
# groupedtier6..., so that is the name passed here. xtable is never attached
# by this script, hence the explicit namespace, and print() makes the table
# appear when the script is run through source() as well.
print(xtable::xtable(groupedtier6n50deg6pert3))

## Data in long format for all tiers
alltiers <- bind_rows(dataBW, dataCL, dataFB, dataFT, dataSB)
# The Cent* and Tier* columns alternate in the wide frames (Cent1, Tier1,
# Cent2, Tier2, ...), so a Tier1:Tier50 range would also sweep up
# Cent2..Cent50. Select the Tier columns by prefix instead.
# factor_key = TRUE keeps Level ordered Tier1..Tier50 rather than
# alphabetically.
# NOTE(review): the original call was left unfinished; this stacks only the
# Tier* columns and leaves Cent1..Cent50 wide -- confirm that is the layout
# wanted here.
alltiersLong <- gather(
  alltiers,
  key = Level, value = Tier,
  starts_with("Tier"),
  factor_key = TRUE
)

## More plots
## See http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization

## Some histograms. Data are not Normal for the most part.
# One panel per measure (rows) and tier 1-3 (columns), in the same
# BW, FB, FT, SB, CL order as before; par(mfrow = ...) fills the grid row by
# row, so the measure loop is the outer one.
# The panels are drawn from dataAllFirst6 because no dataAllFirst3 object (or
# BWTier1-style column) is created anywhere in this script.
# NOTE(review): plotting Centrality is an assumption based on the normality
# remark above -- switch to Node if the node ids were meant.
oldPar <- par(mfrow = c(5, 3))
for (measure in c("BW", "FB", "FT", "SB", "CL")) {
  for (tier in 1:3) {
    inPanel <- dataAllFirst6$Measure == measure & dataAllFirst6$Tier == tier
    hist(
      dataAllFirst6$Centrality[which(inPanel)],
      main = paste0(measure, "Tier", tier),
      xlab = "Centrality"
    )
  }
}
# Put the graphics layout back so later plots are not squeezed into the grid.
par(oldPar)

## Create tibble in long format

## Run tests for variance on dependent variables

## References
## http://psychologicalstatistics.blogspot.com/2006/05/what-is-all-this-stuff-about.html
## https://www.rdocumentation.org/packages/vegan/versions/2.4-2/topics/mrpp
## http://cc.oulu.fi/~jarioksa/softhelp/vegan/html/mrpp.html
