empty_list2[[i]] <- CRT.tr
plotcp(empty_list2[[i]])
}
# Refit the regression tree for y on data1 once per slot of empty_list2 and
# print each fit's complexity-parameter table.
for (i in seq_along(empty_list2)) {
  empty_list2[[i]] <- CRT.tr <- rpart(
    y ~ ., data1,
    method = "anova", control = control
  )
  printcp(CRT.tr)
}

# Same procedure for AHD on data2nna with column 15 dropped.
# NOTE(review): this still uses method = "anova" for AHD; confirm that a
# regression tree (rather than a classification tree) is intended here.
for (i in seq_along(empty_list2)) {
  empty_list2[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2nna[, -15],
    method = "anova", control = control
  )
  printcp(CRT.tr)
}
# Share of each AHD class in the complete-case data.
table(data2nna$AHD) / nrow(data2nna)
0.4612795 * 0.61903

# Prune the fifth stored tree at the chosen complexity parameter.
CT <- prune(empty_list2[[5]], cp = 0.0310118)

# Draw the pruned tree several times, switching the per-node counts in the
# labels off and on.
for (show_counts in c(FALSE, TRUE, FALSE, TRUE, TRUE)) {
  plot(CT, uniform = FALSE)
  text(CT, use.n = show_counts)
}

CT$variable.importance
# Last row of the cp table, i.e. the entry for the pruned tree's final size.
CT$cptable[nrow(CT$cptable), ]
0.61903 * 0.50072191
0.61903 * 0.63804637
# Single new observation used to query the pruned tree.
new_data2 <- data.frame(
  Age = 58, Sex = 1, ChestPain = "nonanginal", RestBP = 132, Chol = 224,
  Fbs = 0, RestECG = 2, MaxHR = 173, ExAng = 0, Oldpeak = 3.2, Slope = 1,
  Ca = 2, Thal = "reversable"
)

# type = "class" is only valid for classification fits; rpart stops with
# "Invalid prediction" when it is requested from an anova tree. Pick the
# prediction type from the fitted method so this call works for either fit.
pred_type <- if (identical(CT$method, "class")) "class" else "vector"
predict(CT, newdata = new_data2, type = pred_type)
predict(CT, newdata = new_data2)

table(data2nna$AHD)
CT
empty_list2[[5]]
summary(empty_list2[[5]])
predict(CT, newdata = new_data2, type = pred_type)

# Inspect how the response and Thal are stored.
data2nna$Thal
data2nna$AHD
class(data2nna$AHD)
# Tree-growing settings: very small cp so the tree is grown large, no
# surrogate splits, and 10-fold cross-validation inside rpart.
control <- rpart.control(
  minbucket = 2,
  cp = 0.0001,
  maxsurrogate = 0,
  usesurrogate = 0,
  xval = 10
)
empty_list2 <- vector(mode = "list", length = 10)

# Ten classification fits of AHD, each followed by its cp plot.
for (i in seq_along(empty_list2)) {
  empty_list2[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2nna[, -15],
    method = "class", control = control
  )
  plotcp(CRT.tr)
}

# Ten fresh fits again, this time printing the cp table of each.
for (i in seq_along(empty_list2)) {
  empty_list2[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2nna[, -15],
    method = "class", control = control
  )
  printcp(CRT.tr)
}
0.40146 * 0.61903

# Prune the ninth stored classification tree and draw it (twice) with the
# per-node counts shown.
CT <- prune(empty_list2[[9]], cp = 0.0310118)
plot(CT, uniform = FALSE)
text(CT, use.n = TRUE)
plot(CT, uniform = FALSE)
text(CT, use.n = TRUE)

CT$variable.importance
# Last row of the cp table of the pruned tree.
CT$cptable[nrow(CT$cptable), ]
0.61903 * 0.32846715
0.61903 * 0.40145985

# Single new observation to classify with the pruned tree.
new_data2 <- data.frame(
  Age = 58, Sex = 1, ChestPain = "nonanginal", RestBP = 132, Chol = 224,
  Fbs = 0, RestECG = 2, MaxHR = 173, ExAng = 0, Oldpeak = 3.2, Slope = 1,
  Ca = 2, Thal = "reversable"
)
predict(CT, newdata = new_data2, type = "class")
CT
CT
# Repeated K-fold cross-validation comparing a neural network (column 1)
# with a logistic regression (column 2) by misclassification rate.
set.seed(1)
Nrep <- 20 # number of replicates of CV
K <- 10 # K-fold CV on each replicate
n.models <- 2 # number of different models to fit
n <- nrow(data2nna)
y <- data2nna[[15]]
yhat <- matrix(0, n, n.models)
CV.rate <- matrix(0, Nrep, n.models)
for (j in 1:Nrep) {
  Ind <- CVInd(n, K)
  for (k in 1:K) {
    # Rows outside fold k train the models; rows inside it are predicted.
    fold_train <- data2nna[-Ind[[k]], -15]
    fold_test <- data2nna[Ind[[k]], -15]

    out <- nnet(
      AHD ~ ., fold_train,
      linout = FALSE, size = 5, decay = 0.1, maxit = 1000, trace = FALSE
    )
    phat <- as.numeric(predict(out, fold_test))
    yhat[Ind[[k]], 1] <- as.numeric(phat >= 0.5)

    out <- glm(AHD ~ ., fold_train, family = binomial(link = "logit"))
    phat <- as.numeric(predict(out, fold_test, type = "response"))
    yhat[Ind[[k]], 2] <- as.numeric(phat >= 0.5)
  } # end of k loop
  # Fraction of observations whose thresholded prediction differs from y.
  CV.rate[j, ] <- apply(yhat, 2, function(pred) sum(y != pred) / n)
} # end of j loop
CV.rateAve <- apply(CV.rate, 2, mean)
CV.rateAve # averaged CV misclass rate
# Copy of data2 that keeps all rows: column 12 is made numeric, the
# categorical columns get explicit level orders, and a 0/1 version of AHD is
# appended as num_res.
data2mv <- data2
data2mv[[12]] <- as.numeric(data2mv[[12]])
data2mv[["ChestPain"]] <- factor(
  data2mv[["ChestPain"]],
  levels = c("typical", "asymptomatic", "nonanginal", "nontypical")
)
data2mv[["Thal"]] <- factor(
  data2mv[["Thal"]],
  levels = c("fixed", "normal", "reversable")
)
data2mv[["AHD"]] <- factor(data2mv[["AHD"]], levels = c("No", "Yes"))
data2mv[["num_res"]] <- if_else(data2mv[["AHD"]] == "Yes", 1, 0)
View(data2mv)
# Classification trees on data2mv with surrogate splits switched off.
control <- rpart.control(
  minbucket = 2,
  cp = 0.0001,
  maxsurrogate = 0,
  usesurrogate = 0,
  xval = 10
)
empty_list3 <- vector(mode = "list", length = 10)

# Ten fits with their cp plots.
for (i in seq_along(empty_list3)) {
  empty_list3[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2mv[, -15],
    method = "class", control = control
  )
  plotcp(CRT.tr)
}

# Ten fresh fits with their cp tables.
for (i in seq_along(empty_list3)) {
  empty_list3[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2mv[, -15],
    method = "class", control = control
  )
  printcp(CRT.tr)
}
help("rpart.control")

# Try usesurrogate = 2 with an increasing number of retained surrogate
# splits; each setting gets ten fits and their cp plots. The settings of the
# last pass (maxsurrogate = 100) remain in `control` afterwards.
for (max_sur in c(0, 5, 100)) {
  control <- rpart.control(
    minbucket = 2, cp = 0.0001,
    maxsurrogate = max_sur, usesurrogate = 2, xval = 10
  )
  empty_list3 <- vector(mode = "list", length = 10)
  for (i in seq_along(empty_list3)) {
    empty_list3[[i]] <- CRT.tr <- rpart(
      AHD ~ ., data2mv[, -15],
      method = "class", control = control
    )
    plotcp(CRT.tr)
  }
}

# Ten fresh fits under the final settings, printing each cp table.
for (i in seq_along(empty_list3)) {
  empty_list3[[i]] <- CRT.tr <- rpart(
    AHD ~ ., data2mv[, -15],
    method = "class", control = control
  )
  printcp(CRT.tr)
}

# Prune the second stored tree at the chosen cp and draw it.
CTmv <- prune(empty_list3[[2]], cp = 0.0215827)
plot(CTmv, uniform = FALSE)
text(CTmv)
# Scratch arithmetic.
xxx <- c(6.36, 4.055, 9.777, 12.429, 13.33)
mean(xxx)
987765 / 8
987.765 / 8
1324.9 / 15

# n * (n + 1) / 2 for n = 1..20, labelled with the first 20 letters.
n <- seq_len(20)
n.1 <- setNames(n * (n + 1) / 2, letters[seq_len(20)])
# "u" is not one of the first 20 letters, so that entry comes back as NA.
n.1[c("a", "e", "i", "o", "u")]
help("names")
0.3 * 0.55
# Two-sample statistic: difference in means over its standard error.
(3.01 - 2.91) / sqrt((1.01^2 / 70) + (0.75^2 / 75))
help("qt")
df <- 70 + 75 - 2
qt(0.975, df)

# One-sample statistic against a threshold of 15 * 80 / 100.
threshold <- (15 * 80) / 100
(15.5 - threshold) / (2.3 / sqrt(42))
qt(0.975, 41)

# Probabilities built from 11 out of 23: a single draw, three draws with
# unchanged odds, and three draws with the counts reduced after each draw.
11 / 23
(11 / 23) * (11 / 23) * (11 / 23)
(11 / 23) * (10 / 22) * (9 / 21)
1828 - 672
# Standard error of the sample proportion 672 / 1828.
se <- sqrt(((672 / 1828) * (1156 / 1828)) / 1828)
# 8
z <- 1.96
# Interval for 672 / 1828 using z standard errors on each side.
cbind(
  (672 / 1828) - z * se,
  (672 / 1828) + z * se
)
# Same construction for 1126 / 1843.
se2 <- sqrt(((1126 / 1843) * (717 / 1843)) / 1843)
cbind(
  (1126 / 1843) - z * se2,
  (1126 / 1843) + z * se2
)

# Difference of the two proportions and the statistic based on the pooled
# proportion.
(1126 / 1843) - (672 / 1828)
pooled <- ((1126 / 1843) * 1843 + (672 / 1828) * 1828) / (1843 + 1828)
pooled
((1126 / 1843) - (672 / 1828)) /
  sqrt(pooled * (1 - pooled) * ((1 / 1843) + (1 / 1828)))
help("qnorm")
0.01 / 2
1 - 0.005
qnorm(0.995)

# Binomial(5, 0.75): probability of at least 3 successes, then of none.
(choose(5, 3) * 0.75^3 * 0.25^2) +
  (choose(5, 4) * 0.75^4 * 0.25^1) +
  (choose(5, 5) * 0.75^5 * 0.25^0)
(choose(5, 0) * 0.75^0 * 0.25^5)

# Temporary vector, removed again straight away.
data <- c(0.59, 0.65, 0.69, 0.53, 0.6, 0.53, 0.58, 0.64, 0.46, 0.67, 0.51, 0.59)
rm(data)
knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(dplyr)
setwd("/Users/emiliolehoucq/Google Drive/Courses/7. Fall 2019/IEMS 304/Homeworks/Homework 9/")

# Sheet 1 holds the regression data, sheet 2 the AHD data.
data1 <- read_excel("HW7_data.xls")
data2 <- read_excel("HW7_data.xls", sheet = 2)

# Complete-case version: drop rows where column 12 or 13 holds the text "NA",
# then fix the column types and append a 0/1 copy of AHD.
data2nna <- data2[data2[[12]] != "NA" & data2[[13]] != "NA", ]
data2nna[[12]] <- as.numeric(data2nna[[12]])
data2nna[["ChestPain"]] <- factor(
  data2nna[["ChestPain"]],
  levels = c("typical", "asymptomatic", "nonanginal", "nontypical")
)
data2nna[["Thal"]] <- factor(
  data2nna[["Thal"]],
  levels = c("fixed", "normal", "reversable")
)
data2nna[["AHD"]] <- factor(data2nna[["AHD"]], levels = c("No", "Yes"))
data2nna[["num_res"]] <- if_else(data2nna[["AHD"]] == "Yes", 1, 0)

# Version that keeps every row of data2, with the same type conversions.
data2mv <- data2
data2mv[[12]] <- as.numeric(data2mv[[12]])
data2mv[["ChestPain"]] <- factor(
  data2mv[["ChestPain"]],
  levels = c("typical", "asymptomatic", "nonanginal", "nontypical")
)
data2mv[["Thal"]] <- factor(
  data2mv[["Thal"]],
  levels = c("fixed", "normal", "reversable")
)
data2mv[["AHD"]] <- factor(data2mv[["AHD"]], levels = c("No", "Yes"))
data2mv[["num_res"]] <- if_else(data2mv[["AHD"]] == "Yes", 1, 0)
# Install gbm only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("gbm", quietly = TRUE)) {
  install.packages("gbm")
}
library(gbm)
?gbm
View(data1)
hist(data1$y)

# Boosted regression trees for y with 10-fold cross-validation. The fit is
# done once; the original repeated the identical, unseeded call.
gbm1 <- gbm(
  y ~ .,
  data = data1, distribution = "gaussian", n.trees = 5000,
  shrinkage = 0.02, interaction.depth = 3, bag.fraction = .5,
  train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
  keep.data = TRUE, verbose = FALSE
)
gbm1

# Number of trees chosen by gbm.perf from the cross-validation error.
best.iter <- gbm.perf(gbm1, method = "cv")
?gbm.perf
best.iter

# CV r2: one minus CV error at the chosen iteration over the variance of the
# response. The earlier variant divided by var(CRT1$Strength), an object that
# is not part of this analysis, and has been dropped.
1 - gbm1$cv.error[best.iter] / var(data1$y) # CV r2
# Empty containers kept for compatibility with the rest of the session.
# (The first attempt wrote `btrees < c()`, a comparison rather than an
# assignment.)
btrees <- c()
best.iters <- c()

# Quick check that seq_along() walks the list positions 1..10.
empty_list <- vector(mode = "list", length = 10)
empty_list[1]
for (i in seq_along(empty_list)) {
  print(i)
}

# Fit ten boosted models and keep each complete fit in the list. `[[i]]` is
# required: `empty_list[i] <- fit` would store only one component of the fit.
# The result is assigned straight into the list so that no variable named
# `gbm` shadows the gbm() function.
for (i in seq_along(empty_list)) {
  empty_list[[i]] <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.02, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}
empty_list[[1]]
best.iter
gbm.perf(gbm1, method = "cv")

# Values are not auto-printed inside a for loop, so print explicitly.
for (i in seq_along(empty_list)) {
  best.iter <- gbm.perf(empty_list[[i]], method = "cv")
  print(best.iter)
}

# Ten boosted fits stored as complete objects.
boosted_trees <- vector(mode = "list", length = 10)
for (i in seq_along(boosted_trees)) {
  boosted_trees[[i]] <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.02, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}

# Best number of trees for each fit according to its CV error.
best.iterations <- list()
for (i in seq_along(boosted_trees)) {
  best.iter <- gbm.perf(boosted_trees[[i]], method = "cv")
  best.iterations[[i]] <- best.iter
  print(best.iter)
}

# CV r2 per fit. Each fit's own cv.error is indexed by its own best
# iteration; the earlier versions read gbm1$cv.error for every fit and
# indexed the scalar best.iter with [[i]].
for (i in seq_along(best.iterations)) {
  print(1 - boosted_trees[[i]]$cv.error[best.iterations[[i]]] / var(data1$y)) # CV r2
}
best.iterations[[1]]
# Seeded refit of the ten boosted models. The original ran this seeded
# sequence twice in a row; because the seed is reset each time, a single run
# leaves the same objects behind.
set.seed(1)
boosted_trees <- vector(mode = "list", length = 10)
for (i in seq_along(boosted_trees)) {
  boosted_trees[[i]] <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.02, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}

# Best number of trees for each fit according to its CV error.
best.iterations <- list()
for (i in seq_along(boosted_trees)) {
  best.iter <- gbm.perf(boosted_trees[[i]], method = "cv")
  best.iterations[[i]] <- best.iter
  print(best.iter)
}

# CV r2 per fit, read from that fit's own cv.error (not from gbm1).
for (i in seq_along(best.iterations)) {
  print(1 - boosted_trees[[i]]$cv.error[best.iterations[[i]]] / var(data1$y)) # CV r2
}
# Inspect the third boosted model at its own best iteration. `best.iter`
# still holds the value of the last model processed by the preceding loop, so
# the matching entry of best.iterations is used instead.
summary(boosted_trees[[3]], n.trees = best.iterations[[3]])
View(data1)

# One partial-dependence plot per predictor (i.var = 1, 2, 3). The result is
# printed explicitly because it is not displayed automatically inside a loop.
# (An earlier attempt used `data1[ , 2:]`, which is not valid R.)
for (i in seq_along(data1[, 2:4])) {
  print(plot(boosted_trees[[3]], i.var = i, n.trees = best.iterations[[3]]))
}
?plot.gbm

# viridis was installed before the two-variable plot in the original
# session; install it only when it is missing.
if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}
plot(boosted_trees[[3]], i.var = c(1, 3), n.trees = best.iterations[[3]])

# Prediction for one new observation.
new_data <- data.frame(x1 = 3.0, x2 = 28, x3 = 1.0)
predict(boosted_trees[[3]], newdata = new_data, n.trees = best.iterations[[3]])
# Second ensemble: same settings as boosted_trees but shrinkage = 0.2.
set.seed(1)
boosted_trees2 <- vector(mode = "list", length = 10)
for (i in seq_along(boosted_trees2)) {
  boosted_trees2[[i]] <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.2, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}

# Best number of trees for each fit according to its CV error.
best.iterations2 <- list()
for (i in seq_along(boosted_trees2)) {
  best.iter <- gbm.perf(boosted_trees2[[i]], method = "cv")
  best.iterations2[[i]] <- best.iter
  print(best.iter)
}

# CV r2 per fit, read from that fit's own cv.error (not from gbm1).
for (i in seq_along(best.iterations2)) {
  print(1 - boosted_trees2[[i]]$cv.error[best.iterations2[[i]]] / var(data1$y)) # CV r2
}

# Plots and summary for model 3 of the first ensemble. best.iter now belongs
# to the last model of boosted_trees2, so model 3's own entry in
# best.iterations is used.
plot.gbm(boosted_trees[[3]], i.var = c(1, 3), n.trees = best.iterations[[3]])
for (i in seq_along(data1[, 2:4])) {
  print(plot.gbm(boosted_trees[[3]], i.var = i, n.trees = best.iterations[[3]]))
}
summary.gbm(boosted_trees[[3]], n.trees = best.iterations[[3]])
# Install randomForest only when it is missing, then load it.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

# Ten seeded random forests with mtry = 1. Each fit is shown through the
# print() generic; the earlier attempts called print.randomForest() directly
# and randomForest(rForest), neither of which is a valid call. Because the
# seed is reset immediately before the final run in the original, this single
# run leaves the same forests behind.
set.seed(1)
random_forests <- vector(mode = "list", length = 10)
for (i in seq_along(random_forests)) {
  rForest <- randomForest(
    y ~ .,
    data = data1, mtry = 1, ntree = 500, nodesize = 3, importance = TRUE
  )
  random_forests[[i]] <- rForest
  print(rForest)
}

# Error-versus-number-of-trees plot for each forest.
for (i in seq_along(random_forests)) {
  plot(random_forests[[i]])
}
# Variable importance of the tenth forest.
importance(random_forests[[10]])
varImpPlot(random_forests[[10]])

# Look at how the stored forest is represented.
tree <- random_forests[[10]]
tree
typeof(random_forests[[10]])
typeof(tree)
random_forests[10]
typeof(random_forests[10])
typeof(rForest)
?partialPlot

# Partial-dependence plot for columns 2 to 4 of data1.
# NOTE(review): column 1 is presumably the response y, which is why the loop
# starts at 2 (an earlier attempt looped over 1:3) - confirm against data1.
# data1 comes from read_excel(), so it is converted with as.data.frame()
# first; the attempts that passed it unconverted were followed by this
# corrected form in the original session.
for (i in c(2:4)) {
  print(partialPlot(
    random_forests[[10]],
    pred.data = as.data.frame(data1),
    x.var = names(data1)[i], xlab = names(data1)[i], main = NULL
  ))
}

?predict
predict(random_forests[[10]], newdata = new_data)
# Ten seeded forests with mtry = 2.
set.seed(1)
random_forests2 <- vector(mode = "list", length = 10)
for (i in seq_along(random_forests2)) {
  random_forests2[[i]] <- rForest <- randomForest(
    y ~ .,
    data = data1, mtry = 2, ntree = 500, nodesize = 3, importance = TRUE
  )
  print(rForest)
}

# Ten seeded forests with mtry = 3.
set.seed(1)
random_forests3 <- vector(mode = "list", length = 10)
for (i in seq_along(random_forests3)) {
  random_forests3[[i]] <- rForest <- randomForest(
    y ~ .,
    data = data1, mtry = 3, ntree = 500, nodesize = 3, importance = TRUE
  )
  print(rForest)
}

# The mtry = 3 run is executed a second time from the same seed.
set.seed(1)
random_forests3 <- vector(mode = "list", length = 10)
for (i in seq_along(random_forests3)) {
  random_forests3[[i]] <- rForest <- randomForest(
    y ~ .,
    data = data1, mtry = 3, ntree = 500, nodesize = 3, importance = TRUE
  )
  print(rForest)
}
help("mtry")
help("randomForest")
# Seeded refit of the ten boosted models (shrinkage = 0.02).
set.seed(1)
boosted_trees <- vector(mode = "list", length = 10)
for (i in seq_along(boosted_trees)) {
  boosted_trees[[i]] <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.02, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}

# Best number of trees for each fit according to its CV error.
best.iterations <- list()
for (i in seq_along(boosted_trees)) {
  best.iter <- gbm.perf(boosted_trees[[i]], method = "cv")
  best.iterations[[i]] <- best.iter
  print(best.iter)
}
best.iterations[[1]]

# CV r2 per fit. cv.error lives on the fitted model, not on the stored
# iteration count, so it is read from boosted_trees[[i]]; the first attempt
# applied `$cv.error` to best.iterations[[i]].
for (i in seq_along(best.iterations)) {
  print(1 - boosted_trees[[i]]$cv.error[best.iterations[[i]]] / var(data1$y)) # CV r2
}

# Summary, two-variable plot and prediction for model 2 at its own best
# iteration (best.iter holds the value for the last model of the loop above).
summary.gbm(boosted_trees[[2]], n.trees = best.iterations[[2]])
plot.gbm(boosted_trees[[2]], i.var = c(1, 3), n.trees = best.iterations[[2]])
predict(boosted_trees[[2]], newdata = new_data, n.trees = best.iterations[[2]])

# CV r2 for the shrinkage = 0.2 ensemble fitted earlier.
for (i in seq_along(best.iterations2)) {
  print(1 - boosted_trees2[[i]]$cv.error[best.iterations2[[i]]] / var(data1$y)) # CV r2
}
# Seeded refit of the shrinkage = 0.2 ensemble.
set.seed(1)
boosted_trees2 <- vector(mode = "list", length = 10)
for (i in seq_along(boosted_trees2)) {
  boosted_trees2[[i]] <- gbm <- gbm(
    y ~ .,
    data = data1, distribution = "gaussian", n.trees = 5000,
    shrinkage = 0.2, interaction.depth = 3, bag.fraction = .5,
    train.fraction = 1, n.minobsinnode = 3, cv.folds = 10,
    keep.data = TRUE, verbose = FALSE
  )
}

# Best number of trees for each fit according to its CV error.
best.iterations2 <- list()
for (i in seq_along(boosted_trees2)) {
  best.iterations2[[i]] <- best.iter <- gbm.perf(boosted_trees2[[i]], method = "cv")
  print(best.iter)
}

# Show the stored values once more.
invisible(lapply(best.iterations2, print))

# CV r2 of each fit at its own best iteration.
for (i in seq_along(best.iterations2)) {
  cv_err_at_best <- boosted_trees2[[i]]$cv.error[best.iterations2[[i]]]
  print(1 - cv_err_at_best / var(data1$y)) # CV r2
}
# Simple linear regression on iris and its diagnostic plots, first through
# ggfortify's autoplot() and then through plot().
View(iris)
ols <- lm(Sepal.Width ~ Sepal.Length, data = iris)
library(ggfortify)
autoplot(ols)
plot(ols)

# Summary statistics of a small sample.
data <- c(17, 21, 9, 11, 14, 27)
mean(data)
sd(data)
var(data)

# Two expressions of the form intercept + slope * 35.
6671.89 + 366.1368 * 35
11020.47 + 483.3635 * 35
help("ls")
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# haven must be attached before read_sav() is called.
library(haven)

# Work from the project's data directory so the files can be addressed with
# relative paths. The earlier attempts used a leading "/", which points at
# the filesystem root instead of this directory.
setwd("/Users/emiliolehoucq/Google Drive/Publications/Quant paper/American_Trends_Panel_Wave_35_Pew_Research_Center/data")
clean_data <- readRDS("processed/clean_data_2_without_justifications.rds")
data <- read_sav("unprocessed/ATP W35.sav")
View(clean_data)
