dat<- list(data1, data2, data3)
lapply(dat, dim)
## [[1]]
## [1] 200 5
##
## [[2]]
## [1] 200 14
##
## [[3]]
## [1] 200 7
lapply(dat, summary)
## [[1]]
## x1 x2 x3 x4 x5
## Min. :-7.4977 Min. :14.23 Min. :-8.5779 Min. :-2.81789 0: 48
## 1st Qu.: 0.6827 1st Qu.:17.91 1st Qu.:-0.6469 1st Qu.:-0.68795 1: 50
## Median : 4.3953 Median :19.06 Median : 3.1827 Median : 0.06250 2:102
## Mean : 3.5734 Mean :19.88 Mean : 1.0531 Mean :-0.04922
## 3rd Qu.: 6.4950 3rd Qu.:20.75 3rd Qu.: 4.2886 3rd Qu.: 0.62000
## Max. : 9.9319 Max. :27.59 Max. : 6.6156 Max. : 2.99029
##
## [[2]]
## x1 x2 x3 x4
## Min. :-7.4977 Min. :17.23 Min. : 3.178 Min. :-2.81789
## 1st Qu.:-1.9728 1st Qu.:20.27 1st Qu.: 5.983 1st Qu.:-0.68795
## Median :-0.6241 Median :21.32 Median : 6.970 Median : 0.06250
## Mean :-0.6266 Mean :21.48 Mean : 6.853 Mean :-0.04922
## 3rd Qu.: 0.9104 3rd Qu.:22.41 3rd Qu.: 7.687 3rd Qu.: 0.62000
## Max. : 3.9319 Max. :26.59 Max. :10.076 Max. : 2.99029
## x5 x6 x7 x8
## Min. :36.80 Min. :-2.84542 Min. :-2.86019 Min. :-2.527855
## 1st Qu.:39.36 1st Qu.:-0.61746 1st Qu.:-0.77526 1st Qu.:-0.766061
## Median :39.94 Median : 0.12398 Median :-0.04669 Median :-0.030096
## Mean :39.98 Mean : 0.08127 Mean :-0.04043 Mean : 0.002042
## 3rd Qu.:40.54 3rd Qu.: 0.79812 3rd Qu.: 0.69610 3rd Qu.: 0.719026
## Max. :42.55 Max. : 3.26457 Max. : 2.84095 Max. : 3.595376
## x9 x10 x11 x12 x13 x14
## Min. :-4.5979 Min. :-2.4043 Min. :-1.9494 0:61 4: 48 0:110
## 1st Qu.:-2.4559 1st Qu.:-0.3121 1st Qu.:-0.2430 1:74 5: 40 1: 41
## Median :-1.6494 Median : 1.2281 Median : 0.4392 2:65 6:112 2: 49
## Mean :-1.3043 Mean : 1.0216 Mean : 0.6146
## 3rd Qu.:-0.1434 3rd Qu.: 2.2652 3rd Qu.: 1.3396
## Max. : 2.8586 Max. : 4.3892 Max. : 4.1195
##
## [[3]]
## x1 x2 x3 x4
## Min. :-1.374434 Min. :-6.7658 Min. :-0.2112 Min. :-2.81789
## 1st Qu.:-0.334501 1st Qu.:-3.5760 1st Qu.: 2.7062 1st Qu.:-0.68795
## Median : 0.015412 Median :-2.8615 Median : 4.2401 Median : 0.06250
## Mean :-0.006645 Mean :-2.9152 Mean : 4.1062 Mean :-0.04922
## 3rd Qu.: 0.389974 3rd Qu.:-2.2508 3rd Qu.: 5.2313 3rd Qu.: 0.62000
## Max. : 1.251323 Max. :-0.3211 Max. : 9.2312 Max. : 2.99029
## x5 x6 x7
## 0:110 0:54 4: 40
## 1: 45 1:77 5: 43
## 2: 45 2:69 6:117
##
##
##
op<- optics(data1[,1:4],eps = 10,minPts = 10) # relative larger eps and minPts
plot(op, main='Data 1')
op<- optics(data2[,1:11],eps = 10,minPts = 20) # relative larger eps and minPts
plot(op, main='Data 2')
op<- optics(data3[,1:4],eps = 10,minPts = 10) # relative larger eps and minPts
plot(op, main='Data 3')
data_run<- scale(data1[,-5],T,T) # scale is recommended
tune<- KMeansSparseCluster.permute(as.matrix(data_run),K=3,wbounds=seq(1.1, 3.1, by=0.3),silent = T)
sparsek<- KMeansSparseCluster(as.matrix(data_run),K=3,wbounds = tune$bestw,silent = T)
sparsek[1][[1]]$ws
## x1 x2 x3 x4
## 0.491210566 0.586567459 0.643931423 0.001765776
cramer.v(table(data1$x5,sparsek[1][[1]]$Cs))
## [1] 0.6757008
data2.new<- t(scale(data2[,1:11],T,T))
results.dat2<- ConsensusClusterPlus(as.matrix(data2.new),maxK=6,reps=1000,pItem=0.8,pFeature=1,
clusterAlg="km",distance="euclidean",seed=1262,plot='png',
title=paste0(path,'/hydapDat2'))
#results2.dat2<- calcICL(results.dat2, plot='png',title=paste0(path,'/hydapDat2'))
apply(data2[,12:14],2, function(x) cramer.v(table(x,results.dat2[[3]][["consensusClass"]])))
## x12 x13 x14
## 0.1672483 0.8071675 0.8015909
data3.new<- t(scale(data3[,1:4],T,T))
results.dat3<- ConsensusClusterPlus(as.matrix(data3.new),maxK=6,reps=1000,pItem=0.8,pFeature=1,
clusterAlg="km",distance="euclidean",seed=1262,plot='png',
title=paste0(path,'/hydapDat3'))
#results2.dat3<- calcICL(results.dat3, plot='png',title=paste0(path,'/hydapDat3'))
cramer.v(table(data3$x5,data3$x6))
## [1] 0.09764819
cramer.v(table(data3$x5,data3$x7))
## [1] 0.6758128
cramer.v(table(data3$x6,data3$x7))
## [1] 0.1005522
true_c<- c(rep(1,40),rep(2,40),rep(3,120))
hydap.dat1<- hydap(data=data1[,c(1:3,5)],conti.pos = 1:3,cate.pos = 4,k=3)
adjustedRandIndex(true_c,hydap.dat1$clustering)
## [1] 0.9676672
hydap.dat2<- hydap(data=data2[,c(1:11,13,14)],conti.pos = 1:11,cate.pos = 12:13,k=3)
adjustedRandIndex(true_c,hydap.dat2$clustering)
## [1] 0.9677997
hydap.dat3<- hydap(data=data3[,c(5,7)],conti.pos = NULL,cate.pos = 4,k=3)
adjustedRandIndex(true_c,hydap.dat3$clustering)
## [1] 0.7293102