#-------------------------------------------------------#
# R in Action (2nd ed): Chapter 16 #
# Cluster analysis #
# requires packages NbClust, flexclust, rattle, fMultivar, ggplot2 #
# install.packages(c("NbClust", "flexclust", "rattle", "fMultivar", "ggplot2")) #
#-------------------------------------------------------#
# par(ask=TRUE)  # uncomment to be prompted before each new plot
# Snapshot the current graphics settings so they can be restored with
# par(opar) later in the script. Only the settable parameters are saved
# (no.readonly = TRUE): a snapshot that also contains read-only parameters
# makes every later par(opar) warn that those parameters cannot be set.
opar <- par(no.readonly = TRUE)

# Calculating Distances
# Load the nutrient data, peek at the first two rows, and compute the
# pairwise distances between rows (dist() uses its default metric).
data("nutrient", package = "flexclust")
head(nutrient, n = 2)
d <- dist(nutrient)
# Show the distances among the first four observations only.
as.matrix(d)[seq_len(4), seq_len(4)]

# Listing 16.1 - Average linkage clustering of nutrient data
# Reload the raw data, lower-case the row names, standardise the columns
# with scale(), then run average-linkage hierarchical clustering on the
# distances between the standardised rows.
data("nutrient", package = "flexclust")
rownames(nutrient) <- tolower(rownames(nutrient))
nutrient.scaled <- scale(nutrient)
d <- dist(nutrient.scaled)
fit.average <- hclust(d, method = "average")
# hang = -1 draws all leaf labels at the same baseline.
plot(fit.average,
     hang = -1,
     cex = .8,
     main = "Average Linkage Clustering")

# Listing 16.2 - Selecting the number of clusters
# Run NbClust on the standardised nutrient data (2 to 15 clusters, Euclidean
# distance, average linkage), then tabulate and plot how many criteria
# proposed each number of clusters.
library(NbClust)
nc <- NbClust(nutrient.scaled, distance = "euclidean",
              min.nc = 2, max.nc = 15, method = "average")
par(opar)  # restore the graphics settings saved at the top of the script

# The result element is spelled out in full as `Best.nc`. The previous
# `nc$Best.n` only resolved through `$` partial matching, which returns NULL
# without any error as soon as the prefix stops being unique.
# Row 1 is used as the number of clusters proposed by each criterion.
table(nc$Best.nc[1, ])
barplot(table(nc$Best.nc[1, ]),
        xlab = "Number of Clusters", ylab = "Number of Criteria",
        main = "Number of Clusters Chosen by 26 Criteria")

# Listing 16.3 - Obtaining the final cluster solution
# Cut the average-linkage tree into five groups and report the group sizes.
clusters <- cutree(fit.average, k = 5)
table(clusters)

# Per-cluster medians: first on the raw variables, then on the
# standardised ones.
aggregate(nutrient, by = list(cluster = clusters), FUN = median)
aggregate(as.data.frame(nutrient.scaled),
          by = list(cluster = clusters),
          FUN = median)

# Redraw the dendrogram and box the five clusters on it.
plot(fit.average,
     hang = -1,
     cex = .8,
     main = "Average Linkage Clustering\n5 Cluster Solution")
rect.hclust(fit.average, k = 5)

# Plot function for within groups sum of squares by number of clusters
# Plot the total within-groups sum of squares against the number of
# k-means clusters, for 1 up to `nc` clusters.
#
# Args:
#   data: numeric matrix or data frame, one observation per row.
#   nc:   largest number of clusters to try (single number >= 1, default 15).
#   seed: value passed to set.seed() before every kmeans() call, so each
#         fit starts from the same random-number state.
#
# Returns:
#   Invisibly, the numeric vector of within-groups sums of squares;
#   element i belongs to the i-cluster solution. The plot is drawn as a
#   side effect.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1, !is.na(nc), nc >= 1)
  nc <- as.integer(nc)

  # Preallocate instead of growing the vector inside the loop.
  wss <- numeric(nc)

  # One cluster: the within-groups SS is the total SS, i.e. (n - 1) times
  # the sum of the column variances.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))

  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count
  # backwards (2, 1) and leave wss longer than the x axis.
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers = i)$withinss)
  }

  plot(seq_len(nc), wss, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  invisible(wss)
}

# Listing 16.4 - K-means clustering of wine data
# Load the wine data and standardise every column except the first
# (presumably the Type label used for evaluation below - confirm).
data(wine, package = "rattle")
head(wine)
df <- scale(wine[-1])

# Choose the number of clusters: scree-style plot, then NbClust's criteria.
wssplot(df)
library(NbClust)
set.seed(1234)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
par(opar)  # restore the graphics settings saved at the top of the script

# `Best.nc` is spelled out in full: `nc$Best.n` relied on `$` partial
# matching, which silently yields NULL if the prefix becomes ambiguous.
table(nc$Best.nc[1, ])
barplot(table(nc$Best.nc[1, ]),
        xlab = "Number of Clusters", ylab = "Number of Criteria",
        main = "Number of Clusters Chosen by 26 Criteria")

# Final 3-cluster k-means fit; nstart = 25 tries 25 random starts.
set.seed(1234)
fit.km <- kmeans(df, 3, nstart = 25)
fit.km$size
fit.km$centers
# Cluster means on the original (unstandardised) variables.
aggregate(wine[-1], by = list(cluster = fit.km$cluster), mean)

# evaluate clustering
# Cross-tabulate the recorded Type against the k-means assignment and
# summarise the agreement with flexclust's randIndex().
ct.km <- table(wine$Type, fit.km$cluster)
ct.km
library(flexclust)
randIndex(ct.km)

# Listing 16.5 - Partitioning around medoids for the wine data
# Partition the wine variables (first column dropped) around three medoids;
# stand = TRUE asks pam() to standardise the variables first.
library(cluster)
set.seed(1234)
fit.pam <- pam(wine[-1], k = 3, stand = TRUE)
fit.pam$medoids
clusplot(fit.pam, main = "Bivariate Cluster Plot")

# evaluate clustering
# Cross-tabulate the recorded Type against the PAM assignment and
# summarise the agreement with randIndex().
ct.pam <- table(wine$Type, fit.pam$clustering)
ct.pam
randIndex(ct.pam)

## Avoiding non-existent clusters
# Simulate 1000 draws from a single bivariate normal distribution
# (rho = 0.5) and run the same cluster-count diagnostics on them.
# Statements that had been fused onto one line (a syntax error) are
# separated again below.
library(fMultivar)
set.seed(1234)
df <- rnorm2d(1000, rho = .5)
df <- as.data.frame(df)
plot(df, main = "Bivariate Normal Distribution with rho=0.5")

wssplot(df)
library(NbClust)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
par(opar)  # restore the graphics settings saved at the top of the script

# `Best.nc` is spelled out in full: `nc$Best.n` relied on `$` partial
# matching, which silently yields NULL if the prefix becomes ambiguous.
barplot(table(nc$Best.nc[1, ]),
        xlab = "Number of Clusters", ylab = "Number of Criteria",
        main = "Number of Clusters Chosen by 26 Criteria")

# Force a two-cluster PAM solution and colour the points by cluster.
library(ggplot2)
library(cluster)
fit <- pam(df, k = 2)
df$clustering <- factor(fit$clustering)
ggplot(data = df, aes(x = V1, y = V2, color = clustering, shape = clustering)) +
  geom_point() +
  ggtitle("Clustering of Bivariate Normal Data")

# Column 4 of All.index is plotted under the label "CCC"
# (NOTE(review): presumably the Cubic Clustering Criterion column - confirm
# against colnames(nc$All.index)).
plot(nc$All.index[, 4], type = "o", ylab = "CCC",
     xlab = "Number of clusters", col = "blue")

最新文章

  1. gRPC+etcd的优势分析
  2. Web 开发人员不能错过的 jQuery 教程和案例
  3. Oracle Database 11g Express Edition学习笔记
  4. 1.6---旋转二维数组,旋转图像像素,旋转矩阵,90度(CC150)
  5. [读书笔记]java中的类加载器
  6. android--多View切换viewpager
  7. Working with nil
  8. NDK环境配置
  9. 【十分钟教会你汇编】MIPS编程入门(妈妈说标题要高大上,才会有人看&gt;_&lt;!)
  10. 关于javascript延迟加载图片
  11. struct2(六) 为表单添加验证
  12. 将一个javaWeb应用跑在Docker里
  13. LeetCode编程训练 - 回溯(Backtracking)
  14. ELK-Elasticsearch安装
  15. 欢迎来到Python世界
  16. Spring data JPA中使用Specifications动态构建查询
  17. 【Android】快速开发偷懒必备(二) 支持DataBinding啦~爽炸,一行实现花式列表[申明:来源于网络]
  18. mysql03
  19. sudo命令 和限制root 远程登陆
  20. 在Java的反射中,Class.forName和ClassLoader的区别

热门文章

  1. Java线程——线程之间的几点重要说明
  2. springboot的http监控接口启动器的配置
  3. EL表达式获取属性值的原理
  4. Python之小作业
  5. spring自定义aop
  6. linux下tab作用的描述?
  7. debian8修改kde桌面语言
  8. Zabbix常用监控项整理
  9. Python程序在docker中运行,未找到自定义模块
  10. Docker系列二: docker常用命令总结