# Answer To: In the zip file, there are files and instructions for this case study.
# Sudharsan.J answered on Aug 24 2021
########################################################################
# Point the session at the folder that holds the workbook.
# NOTE(review): this is an absolute, machine-specific path; the script
# only runs as-is where this directory exists.
setwd("C:\\Users\\Monika\\Desktop\\Greynodes\\23.08.2020\\")

# Read worksheet "Sheet1" of the workbook into `data`.
library(readxl)
data <- read_excel(path = "hw3_data.xlsx", sheet = "Sheet1")

# Show a compact overview of the imported columns.
str(data)
# Part A: standardize the clustering variables ----
library(robustHD)

# Columns 2, 4, 5 and 6 of the imported sheet are standardized, using
# mean as the centering function and sd as the scaling function.
x.scaled <- standardize(
  data[, c(2, 4, 5, 6)],
  centerFun = mean,
  scaleFun = sd
)

# Inspect the structure of the standardized result.
str(x.scaled)
# Part-B Elbow Plot
# Packages for the elbow analysis. The attach order is kept as written,
# because later packages can mask functions from earlier ones.
library(tidyverse) # data manipulation; map_dbl() used below is attached via purrr
library(cluster) # clustering algorithms
library(factoextra) # clustering algorithms & visualization
# Fix the RNG seed so the randomly started k-means fits that follow are
# reproducible from run to run.
set.seed(1234)
# Total within-cluster sum of squares of a k-means fit with `k` clusters.
#
# Arguments:
#   k        number of clusters, passed to kmeans() as `centers`
#   x        data to cluster; defaults to the script's standardized data
#            `x.scaled`, which is looked up only when the function is
#            called without `x`
#   nstart   number of random starts handed to kmeans()
#   iter.max iteration limit handed to kmeans()
#
# Returns the `tot.withinss` element of the fitted kmeans object.
#
# Calling wss(k) with no further arguments runs exactly the original fit
# (x.scaled, nstart = 1000, iter.max = 1000), so map-style callers that
# pass only `k` are unaffected.
wss <- function(k, x = x.scaled, nstart = 1000, iter.max = 1000) {
  fit <- kmeans(x, centers = k, nstart = nstart, iter.max = iter.max)
  fit$tot.withinss
}
# Candidate cluster counts: k = 1 through k = 20.
k.values <- 1:20

# Total within-cluster sum of squares for every candidate k, evaluated
# in increasing order of k.
wss_values <- vapply(k.values, wss, numeric(1))

# Elbow curve: within-cluster sum of squares against the number of
# clusters. The y-axis is fixed to the range 0..8000.
plot(
  k.values,
  wss_values,
  type = "b",
  pch = 19,
  frame = FALSE,
  ylim = c(0, 8000),
  xlab = "Number of clusters K",
  ylab = "Total within-clusters sum of squares"
)
# Elbow plot drawn by factoextra, with a dashed vertical reference line
# at k = 5.
library(factoextra)
fviz_nbclust(x = x.scaled, FUNcluster = kmeans, method = "wss") +
  geom_vline(xintercept = 5, linetype = 2) +
  labs(subtitle = "Elbow method")

# Run NbClust over 2 to 20 clusters with Euclidean distance and k-means,
# and keep the result in `nb`.
library("NbClust")
nb <- NbClust(
  data = x.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 20,
  method = "kmeans"
)

# Plot the NbClust result.
library("factoextra")
fviz_nbclust(nb)
# Part D: k-means with 5 clusters, fitted twice under different seeds ----

# First fit, seeded with 123.
set.seed(123)
results <- kmeans(x.scaled, centers = 5, iter.max = 1000, nstart = 1000)

# Second fit, seeded with 124; all other settings are the same.
set.seed(124)
results_new <- kmeans(x.scaled, centers = 5, iter.max = 1000, nstart = 1000)

# Cluster assignments of each fit as one-column data frames.
result1 <- as.data.frame(results$cluster)
result2 <- as.data.frame(results_new$cluster)

# Put the standardized Inv_1M_Bef values next to both sets of cluster
# labels, then sort the rows by the original (unscaled) Inv_1M_Bef.
output <- cbind(x.scaled$Inv_1M_Bef, result1, result2)
output_1 <- output[order(data$Inv_1M_Bef), ]
colnames(output_1)=...