Answer To: Introduction For this week’s take-home lab, you will work on the same data set from Week 4/5...
Mohd answered on Feb 22 2022
-
-
-
2/21/2022
Loading Packages
library(dplyr)
library(caret)
library(MASS)
library(e1071)
library(magrittr)
library(rmarkdown)
library(readxl)
library(pROC)
# Load the raw credit-card data from the Excel workbook in the home directory.
ucicreditcard <- read_excel("~/ucicreditcard.xlsx")
# View(ucicreditcard)

# Copy the response into a syntactic column name and then remove the original
# column BY NAME. The earlier positional drop (`[, -25]`) only removed the
# right column as long as the workbook kept the response in position 25.
stopifnot("default payment next month" %in% names(ucicreditcard))
ucicreditcard$default_payment <- ucicreditcard[["default payment next month"]]
ucicreditcard[["default payment next month"]] <- NULL

# Recode the MARRIAGE value 0 as missing (NA).
ucicreditcard$MARRIAGE <- replace(
  ucicreditcard$MARRIAGE,
  ucicreditcard$MARRIAGE == 0,
  NA
)

# Frequency table of the EDUCATION codes.
ucicreditcard %>%
  count(EDUCATION)
## # A tibble: 7 x 2
## EDUCATION n
##
## 1 0 14
## 2 1 10585
## 3 2 14030
## 4 3 4917
## 5 4 123
## 6 5 280
## 7 6 51
# Mark the EDUCATION codes 5 and 6 as missing (NA) in a single assignment.
# NOTE(review): code 0 is left untouched here although MARRIAGE == 0 is
# recoded to NA -- confirm that this asymmetry is intended.
ucicreditcard$EDUCATION[ucicreditcard$EDUCATION %in% c(5, 6)] <- NA

# Frequency table of the PAY_0 values.
count(ucicreditcard, PAY_0)
## # A tibble: 11 x 2
## PAY_0 n
##
## 1 -2 2759
## 2 -1 5686
## 3 0 14737
## 4 1 3688
## 5 2 2667
## 6 3 322
## 7 4 76
## 8 5 26
## 9 6 11
## 10 7 9
## 11 8 19
Checking Missing (NA) Values
# Number of missing PAY_0 entries before any recoding.
sum(is.na(ucicreditcard[["PAY_0"]]))
## [1] 0
# Recode the value -2 as NA in each of the six PAY_* columns in one pass
# instead of six near-identical statements.
ucicreditcard <- ucicreditcard %>%
  mutate(across(
    all_of(c("PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6")),
    function(status) replace(status, status == -2, NA)
  ))

# Frequency table of PAY_0 after the recoding.
count(ucicreditcard, PAY_0)
## # A tibble: 11 x 2
## PAY_0 n
##
## 1 -1 5686
## 2 0 14737
## 3 1 3688
## 4 2 2667
## 5 3 322
## 6 4 76
## 7 5 26
## 8 6 11
## 9 7 9
## 10 8 19
## 11 NA 2759
# Number of missing PAY_0 entries after -2 has been recoded to NA.
sum(is.na(ucicreditcard[["PAY_0"]]))
## [1] 2759
Training/test partition of the dataset
# Drop every row that contains at least one NA in any column.
ucicreditcard <- na.omit(ucicreditcard)

# Keep columns 2 through 25 only, i.e. discard the first column.
ucicreditcard <- ucicreditcard[, 2:25]

# Assign each row at random to group 1 (probability 0.8) or group 2
# (probability 0.2). The seed is fixed immediately before the draw so the
# split is reproducible.
set.seed(549)
inp <- sample.int(
  2,
  size = nrow(ucicreditcard),
  replace = TRUE,
  prob = c(0.8, 0.2)
)

# Group 1 becomes the training set, group 2 the test set.
training_data <- ucicreditcard[inp == 1, ]
test_data <- ucicreditcard[inp == 2, ]
Fitting the best KNN model and CART model
# Response vectors for the training and test sets.
train.dep <- training_data$default_payment
test.dep <- test_data$default_payment

# Predictor tables: every column except the response, selected by name.
# The earlier positional selection `[, 2:24]` was wrong on both ends: the
# partitioned data has 24 columns with `default_payment` last, so it leaked
# the response into the predictors and dropped the first predictor column.
# Accuracy figures recorded in this report were produced with that earlier
# selection and need to be regenerated.
train.indep <- training_data[, names(training_data) != "default_payment"]
test.indep <- test_data[, names(test_data) != "default_payment"]
Loading packages
library(class)
library(rpart)
library(rpart.plot)
library(gridExtra)
library(ISLR)
KNN Model with Summary
# Classify the test rows with k-nearest-neighbours for three neighbourhood
# sizes; each result is a factor of predicted classes for the test set.
# NOTE(review): the predictors are passed to knn() unscaled, so columns with
# large numeric ranges presumably dominate the distance -- consider
# standardising them before fitting.
knn.1 <- knn(train = train.indep, test = test.indep, cl = train.dep, k = 1)
knn.5 <- knn(train = train.indep, test = test.indep, cl = train.dep, k = 5)
knn.15 <- knn(train = train.indep, test = test.indep, cl = train.dep, k = 15)
Accuracy at different K
# Test-set accuracy for k = 1: share of predictions equal to the true class.
sum(knn.1 == test.dep) / length(test.dep)
## [1] 0.6833814
# Test-set accuracy for k = 5.
sum(knn.5 == test.dep) / length(test.dep)
## [1] 0.7503883
# Test-set accuracy for k = 15.
sum(knn.15 == test.dep) / length(test.dep)
## [1] 0.770801
Hyperparameter tuning of the KNN model for better accuracy
# Inspect the class of the training response before tuning.
class(train.dep)
## [1] "numeric"
# Convert the response to a factor so the tuning run treats the problem as
# classification.
train.dep <- as.factor(train.dep)

# Grid-search k = 1..40 with 5-fold cross-validation.
# `cross` is an argument of tune.control(). Passed directly to tune.knn() it
# was silently ignored and the default of 10 folds was used, which is why the
# previously recorded summary reports "10-fold cross validation".
knn_cross <- tune.knn(
  x = train.indep,
  y = train.dep,
  k = 1:40,
  tunecontrol = tune.control(sampling = "cross", cross = 5)
)
summary(knn_cross)
##
## Parameter tuning of 'knn.wrapper':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## k
## 34
##
## - best performance: 0.2269582
##
## - Detailed performance results:
## k error dispersion
## 1 1 0.3064167 0.010980007
## 2 2 0.3087763 0.011192464
## 3 3 0.2673313 0.010152256
## 4 4 0.2667953 0.012618763
## 5 5 0.2504415 0.010790730
## 6 6 0.2504416 0.010458181
## 7 7 0.2412733 0.008250576
## 8 8 0.2402546 0.009672923
## 9 9 0.2386459 0.010230342
## 10 10 0.2373058 0.008317957
## 11 11 0.2357510 0.007349066
## 12 12 0.2351612 0.007564908
## 13 13 0.2318371 0.008142404
## 14 14 0.2326413...