-
Notifications
You must be signed in to change notification settings - Fork 0
/
KNN
105 lines (84 loc) · 2.58 KB
/
KNN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -------------------- Digit Recognizer using KNN --------------
# Fits a k-nearest-neighbour classifier (k = 5) on digits.csv.
# Column 1 is used as the class label and columns 2:785 as predictors.
digit <- read.csv("digits.csv", header = TRUE)
str(digit)

# Work on (at most) the first 8000 rows. Capping at nrow(digit) avoids the
# all-NA rows that digit[1:8000, ] would create for a shorter file.
subdigit <- digit[seq_len(min(8000L, nrow(digit))), ]
str(subdigit)

# 75 % / 25 % train-test split; floor() makes the sample size an explicit
# whole number.
set.seed(3033)
sub <- sample(nrow(subdigit), floor(nrow(subdigit) * 0.75))
TrainData <- subdigit[sub, 2:785]
TestData <- subdigit[-sub, 2:785]

# Store the labels as factors that share one level set, so the kNN
# predictions and the reference labels are directly comparable.
digit_levels <- sort(unique(subdigit[, 1]))
TrainTarget <- factor(subdigit[sub, 1], levels = digit_levels)
TestTarget <- factor(subdigit[-sub, 1], levels = digit_levels)

# knn
library(class)
knn_fit <- knn(
  train = TrainData, test = TestData,
  cl = TrainTarget, k = 5
)
summary(knn_fit)
str(knn_fit)

# accuracy
# Install caret only when it is missing instead of on every run.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)
# confusionMatrix() requires `data` and `reference` to be factors with the
# same levels; both are built from digit_levels above.
cm <- confusionMatrix(data = knn_fit, reference = TestTarget)
cm
# Run help("confusionMatrix") interactively for details on the statistics.
# Here's where the naive Bayes model is created.
# Install the required packages only when they are missing, rather than
# re-installing them on every run of the script.
if (!requireNamespace("crossval", quietly = TRUE)) {
  install.packages("crossval", dependencies = TRUE)
}
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071", dependencies = TRUE)
}
# Load the two required packages.
library("crossval")
library("e1071")
# TrainData holds only the predictor columns (2:785), so a formula such as
# `label ~ .` cannot find its response there. Use the x / y interface and
# pass the labels (TrainTarget, taken from column 1) explicitly as a factor.
classifier <- naiveBayes(x = TrainData, y = as.factor(TrainTarget))
# ------------- BreastCancerkNN -----------------
#1
# Load the data and take a first look at its size and class balance.
cancer <- read.csv("./breastcancer.csv")
head(cancer, 10)
nrow(cancer)
table(cancer$Class)
#2
# Change missing operator from ? to NA.
# %in% never yields NA, so values that are already missing cannot end up
# in the logical subscript.
cancer$Bare.Nuclei[cancer$Bare.Nuclei %in% "?"] <- NA
# Number of missing values per column.
colSums(is.na(cancer))
# Keep only the rows where Bare.Nuclei is known.
cancer.2 <- cancer[!is.na(cancer$Bare.Nuclei), ]
#3
str(cancer.2)
# Recode the class codes 2 / 4 as a labelled factor; "benign" is the first
# (reference) level.
cancer.2$Class <- as.factor(cancer.2$Class)
levels(cancer.2$Class) <- list("benign" = "2", "malignant" = "4")
# Go through as.character() so the original values are recovered even if
# the column was read as a factor (as.integer() on a factor returns the
# internal level codes, not the values).
cancer.2$Bare.Nuclei <- as.integer(as.character(cancer.2$Bare.Nuclei))
#remove case
# Drops column 1 (presumably the case identifier - confirm against the CSV).
cancer.2 <- cancer.2[, 2:11]
str(cancer.2)
# Min-max normalize the data: rescale a numeric vector to the range [0, 1].
#
# Args:
#   x: a numeric (or integer) vector.
# Returns:
#   A numeric vector of the same length as x, computed as
#   (x - min(x)) / (max(x) - min(x)). A constant vector has zero range; it
#   is mapped to all zeros instead of the NaN that 0 / 0 would produce.
#   If x contains NA, every element of the result is NA.
normalize <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  # Guard against division by zero for constant input. The is.na() check
  # lets NA-containing input fall through and propagate NA.
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / span
}
# Rescale the nine predictor columns to [0, 1]; column 10 (the class
# factor) is left out and handled separately below.
cancer.norm <- as.data.frame(lapply(cancer.2[1:9], normalize))
#4
# 75 % / 25 % train-test split by row position.
set.seed(111)
sub <- sample(nrow(cancer.norm), floor(nrow(cancer.norm) * 0.75))
TrainData <- cancer.norm[sub, ]
TestData <- cancer.norm[-sub, ]
nrow(TrainData)
str(TrainData)
nrow(TestData)
str(TestData)
#separate Target variables
# The same row positions are used on cancer.2, so the labels line up with
# the normalized predictors.
TrainTarget <- cancer.2[sub, 10]
TestTarget <- cancer.2[-sub, 10]
#5
# k-nearest neighbours with k = 5.
library(class)
knn_fit <- knn(
  train = TrainData, test = TestData,
  cl = TrainTarget, k = 5
)
#6
library(caret)
cm <- confusionMatrix(data = knn_fit, reference = TestTarget)
cm # show the kNN results
#7
# Logistic regression on the same split, for comparison with kNN.
Train.with.target <- cbind(TrainData, TrainTarget)
str(Train.with.target)
logit_fit <- glm(
  TrainTarget ~ .,
  data = Train.with.target,
  family = binomial(link = "logit")
)
summary(logit_fit)
Test.with.target <- cbind(TestData, TestTarget)
# predict() on a glm returns the linear predictor (log-odds) by default;
# type = "response" is needed so that 0.5 is a probability cut-off. With a
# factor response the modelled event is the non-first level.
logit.prob.test <- predict(
  logit_fit,
  newdata = Test.with.target,
  type = "response"
)
logit.pred.test <- ifelse(logit.prob.test > 0.5, "malignant", "benign")
# Reuse the reference levels so both factors match even when only one class
# is predicted.
logit.pred.test <- factor(logit.pred.test, levels = levels(TestTarget))
# check accuracy
confusionMatrix(
  data = logit.pred.test,
  reference = Test.with.target$TestTarget
)