# -*- coding: utf-8 -*-
"""
glass_knn.py
KNN classification on the Glass dataset: find the best k, then fit the final model.
Created on Fri Nov 6 15:45:17 2020
@author: HP
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# load the Glass dataset (local CSV path; adjust to your environment)
glass = pd.read_csv('C:/Users/HP/Desktop/assignments submission/KNN/glass.csv')
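# Quick sanity check (a sketch; assumes the first 9 columns are the features
# and the 10th column is the glass type, matching the iloc indexing used below).
print(glass.shape)
print(glass.iloc[:, 9].value_counts())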
# split the data into train and test sets (70/30)
from sklearn.model_selection import train_test_split
train, test = train_test_split(glass, test_size=0.3, random_state=0)
# KNN classifier
from sklearn.neighbors import KNeighborsClassifier as KNC

# try odd k values from 3 to 49 and record train/test accuracy for each
acc = []
for i in range(3, 50, 2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
    train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
    test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
    acc.append([train_acc, test_acc])
# plot train vs test accuracy against k
plt.plot(np.arange(3, 50, 2), [a[0] for a in acc], 'bo-')
plt.plot(np.arange(3, 50, 2), [a[1] for a in acc], 'ro-')
plt.legend(['train', 'test'])
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()
# from the plot, k=5 gives the best model
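# Optional sketch: rather than reading k off the plot, the best k by test
# accuracy can be picked programmatically (uses the acc list built above).
acc_arr = np.array(acc)
ks = np.arange(3, 50, 2)
best_k = ks[acc_arr[:, 1].argmax()]
print('best k by test accuracy:', best_k)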
# build the final model at k=5
neigh = KNC(n_neighbors=5)
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])

pred_train = neigh.predict(train.iloc[:, 0:9])
train_acc = np.mean(pred_train == train.iloc[:, 9])
print('train accuracy:', train_acc)  # about 0.76

pred_test = neigh.predict(test.iloc[:, 0:9])
test_acc = np.mean(pred_test == test.iloc[:, 9])
print('test accuracy:', test_acc)  # about 0.661
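# Optional sketch, not part of the original script: KNN is distance-based, so
# standardizing the features often helps. This reuses the same train/test split
# and column layout (first 9 columns = features, 10th column = glass type).
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

scaled_knn = Pipeline([
    ('scale', StandardScaler()),
    ('knn', KNC(n_neighbors=5)),
])
scaled_knn.fit(train.iloc[:, 0:9], train.iloc[:, 9])
scaled_test_acc = np.mean(scaled_knn.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
print('test accuracy with scaling:', scaled_test_acc)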