ChessSupervisedLearning.py
# Import libraries for machine learning
import operator  # used by the Hurwicz decision rule below
import timeit    # used to time the cross-validation runs below
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import scipy as sk
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib  # sklearn.externals.joblib is deprecated; the standalone package is used instead
pd.options.mode.chained_assignment = None
# Separate my input file based on commas and recode the board file letters (a-h)
# as numbers (9-16), writing the converted rows to a new file
# (an illustrative example follows this block).
f2 = open('King-Rook-King.data.txt', 'r')
f1 = open('K-R-K.data.txt', 'w')
for line in f2:
    for ch in line[:12]:
        if ch == 'a':      # use == for string comparison, not 'is'
            f1.write('9')
        elif ch == 'b':
            f1.write('10')
        elif ch == 'c':
            f1.write('11')
        elif ch == 'd':
            f1.write('12')
        elif ch == 'e':
            f1.write('13')
        elif ch == 'f':
            f1.write('14')
        elif ch == 'g':
            f1.write('15')
        elif ch == 'h':
            f1.write('16')
        else:
            f1.write(ch)
    f1.write(line[12:17])
f1.close()
f2.close()
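# Illustrative example (assuming the input follows the usual UCI King-Rook vs. King
# layout, e.g. "a,1,b,3,c,2,draw"): the loop above rewrites that row as
# "9,1,10,3,11,2,draw", so every board file letter becomes a number while the
# outcome column (the characters from position 12 onward) is copied through unchanged.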
# There was a problem with cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring):
# it cannot take strings, which is why the letter columns were replaced with numbers above.
names = ['WhiteKingcolumn', 'WhiteKingline', 'WhiteRookcolumn', 'WhiteRookline', 'BlackKingcolumn', 'BlackKingline', 'Outcome']
data = pd.read_csv('K-R-K.data.txt', names=names)  # read the converted file written above
#Checking my data
#data shape
print(data.shape)
print(data.head(20))
#This includes the count, mean, the min and max values as well as some percentiles
print(data.describe())
#class distribution
print(data.groupby('Outcome').size())
# Visualize my data
# box and whisker plots (the layout needs room for all six numeric columns, hence 2x3)
data.plot(kind='box', subplots=True, layout=(2, 3), sharex=False, sharey=False)
plt.show()
# scatter plot matrix
scatter_matrix(data)
plt.show()
# Split-out validation dataset
array = data.values
X = array[:,0:6]
Y = array[:,6]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X, Y, test_size=validation_size, random_state=seed)
# Test options and evaluation metric
seed = 7
scoring = 'accuracy'
# Spot Check Algorithms
# Helper used to run (and time) the cross-validation of the current model
def compare_Algorithms():
    # relies on the globals model, kfold, X_train, Y_train and scoring defined below
    return model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)


# This function's arguments are the speed and the accuracy of every algorithm that was run;
# times and results are arrays with all the speeds and accuracies
# (a small worked example follows the function definition).
def Hurwicz(names, times, results, a):
    decision = []
    # maxTime = 1 because I want the smallest time but, in the end, the biggest decision value,
    # so every time is turned into 1 - time and the largest of these elements is taken
    maxTime = 1
    for y in range(len(names)):
        x = a * (maxTime - times[y]) + (1 - a) * results[y]
        decision.append(x)
    index, value = max(enumerate(decision), key=operator.itemgetter(1))
    print('The most suitable algorithm for you is:')
    print(names[index])
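# Worked example (illustrative numbers, not measured ones): with a = 0.5,
# an algorithm with time 0.30 s and accuracy 0.90 scores 0.5*(1 - 0.30) + 0.5*0.90 = 0.80,
# while one with time 0.10 s and accuracy 0.85 scores 0.5*(1 - 0.10) + 0.5*0.85 = 0.875,
# so the faster but slightly less accurate algorithm would be recommended at this a.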
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))
# evaluate each model in turn
results1 = []
names1 = []
times1 = []
results = []
names = []
print('Algorithm: accuracy time')
for name, model in models:
    # shuffle=True is required by newer scikit-learn versions when random_state is set
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    # time the cross-validation itself (the original timeit call only timed a name lookup)
    start = timeit.default_timer()
    cv_results = compare_Algorithms()
    run_time = timeit.default_timer() - start
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f %f" % (name, cv_results.mean(), run_time)
    results1.append(cv_results.mean())
    names1.append(name)
    times1.append(run_time)
    print(msg)
print('How much do you prefer speed over accuracy? (1 = only speed, 0 = only accuracy)')
a = float(input())  # input() returns a string, so convert before using it in Hurwicz
Hurwicz(names1, times1, results1, a)
# Compare Algorithms
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()
# Make predictions on the validation dataset
# KNN is used here as the algorithm with the highest accuracy on this data
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
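# joblib is imported above but never used in the original script; a minimal sketch,
# assuming a hypothetical output filename 'knn_krk.joblib', of how the fitted KNN
# model could be saved and reloaded for later predictions:
joblib.dump(knn, 'knn_krk.joblib')
knn_loaded = joblib.load('knn_krk.joblib')
print(knn_loaded.score(X_validation, Y_validation))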