-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdistance_neural_net.py
172 lines (156 loc) · 5.41 KB
/
distance_neural_net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 4 19:15:29 2017
@author: ZhicongLiang
"""
import numpy as np
from feature_selection import *
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from neural_net import *
def neural(k, dspt=False):
    '''Re-classify a single painting with the neural network.

    k    : row index of the sample to re-classify.
    dspt : False -> the sample is row k of the known set (neural_x); the
           network is fitted on every other known sample (leave-one-out).
           Otherwise the network is fitted on all known samples and row k
           of the disputed set (nol_D) is scored.

    The model output is cut with threshold(prob, 1 - t) where t = 0.7, and
    the [0][0] entry of that result is returned (expected to be 1/0).
    Both the value 1 - prob and the final prediction are printed.'''
    t = 0.7
    if dspt == False:
        # hold out sample k and train on the remaining known samples
        keep = [j for j in range(len(y)) if j != k]
        model = fit(neural_x[keep, :], neural_y[keep])
        sample = neural_x[k, :].reshape(1, neural_x.shape[1])
    else:
        # disputed sample: train on every known sample
        model = fit(neural_x, neural_y)
        sample = nol_D[k, :].reshape(1, nol_D.shape[1])
    prob = model.predict(sample)
    print(k, ':', 1 - prob)
    pred = threshold(prob, 1 - t)[0][0]
    print(pred)
    return pred
def distance(x):
    '''Return a list with, for every row of x, the sum of its squared
    entries (the squared Euclidean distance of that row from the origin).'''
    n_rows = x.shape[0]
    return [sum([value ** 2 for value in x[row, :]]) for row in range(n_rows)]
def thsd(train_x, train_y):
    '''Pick the best distance threshold p for train_x and its labels.

    Every sample's own squared distance is tried as a candidate threshold.
    A sample counts as correctly classified when it lies within the
    threshold (distance <= p) and is labelled 1, or lies outside
    (distance > p) and is labelled 0. The candidate with the highest
    accuracy is returned; on ties the earliest candidate wins.'''
    dst = distance(train_x)
    acc = []
    for p in dst:
        hits = sum(
            1
            for j, d in enumerate(dst)
            if (d <= p and train_y[j] == 1) or (d > p and train_y[j] == 0)
        )
        acc.append(hits / len(train_y))
    best = max(acc)
    return dst[acc.index(best)]
def validate():
    '''Run leave-one-out cross validation over the known samples (x, y),
    print the running hit count and the final accuracy, and return that
    accuracy.'''
    n_samples = len(y)
    correct = 0
    for k in range(n_samples):
        # threshold is selected on every sample except the held-out one
        others = [j for j in range(n_samples) if j != k]
        p = thsd(x[others, :], y[others])
        dst = sum([t ** 2 for t in x[k, :]])
        # A painting rejected by the distance rule is scored again by the
        # neural network; a positive answer there overrides the rejection,
        # which is expressed by collapsing its distance to 0.
        if dst > p and neural(k) == 1:
            dst = 0
        if (dst <= p and y[k] == 1) or (dst > p and y[k] == 0):
            correct += 1
            print(k, 'times : count=', correct)
    # cv_acc is the accuracy of the cross validation test
    cv_acc = correct / n_samples
    print('cv_acc=', cv_acc)
    return cv_acc
def dpt_predict():
    '''Return the list of 1/0 predictions for the disputed paintings.

    The threshold p is selected by thsd on all known samples (x, y). Each
    disputed sample (rows of nol_D restricted to the columns in ft_slt) is
    predicted 1 when its squared distance is within p. A sample outside
    the threshold is re-classified by the neural network, and a positive
    answer there overrides the distance rule.

    The boundary is inclusive (dst <= p -> 1), matching the rule thsd uses
    to choose p and the rule validate uses to score it. Exactly one label
    is appended per sample, so the result always has one entry per row.'''
    test_D = nol_D[:, ft_slt]
    p = thsd(x, y)
    pred = []
    for k in range(test_D.shape[0]):
        dst = sum([t ** 2 for t in test_D[k, :]])
        # outlier by the distance rule: ask the neural network; a positive
        # answer is recorded by collapsing the distance to 0
        if dst > p and neural(k, dspt=True) == 1:
            dst = 0
        pred.append(1 if dst <= p else 0)
    return pred
def plot(ft_num=3):
    '''Save a scatter figure of the known samples in x.

    ft_num == 3 : 3D scatter of the first three columns, saved as '3D.jpg'.
    ft_num == 2 : 2D scatter of the first two columns plus a circle of
                  radius sqrt(thsd(x, y)) around the column means of x,
                  saved as '2D.jpg'.
    Any other value only prints a message.

    Rows 0-10 are drawn in red and rows 11 onwards in blue (genuine and
    non-Raphael paintings respectively, per the original author).'''
    if ft_num not in (2, 3):
        print('Oops:Dimension larger than 3!')
        return

    if ft_num == 3:
        # 3D figure: one scatter call per group of rows
        fig = plt.figure()
        axes = fig.add_subplot(111, projection='3d')
        groups = ((slice(0, 11), 'r', '^'), (slice(11, None), 'b', 'o'))
        for rows, colour, shape in groups:
            axes.scatter(x[rows, 0], x[rows, 1], x[rows, 2],
                         c=colour, marker=shape)
        plt.savefig('3D.jpg')
        return

    # 2D figure: the two groups, then the threshold circle drawn as points
    p = thsd(x, y)
    plt.scatter(x[0:11, 0], x[0:11, 1], c='r')
    plt.scatter(x[11:, 0], x[11:, 1], c='b')
    a, b = x.mean(axis=0)
    r = np.sqrt(p)
    theta = np.arange(0, 2 * np.pi, 0.01)
    plt.scatter(a + r * np.cos(theta), b + r * np.sin(theta), marker='o')
    plt.savefig('2D.jpg')
    return
if __name__ == '__main__':
    # pickle is needed to read the disputed-sample file; import it here
    # explicitly instead of relying on it leaking in through a star import.
    import pickle

    # Alternative hand-picked feature set: ft_slt = [6,33,2,12,39]
    ft_num = 3
    ft_slt = ft_selection(ft_num)
    # G comes from the file: tight_frame_feature_selection
    # it contains all 20 known samples with their labels in first column
    x = G[:, 1:]
    x = x[:, ft_slt]          # distance rule uses only the selected features
    y = G[:, 0]
    neural_x = G[:, 1:]       # the neural network uses every feature
    neural_y = G[:, 0]
    # NOTE: pickle.load executes arbitrary code from the file; only use it
    # on data files you trust. The context manager closes the file handle.
    with open('data/tight_frame_D.p', 'rb') as d_file:
        D = pickle.load(d_file)
    nol_D = normalize(D)
    plot(ft_num)
    # repeat the process 30 times and calculate the average
    cr_acc = []
    predict = np.zeros(len(D))
    num_iter = 30
    for i in range(num_iter):
        print('----------------the', i, 'time---------------')
        cr_acc.append(validate())
        predict = np.add(predict, dpt_predict())
    # fraction of runs in which each disputed painting was predicted 1
    avg_pred = predict / num_iter
    avg_cr_acc = np.mean(cr_acc)