-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfeature_selection.py
102 lines (83 loc) · 2.61 KB
/
feature_selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 22 11:22:27 2017
@author: ZhicongLiang
"""
import numpy as np
import pickle
from processing_data import *
import matplotlib.pyplot as plt
def dist(F, g):
    """Return the squared distance of sample g from the centre 0.

    g is one labelled sample: its first entry is skipped and the remaining
    entries are treated as the feature values. F is a list of indices into
    those feature values; only the selected features contribute to the sum
    of squares.
    """
    chosen = g[1:][F]
    return sum(value ** 2 for value in chosen)
def ROC(F):
    """Return the ROC points (TPR, FPR) for the feature selection F.

    F is a list of feature indices. Every sample in the module-level matrix
    G (defined at the bottom of this file, label in column 0) is scored by
    dist(F, sample). 500 thresholds are spread evenly between the smallest
    and the largest score; at each threshold a sample counts as predicted
    positive when its score is <= the threshold.

    Returns two lists of length 500: the true positive rates and the false
    positive rates, in order of increasing threshold.
    """
    n_thresholds = 500
    labels = G[:, 0]
    # Labels are 0/1, so their sum is the number of positive samples.
    n_positive = sum(labels)
    n_negative = G.shape[0] - n_positive

    scores = [dist(F, sample) for sample in G]
    highest = max(scores)
    lowest = min(scores)
    thresholds = np.linspace(lowest, highest, n_thresholds)

    TPR = []
    FPR = []
    for cutoff in thresholds:
        true_pos = 0
        false_pos = 0
        for score, label in zip(scores, labels):
            if score <= cutoff:
                if label == 1:
                    true_pos += 1
                if label == 0:
                    false_pos += 1
        TPR.append(true_pos / n_positive)
        FPR.append(false_pos / n_negative)
    return TPR, FPR
def AUC(F):
    """Return the area under the ROC curve for the feature selection F.

    The area is accumulated as a sum of rectangles: each step in FPR
    between consecutive thresholds is multiplied by the TPR at the later
    threshold.
    """
    TPR, FPR = ROC(F)
    total = 0
    for prev_fpr, cur_fpr, cur_tpr in zip(FPR, FPR[1:], TPR[1:]):
        width = cur_fpr - prev_fpr
        total += width * cur_tpr
    return total
def ft_selection(ft_num, n_features=54):
    """Select features greedily by forward step-wise search on AUC.

    Starting from an empty selection, each round scores every not yet
    selected feature j with AUC(selection + [j]) and keeps the feature
    with the largest score.

    ft_num: number of features to select.
    n_features: number of candidate features; candidates are the indices
        0 .. n_features - 1. Defaults to 54, the value that used to be
        hard-coded here.

    Returns the list of selected feature indices in the order they were
    chosen. The list is shorter than ft_num when ft_num exceeds the number
    of candidates: the search stops instead of appending duplicates.
    """
    ft_slt = []
    for k in range(ft_num):
        if len(ft_slt) >= n_features:
            # Every candidate is already selected; continuing would only
            # re-append index 0 because all scores would be the -1 placeholder.
            break
        print('seletcting',k,'-th feature')
        # area[j] holds the AUC obtained by adding feature j to the current
        # selection. Already selected features get the placeholder -1 so that
        # list positions stay aligned with feature indices; AUC sums
        # non-negative terms, so the placeholder never wins the max.
        area = []
        for j in range(n_features):
            if j in ft_slt:
                area.append(-1)
            else:
                area.append(AUC(ft_slt + [j]))
        # Keep the feature with the largest AUC (first one on ties).
        ft_slt.append(area.index(max(area)))
    return ft_slt
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# trusted, locally generated files here.
# The with-blocks make sure both file handles are closed after loading.
with open('data/tight_frame_N.p','rb') as data_file:
    N = pickle.load(data_file)
with open('data/tight_frame_T.p','rb') as data_file:
    T = pickle.load(data_file)

# The centre of the genuine paintings is 0, since the data are normalized
# (normalize comes from processing_data).
# NOTE(review): presumably T holds the genuine samples (label 1) - confirm.
nol_N = normalize(N)
nol_T = normalize(T)

# Prepend the class label as the first column: 1 for T samples, 0 for N samples.
lab_T = np.column_stack(([1] * nol_T.shape[0], nol_T))
lab_N = np.column_stack(([0] * nol_N.shape[0], nol_N))

# G is the training data, with 20 known samples (per the original note).
# np.vstack replaces the deprecated alias np.row_stack; the result is the same.
G = np.vstack((lab_T, lab_N))

# Drop the intermediates so that only G stays in the module namespace.
del N, T, lab_T, lab_N, nol_N, nol_T, data_file