-
Notifications
You must be signed in to change notification settings - Fork 1
/
train.py
155 lines (118 loc) · 4.24 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# sklearn functions
from sklearn.svm import LinearSVC, SVC
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
from sklearn import feature_selection
import warnings
import json
import os
from sklearn.externals import joblib
import pickle
import numpy as np
# My helpers
from preprocess import extractFeatures
from preprocess import sampleSet
def readPredMeta(sensor):
    """Read and parse the prediction meta.json for the given sensor.

    Returns the decoded JSON value (a list of sample descriptors).
    """
    path = predictionDir(sensor) + 'meta.json'
    # Context manager closes the handle even on error;
    # the original opened the file and never closed it.
    with open(path, 'r') as f:
        return json.load(f)
def generatePredictionData(meta, dataDir):
    """Build a feature matrix for every sample listed in *meta*.

    meta    -- list of dicts, each carrying a 'filename' key
    dataDir -- directory prefix the filenames are relative to
    Returns the stacked feature rows (empty list when meta is empty,
    a single feature row for one sample, a vstacked array otherwise),
    matching the original's return shapes.
    """
    # Accumulate rows in a plain list and stack once at the end.  The
    # original compared a numpy array against [] (unreliable — numpy
    # broadcasts the comparison) and re-vstacked on every iteration
    # (quadratic copying).  The unused allLabels accumulator is removed.
    rows = []
    for sample in meta:
        rawData = np.genfromtxt(dataDir + sample['filename'], delimiter=',')
        rows.append(extractFeatures(rawData))
    if not rows:
        return []
    if len(rows) == 1:
        return rows[0]
    return np.vstack(rows)
def saveObject(obj, filename):
    """Pickle *obj* to *filename* using the highest protocol available."""
    output = open(filename, 'wb')
    try:
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)
    finally:
        output.close()
def modelDir(sensor):
    """Return the path (with trailing slash) of a sensor's model directory."""
    return 'sensors/%s/model/' % sensor
def dataDir(sensor):
    """Return the path (with trailing slash) of a sensor's samples directory."""
    return 'sensors/%s/samples/' % sensor
def predictionDir(sensor):
    """Return the path (with trailing slash) of a sensor's predictions directory."""
    return 'sensors/%s/predictions/' % sensor
# Read a meta file
def readMeta(sensor):
    """Read and parse the training meta.json for the given sensor.

    Returns the decoded JSON value (a list of sample descriptors).
    """
    path = dataDir(sensor) + 'meta.json'
    # Context manager closes the handle even on error;
    # the original opened the file and never closed it.
    with open(path, 'r') as f:
        return json.load(f)
# Generate a list of labels from meta.json
def generateLabels(meta):
    """Return the unique sample labels in first-seen order.

    meta -- list of dicts, each carrying a 'label' key
    """
    # Track seen labels in a set for O(1) membership tests; the original
    # rescanned the output list for every label (quadratic).  Insertion
    # order is preserved, so the result is identical.
    seen = set()
    output = []
    for sample in meta:
        label = sample['label']
        if label not in seen:
            seen.add(label)
            output.append(label)
    return output
def generateData(meta, labels, dataDir):
    """Build a labelled training set from the samples listed in *meta*.

    meta    -- list of dicts carrying 'filename' and 'label' keys
    labels  -- ordered label list; each sample's label is encoded as its index
    dataDir -- directory prefix the filenames are relative to
    Returns a sampleSet of stacked feature rows and integer label indices.
    """
    # Accumulate rows in a plain list and stack once at the end.  The
    # original compared a numpy array against [] (unreliable — numpy
    # broadcasts the comparison) and re-vstacked on every iteration
    # (quadratic copying).
    rows = []
    allLabels = []
    for sample in meta:
        rawData = np.genfromtxt(dataDir + sample['filename'], delimiter=',')
        rows.append(extractFeatures(rawData))
        allLabels.append(labels.index(sample['label']))
    # Preserve the original's return shapes: [] for no samples, the bare
    # feature row for one sample, a vstacked matrix otherwise.
    if not rows:
        allFeatures = []
    elif len(rows) == 1:
        allFeatures = rows[0]
    else:
        allFeatures = np.vstack(rows)
    return sampleSet(allFeatures, allLabels)
# Train a classifier for a given sensor and save it
def trainClassifier(sensor):
    """Fit a scaler, feature selector and linear SVC for *sensor* and persist them.

    Writes model.pkl, scaler.pkl, selector.pkl and labels.pkl under the
    sensor's model directory, creating it if missing.
    """
    # Load meta information about the recorded samples
    meta = readMeta(sensor)
    # Generate a training set
    labels = generateLabels(meta)
    data = generateData(meta, labels, dataDir(sensor))
    # Standardize features.  StandardScaler is the current name of the
    # long-deprecated preprocessing.Scaler alias and exists in every
    # sklearn release that still ships sklearn.cross_validation.
    scaler = preprocessing.StandardScaler().fit(data.features)
    scaledFeatures = scaler.transform(data.features)
    # Univariate feature selection (keeps the k best by f_regression score)
    selector = feature_selection.SelectKBest(feature_selection.f_regression).fit(scaledFeatures, data.labels)
    selectedFeatures = selector.transform(scaledFeatures)
    # Train a linear-kernel SVM
    clf = SVC(kernel='linear', C=1).fit(selectedFeatures, data.labels)
    # Persist everything needed at prediction time
    modelPath = modelDir(sensor)
    if not os.path.exists(modelPath):
        os.makedirs(modelPath)
    joblib.dump(clf, modelPath + 'model.pkl')
    joblib.dump(scaler, modelPath + 'scaler.pkl')
    joblib.dump(selector, modelPath + 'selector.pkl')
    saveObject(labels, modelPath + 'labels.pkl')
def crossValidation(sensor):
    """Return 5-fold cross-validation scores for the sensor's SVC pipeline.

    NOTE(review): the scaler and feature selector are fit on the full
    dataset before the CV split, which leaks fold information into the
    preprocessing — confirm whether that is acceptable here.
    """
    meta = readMeta(sensor)
    # Generate a training set
    labels = generateLabels(meta)
    data = generateData(meta, labels, dataDir(sensor))
    # Standardize features (StandardScaler replaces the long-deprecated
    # preprocessing.Scaler alias, available in the same sklearn versions)
    scaler = preprocessing.StandardScaler().fit(data.features)
    scaledFeatures = scaler.transform(data.features)
    # Univariate feature selection (keeps the k best by f_regression score)
    selector = feature_selection.SelectKBest(feature_selection.f_regression).fit(scaledFeatures, data.labels)
    selectedFeatures = selector.transform(scaledFeatures)
    # Score an (unfitted) linear-kernel SVM with 5-fold cross-validation
    clf = SVC(kernel='linear', C=1)
    scores = cross_val_score(clf, selectedFeatures, data.labels, cv=5)
    return scores
# Main function
if __name__ == "__main__":
    # Blanket-suppress warnings (sklearn deprecation chatter); intentional
    # best-effort behavior kept from the original.
    warnings.filterwarnings('ignore')
    sensor = 'knocking'
    trainClassifier(sensor)
    crossValidation(sensor)
    # Parenthesized print is valid under both Python 2 and Python 3;
    # the original `print "..."` statement is a syntax error on Python 3.
    print("Finished training")