# train_classifier.py (forked from udacity/CarND-Vehicle-Detection)
'''
Main utility to train an SVM classifier to detect vehicles.
- Builds a dataset of car / not-car images
- Extracts features (spatial binning, color histograms, HOG)
- Performs PCA for dimensionality reduction
- Optional grid search for the best model (working, but commented out)
- Saves the best model and its parameters in a pickle file
- The saved model and parameters are then ready to be used by VehicleDetector
  for detecting vehicles in video/static images
'''
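# Usage sketch (hedged -- the layout below is an assumption; adjust DATA_DIR for
# your machine): point DATA_DIR at a folder containing 'vehicles/' and
# 'non-vehicles/' subfolders of .png images, then run the script directly:
#
#   python train_classifier.py
#
# The trained model is written to MODEL_DIR as a timestamped .pkl file.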
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
# GridSearchCV and StratifiedKFold now live in sklearn.model_selection
# (sklearn.grid_search and sklearn.cross_validation were removed in scikit-learn 0.20)
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from skimage.feature import hog
from scipy.ndimage import label  # scipy.ndimage.measurements is deprecated
# RandomizedPCA was removed from scikit-learn; use PCA(svd_solver='randomized') if needed
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from moviepy.editor import VideoFileClip
import pickle
import os
import glob
import time
from vehicle_detector import *
## -----------------------------------------------------------------------
DATA_DIR = '/Users/aa/Developer/datasets/udacity-vehicle-detection/'
CAR_DIR = DATA_DIR + 'vehicles/'
NON_VEHICLES = DATA_DIR + 'non-vehicles/'
MODEL_DIR = './model/'
TOGGLE_PCA = True  # Toggle switch: True (do PCA); False (don't do PCA)
def save_model(clf, scaler, pca_transformer, featureX, labelsY):
    ''' Save the classifier, scaler and (when PCA is enabled) the PCA transformer
    to a timestamped pickle file.
    '''
    loc = time.localtime()
    msg = str(loc.tm_mon) + '_' + str(loc.tm_mday) + '_' + str(loc.tm_hour) + '_' + str(loc.tm_min)
    PICKLE_FILE = MODEL_DIR + 'model_' + msg + '.pkl'
    FEATURE_FILE = MODEL_DIR + 'feats_' + msg + '.pkl'
    PCA_FILE = MODEL_DIR + 'pca_' + msg + '.pkl'
    SCALER_FILE = MODEL_DIR + 'scaler_' + msg + '.pkl'
    # save info
    if TOGGLE_PCA:
        dump_data = {
            "clf": clf,             # the classifier
            "scaler": scaler,       # per-column feature scaler
            "do_pca": True,         # YES, do PCA
            "pca": pca_transformer  # fitted PCA transformer
        }
    else:
        dump_data = {
            "clf": clf,             # the classifier
            "scaler": scaler,       # per-column feature scaler
            "do_pca": False         # NO PCA
        }
    with open(PICKLE_FILE, 'wb') as PF:
        pickle.dump(dump_data, PF)
    print('Model saved: ', PICKLE_FILE)
    """ Used for testing only
    feature_labels = {
        "features": featureX,
        "labels": labelsY
    }
    with open(FEATURE_FILE, 'wb') as MF:
        pickle.dump(feature_labels, MF)
    print('Features saved:', FEATURE_FILE)
    with open(PCA_FILE, 'wb') as FF:
        pickle.dump(pca_transformer, FF)
    print('PCA saved: ', PCA_FILE)
    with open(SCALER_FILE, 'wb') as SF:
        pickle.dump(scaler, SF)
    print('Scaler saved: ', SCALER_FILE)
    """
def save_pipeline(pipeline_estimator):
    ''' Save a fitted scikit-learn Pipeline to a timestamped pickle file.
    '''
    loc = time.localtime()
    msg = str(loc.tm_mon) + '_' + str(loc.tm_mday) + '_' + str(loc.tm_hour) + '_' + str(loc.tm_min)
    PIPELINE = MODEL_DIR + 'pipe_' + msg + '.pkl'
    with open(PIPELINE, 'wb') as F:
        pickle.dump(pipeline_estimator, F)
    print('Saved pipeline: ', PIPELINE)
def perform_pca(X, n_comp=128, show_plot=False):
    ''' Perform PCA on the given features X and return a reduced-size X (the principal components).
    :param X: the scaled feature matrix X
    :param n_comp: number of PCA components to collapse into
    :return: pca (the fitted transformer) and the PCA-transformed X
    '''
    pca = PCA(n_components=n_comp, whiten=True)
    pca = pca.fit(X)
    pca_features = pca.transform(X)
    explained_variance = pca.explained_variance_ratio_
    components = pca.components_
    print("Explained variance by {} principal components: {:.4f}".format(n_comp, sum(explained_variance[:n_comp])))
    if show_plot is True:
        # plot PCA explanation
        plt.subplot(2, 1, 1)
        plt.xlabel('Dimension')
        plt.ylabel('Explained Variance')
        plt.title('Explained Variances of PCA')
        plt.plot(pca.explained_variance_ratio_)
        plt.subplot(2, 1, 2)
        plt.xlabel('Dimension')
        plt.ylabel('Cumulative Explained Variance')
        plt.title('Cumulative Explained Variances of PCA')
        plt.plot(np.cumsum(pca.explained_variance_ratio_))
        plt.show()
    return pca, pca_features
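# Sketch (hedged, not part of the original flow): at detection time the same
# transform order must be applied to each new feature vector -- scale first,
# then project with the fitted PCA -- for example:
#
#   test_feats = X_scaler.transform(raw_feats.reshape(1, -1))
#   test_feats = pca_transformer.transform(test_feats)
#   prediction = svc.predict(test_feats)
#
# where X_scaler, pca_transformer and svc are the objects pickled by save_model().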
def perform_grid_search(X, y):
    ''' Performs grid search on X and y.
    Returns the best estimator and the grid search object.
    '''
    print('Performing grid search')
    pipeline_svc = Pipeline([
        ('scaler', StandardScaler()),
        ('pca', PCA(whiten=True)),
        ('clf', SVC(random_state=1))
    ])
    n_components_range = [64, 128, 256]
    param_range = [0.01, 0.1, 1.0, 10.0]
    param_grid = [
        {'clf__C': param_range, 'clf__kernel': ['linear'], 'pca__n_components': n_components_range},
        {'clf__C': param_range, 'clf__kernel': ['rbf'], 'clf__gamma': param_range, 'pca__n_components': n_components_range}
    ]
    t0 = time.time()
    gs = GridSearchCV(estimator=pipeline_svc,
                      param_grid=param_grid,
                      scoring='accuracy',
                      cv=3,
                      n_jobs=1,
                      verbose=1)
    gs = gs.fit(X, y)
    t1 = time.time()
    print('-'*80)
    print('Grid search time:', round(t1-t0, 2))
    print('Grid search best score:', gs.best_score_)
    print('Grid search params:', gs.best_params_)
    print('Best estimator:\n', gs.best_estimator_)
    print('-'*80)
    # return both the best estimator and the grid search object
    return gs.best_estimator_, gs
def build_pipeline():
    ''' Build a fixed scaler -> PCA -> SVC pipeline with pre-chosen hyperparameters.
    '''
    pipeline_svc = Pipeline([
        ('scaler', StandardScaler()),
        ('pca', PCA(whiten=True, n_components=128)),
        ('clf', SVC(C=1.0, kernel='rbf', gamma=0.01))
    ])
    return pipeline_svc
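# Usage sketch (assumption, mirroring the commented-out block in the main section):
#
#   pipe = build_pipeline()
#   pipe.fit(X_train, y_train)
#   print('Accuracy:', pipe.score(X_test, y_test))
#
# Note: the pipeline already contains StandardScaler and PCA, so it expects raw
# (unscaled, un-projected) feature vectors as input; feeding it already scaled
# and PCA-reduced data would apply those steps twice.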
def save_train_params():
    ''' Save training params -- to be used later for detection.
    Note: these names are module-level globals defined in the main section below.
    '''
    params = {
        "color_space": color_space,
        "orient": orient,
        "pix_per_cell": pix_per_cell,
        "cell_per_block": cell_per_block,
        "hog_channel": hog_channel,
        "spatial_size": spatial_size,
        "hist_bins": hist_bins,
        "spatial_feat": spatial_feat,
        "hist_feat": hist_feat,
        "hog_feat": hog_feat
    }
    CONFIG_FILE = './model/params.cfg'
    with open(CONFIG_FILE, 'wb') as F:
        pickle.dump(params, F)
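# Sketch (hedged): the detector side is expected to restore these parameters by
# unpickling the same file, e.g.:
#
#   with open('./model/params.cfg', 'rb') as F:
#       params = pickle.load(F)
#   orient = params['orient']   # and so on for the other keys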
########################## main ###########################
print('Looking for images: ', DATA_DIR)
images = glob.glob(DATA_DIR + '**/*.png', recursive=True) # For Larger dataset
# images = glob.glob(LOCAL_DATA_DIR + '**/*.jpeg', recursive=True) # for LOCAL dataset
print('Images len: ', len(images))
cars = []
notcars = []
# Split file paths into car / not-car lists based on the directory name
for image in images:
    if 'non-vehicles' in image or 'non-vehicles_smallset' in image:
        notcars.append(image)
    else:
        cars.append(image)
## for testing only: limit dataset size
# limit = 2000
# cars = cars[:limit]
# notcars = notcars[:limit]
print('Cars len :', len(cars))
print('Non-cars len:', len(notcars))
### CONFIGURATION: Tweak these parameters and see how the results change.
color_space = 'YCrCb' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9 # HOG orientations
pix_per_cell = 8 # HOG pixels per cell
cell_per_block = 2 # HOG cells per block
hog_channel = 'ALL' # Can be 0, 1, 2, or "ALL"
spatial_size = (32, 32) # Spatial binning dimensions
hist_bins = 32 # Number of histogram bins
spatial_feat = True # Spatial features on or off
hist_feat = True # Histogram features on or off
hog_feat = True # HOG features on or off
y_start_stop = [400, 680] # Min and max in y to search in slide_window()
# save params
save_train_params()
print('PCA: ', TOGGLE_PCA)
print('Using:',
      color_space, 'color_space,',
      orient, 'orientations,',
      pix_per_cell, 'pixels/cell,',
      cell_per_block, 'cells/block,',
      'hog_channel:', hog_channel,
      ', hist_bins:', hist_bins,
      ', spatial_size:', spatial_size,
      ', y_start_stop:', y_start_stop)
print('Extracting features...')
t0 = time.time()
car_features = extract_features(cars,
color_space=color_space,
spatial_size=spatial_size,
hist_bins=hist_bins,
orient=orient,
pix_per_cell=pix_per_cell,
cell_per_block=cell_per_block,
hog_channel=hog_channel,
spatial_feat=spatial_feat,
hist_feat=hist_feat,
hog_feat=hog_feat)
t1 = time.time()
print(' Car features done : in {0} secs'.format(round(t1-t0, 2)))
notcar_features = extract_features(notcars,
color_space=color_space,
spatial_size=spatial_size,
hist_bins=hist_bins,
orient=orient,
pix_per_cell=pix_per_cell,
cell_per_block=cell_per_block,
hog_channel=hog_channel,
spatial_feat=spatial_feat,
hist_feat=hist_feat,
hog_feat=hog_feat)
t2 = time.time()
print(' Notcar features done: in {0} secs'.format(round(t2-t1, 2)))
print('Feature vector length:', len(car_features[0]))
## Stack up the data
X = np.vstack((car_features, notcar_features)).astype(np.float64)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)
# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))
rand_state = np.random.randint(0, 100)
if TOGGLE_PCA:
    ## Perform PCA on scaled_X
    pca_transformer, X_pca = perform_pca(scaled_X, n_comp=64)
    # Split up data into randomized training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_pca, y,
                                                        test_size=0.2, random_state=rand_state)
else:
    print('NOT performing PCA')
    pca_transformer = None
    # Split up data into randomized training and test sets
    X_train, X_test, y_train, y_test = train_test_split(scaled_X, y,
                                                        test_size=0.2, random_state=rand_state)
print('X_train: ', X_train.shape)
print('y_train: ', y_train.shape)
print('X_test: ', X_test.shape)
print('y_test: ', y_test.shape)
"""
## Do grid search
# best_estimator, gs = perform_grid_search(X_train, y_train)
## Get built-in Pipeline
best_estimator = build_pipeline()
# now use the best estimator to train/fit and score
## Train the Classifier
t3 = time.time()
best_estimator.fit(X_train, y_train)
t4 = time.time()
print('Train time:', round(t4-t3, 2), ' secs')
# Check the score of the SVC
print('>> Test Accuracy of Estimator: ', round(best_estimator.score(X_test, y_test), 4))
# Check the prediction time for a single sample
t5 = time.time()
print('Test time:', round(t5-t4, 2), ' secs')
## Save Pipeline
save_pipeline(best_estimator)
"""
## Fit the model
# Use an SVC
if TOGGLE_PCA:
    svc = SVC(C=10.0, kernel='rbf', gamma=0.01)  # LinearSVC(max_iter=8000)
else:
    svc = LinearSVC(C=0.08, loss='hinge')
# Check the training time for the SVC
print(svc)
t3 = time.time()
## Train the Classifier
svc.fit(X_train, y_train)
t4 = time.time()
print('Train time:', round(t4-t3, 2), ' secs')
# Check the score of the SVC
print('>> Test Accuracy of SVC: ', round(svc.score(X_test, y_test), 4))
# Time taken to evaluate (score) the full test set
t5 = time.time()
print('Test time:', round(t5-t4, 2), ' secs')
## Save Model, Scaler and Features/Labels
save_model(svc, X_scaler, pca_transformer, scaled_X, y)
print('Done')
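# Sketch (hedged) of how VehicleDetector could restore what was saved above --
# the key names match the dict written by save_model():
#
#   with open(saved_model_path, 'rb') as PF:   # saved_model_path: the file printed above
#       data = pickle.load(PF)
#   clf, scaler = data['clf'], data['scaler']
#   pca = data['pca'] if data['do_pca'] else None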