# NOTE: this script targets Python 2 and a pre-1.0 TensorFlow API
import datetime
import math
import time

import numpy
import scipy.io
import tensorflow as tf
from skimage.color import rgb2gray
from sklearn import preprocessing
# sklearn < 0.18; newer versions moved train_test_split to sklearn.model_selection
from sklearn.cross_validation import train_test_split

import sigopt.interface
from sigopt_creds import client_token  # local module holding the SigOpt API token


# load the SVHN dataset; the .mat files come from
# http://ufldl.stanford.edu/housenumbers/ (format 2: cropped digits)
extra_mat = scipy.io.loadmat("extra_32x32.mat")
extra_X = extra_mat['X'].astype('float64')
extra_y = extra_mat['y'].astype('float64')
test_mat = scipy.io.loadmat("test_32x32.mat")
test_X = test_mat['X'].astype('float64')
test_y = test_mat['y'].astype('float64')
train_mat = scipy.io.loadmat("train_32x32.mat")
train_X = train_mat['X'].astype('float64')
train_y = train_mat['y'].astype('float64')
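# each X arrives as (32, 32, 3, n_samples); labels run 1-10,
# with 10 standing in for the digit 0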

def convert_rgb2gray(X):
    # collapse each 32x32 RGB image to a single grayscale channel;
    # rgb2gray also rescales pixel values into [0, 1]
    X_gray = numpy.zeros((32, 32, X.shape[3]))
    for i in xrange(X.shape[3]):
        X_gray[:, :, i] = rgb2gray(X[:, :, :, i])
    return X_gray

# convert all image data to grayscale
extra_X = convert_rgb2gray(extra_X)
test_X = convert_rgb2gray(test_X)
train_X = convert_rgb2gray(train_X)

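# flatten each image into a 1024-dimensional row vector; transposing first
# puts the sample axis in front, so each row stays one whole image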
image_w = 32
train_XZ = numpy.reshape(train_X.T, (train_X.shape[2], image_w * image_w))
test_XZ = numpy.reshape(test_X.T, (test_X.shape[2], image_w * image_w))
extra_XZ = numpy.reshape(extra_X.T, (extra_X.shape[2], image_w * image_w))

# standardize each image's pixels (axis=1) to zero mean and unit variance
extra_XZ = preprocessing.scale(extra_XZ, axis=1)
train_XZ = preprocessing.scale(train_XZ, axis=1)
test_XZ = preprocessing.scale(test_XZ, axis=1)

# convert SVHN labels to one-hot format; fit the encoder once on the full
# label set so every split gets a consistent encoding
one_hot_enc = preprocessing.OneHotEncoder(sparse=False)
one_hot_enc.fit(numpy.concatenate((train_y, extra_y, test_y), axis=0))
test_yZ = one_hot_enc.transform(test_y)
train_yZ = one_hot_enc.transform(train_y)
extra_yZ = one_hot_enc.transform(extra_y)

# stack train and extra on top of each other
extra_XZ = numpy.concatenate((extra_XZ, train_XZ), axis=0)
extra_yZ = numpy.concatenate((extra_yZ, train_yZ), axis=0)

# keep a random 75% of the combined set for now: train_test_split returns
# the test_size=0.75 side as its second and fourth outputs
_, extra_XZ, _, extra_yZ = train_test_split(extra_XZ, extra_yZ, test_size=0.75, random_state=42)


# create SigOpt experiment
conn = sigopt.interface.Connection(client_token=client_token)
experiment = conn.experiments().create(
    name='SVHN ConvNet ' + datetime.datetime.now().strftime("%Y_%m_%d_%I%M_%S"),
    parameters=[
        {'name': 'filter1_w', 'type': 'int', 'bounds': {'min': 3, 'max': 10}},
        {'name': 'filter1_depth', 'type': 'int', 'bounds': {'min': 10, 'max': 64}},
        {'name': 'filter2_w', 'type': 'int', 'bounds': {'min': 3, 'max': 10}},
        {'name': 'filter2_depth', 'type': 'int', 'bounds': {'min': 10, 'max': 64}},
        {'name': 'drp_out_keep_p', 'type': 'double', 'bounds': {'min': 0.2, 'max': 1.0}},
        {'name': 'log_rms_lr', 'type': 'double', 'bounds': {'min': math.log(0.00001),
                                                            'max': math.log(1.0)}},
        {'name': 'rms_mom', 'type': 'double', 'bounds': {'min': 0.5, 'max': 1.0}},
        {'name': 'rms_decay', 'type': 'double', 'bounds': {'min': 0.5, 'max': 1.0}},
    ],
)
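# note: log_rms_lr is searched in log space and exponentiated inside the loop,
# so the learning rate is explored on a multiplicative scale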

# SigOpt optimization loop: 100 suggestion/observation rounds
for jk in xrange(100):
    # fetch the next suggested hyperparameter assignment from SigOpt
    suggestion = conn.experiments(experiment.id).suggestions().create()
    params = suggestion.assignments

    sess = tf.InteractiveSession()
    x = tf.placeholder(tf.float32, shape=[None, image_w * image_w])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    # unpack the suggested hyperparameters for this round
    filter1_w = int(params['filter1_w'])
    filter1_depth = int(params['filter1_depth'])
    filter2_w = int(params['filter2_w'])
    filter2_depth = int(params['filter2_depth'])
    rms_lr = math.exp(params['log_rms_lr'])  # back from log space
    rms_mom = params['rms_mom']
    rms_decay = params['rms_decay']
    drp_out_keep_p = params['drp_out_keep_p']

    def weight_variable(shape):
        # small positive noise breaks symmetry between units
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # slightly positive bias keeps ReLUs active at the start
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        # stride-1 convolution; 'SAME' zero padding preserves width/height
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 max pooling halves width and height
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')
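    # the network: two conv+pool blocks, a fully connected layer with dropout,
    # and a softmax readout over the 10 digit classes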

    W_conv1 = weight_variable([filter1_w, filter1_w, 1, filter1_depth])
    b_conv1 = bias_variable([filter1_depth])

    x_image = tf.reshape(x, [-1, image_w, image_w, 1], name='reshape1')

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([filter2_w, filter2_w, filter1_depth, filter2_depth])
    b_conv2 = bias_variable([filter2_depth])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
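    # two rounds of 2x2 pooling shrink 32x32 -> 16x16 -> 8x8, which is why
    # the flattened feature size below is 8 * 8 * filter2_depth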

    W_fc1 = weight_variable([8 * 8 * filter2_depth, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 8 * 8 * filter2_depth], name='reshape2')
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # dropout on the fully connected layer; keep_prob is fed at run time
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # clip the softmax output so log(0) cannot produce NaNs in the loss
    cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
    train_step = tf.train.RMSPropOptimizer(rms_lr, decay=rms_decay,
                                           momentum=rms_mom).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # generate a single CV fold to do hyperparameter optimization
    train_XZ_2, valid_XZ, train_yZ_2, valid_yZ = train_test_split(
        extra_XZ, extra_yZ, test_size=0.15, random_state=42)
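    # the 15% validation side of this split produces the metric SigOpt optimizes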

    sess.run(tf.initialize_all_variables())
    # run SGD over the training fold in large minibatches
    t0 = time.time()
    batch_size = 10000
    epoch_size = 1
    for k in xrange(epoch_size):
        for i in xrange(0, train_XZ_2.shape[0], batch_size):
            # i advances by batch_size, so this logs accuracy once per batch
            train_accuracy = accuracy.eval(feed_dict={
                x: train_XZ_2[i:(i + batch_size)],
                y_: train_yZ_2[i:(i + batch_size)],
                keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(feed_dict={
                x: train_XZ_2[i:(i + batch_size)],
                y_: train_yZ_2[i:(i + batch_size)],
                keep_prob: drp_out_keep_p})
    # evaluate the validation metric in chunks so we don't hit an OOM error,
    # weighting each chunk by the fraction of the validation set it covers
    opt_metric = 0.0
    opt_chunk = 10
    chunk_size = valid_XZ.shape[0] // opt_chunk
    for i in xrange(0, valid_XZ.shape[0], chunk_size):
        chunk_acc = accuracy.eval(feed_dict={
            x: valid_XZ[i:(i + chunk_size)],
            y_: valid_yZ[i:(i + chunk_size)],
            keep_prob: 1.0})
        chunk_range = min(i + chunk_size, valid_XZ.shape[0]) - i
        chunk_perc = chunk_range / float(valid_XZ.shape[0])
        opt_metric += chunk_acc * chunk_perc
    print("validation accuracy %g" % opt_metric)
    print("Total Time : %.1f seconds" % (time.time() - t0))

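    # value_stddev tells SigOpt the reported accuracy is a noisy measurement
    # (a single run's result varies), so the optimizer allows roughly 0.05 of
    # observation noise instead of trusting each value exactly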
    # report the observed validation accuracy back to SigOpt
    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id,
        value=float(opt_metric),
        value_stddev=0.05,
    )

    # free the session before the next suggestion round
    sess.close()
# hold-out accuracy: evaluate once on the untouched test set after the
# experiment has finished
# print("test accuracy %g" % accuracy.eval(feed_dict={x: test_XZ, y_: test_yZ, keep_prob: 1.0}))