diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d1e2a95 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +logs/ +logs_proposed/ +__pycache__/ +*.txt diff --git a/VAD_bDNN.py b/VAD_bDNN.py new file mode 100644 index 0000000..a330d9f --- /dev/null +++ b/VAD_bDNN.py @@ -0,0 +1,330 @@ +import tensorflow as tf +import numpy as np +import utils as utils +import re +import data_reader_bDNN as dr +import os + +FLAGS = tf.flags.FLAGS + +tf.flags.DEFINE_string('mode', "train", "mode : train/ test [default : train]") + +file_dir = "/home/sbie/github/VAD_KJT/Datafolder/SE_TIMIT_MRCG_0328" +input_dir = file_dir +output_dir = file_dir + "/Labels" + +valid_file_dir = "/home/sbie/github/VAD_KJT/Datafolder/NX_TIMIT_MRCG_small" +valid_input_dir = valid_file_dir + "/Babble" +valid_output_dir = valid_file_dir + "/Babble/Labels" + +norm_dir = input_dir + +logs_dir = "/home/sbie/github/VAD_bDNN_baseline/logs" + +reset = True # remove all existed logs and initialize log directories +device = '/gpu:1' + +if FLAGS.mode is 'test': + reset = False + +if reset: + + os.popen('rm -rf ' + logs_dir + '/*') + os.popen('mkdir ' + logs_dir + '/train') + os.popen('mkdir ' + logs_dir + '/valid') + + +learning_rate = 0.00005 +eval_num_batches = 2e5 +SMALL_NUM = 1e-4 +max_epoch = int(1e5) +dropout_rate = 0.5 + +decay = 0.9 # batch normalization decay factor +w = 19 # w default = 19 +u = 9 # u default = 9 +eval_th = 0.7 +th = 0.7 +num_hidden_1 = 512 +num_hidden_2 = 512 + +batch_size = 4096 + 2*w # batch_size = 32 +valid_batch_size = batch_size + +assert (w-1) % u == 0, "w-1 must be divisible by u" + +num_features = 768 # MRCG feature +bdnn_winlen = (((w-1) / u) * 2) + 3 + +bdnn_inputsize = int(bdnn_winlen * num_features) +bdnn_outputsize = int(bdnn_winlen) + + +def affine_transform(x, output_dim, name=None): + """ + affine transformation Wx+b + assumes x.shape = (batch_size, num_features) + """ + + w = tf.get_variable(name + "_w", [x.get_shape()[1], output_dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) + b = tf.get_variable(name + "_b", [output_dim], initializer=tf.constant_initializer(0.0)) + + return tf.matmul(x, w) + b + + +def inference(inputs, keep_prob, is_training=True): + + # initialization + # h1_out = affine_transform(inputs, num_hidden_1, name="hidden_1") + h1_out = utils.batch_norm_affine_transform(inputs, num_hidden_1, name="hidden_1", decay=decay, is_training=is_training) + h1_out = tf.nn.relu(h1_out) + h1_out = tf.nn.dropout(h1_out, keep_prob=keep_prob) + + # h2_out = utils.batch_norm_affine_transform(h1_out, num_hidden_2, name="hidden_2") + h2_out = utils.batch_norm_affine_transform(h1_out, num_hidden_2, name="hidden_2", decay=decay, is_training=is_training) + h2_out = tf.nn.relu(h2_out) + h2_out = tf.nn.dropout(h2_out, keep_prob=keep_prob) + + logits = affine_transform(h2_out, bdnn_outputsize, name="output") + logits = tf.sigmoid(logits) + logits = tf.reshape(logits, [-1, int(bdnn_outputsize)]) + + return logits + + +def train(loss_val, var_list): + + initLr = 1e-5 + lrDecayRate = .95 + lrDecayFreq = 200 + + global_step = tf.Variable(0, trainable=False) + lr = tf.train.exponential_decay(initLr, global_step, lrDecayFreq, lrDecayRate, staircase=True) + + optimizer = tf.train.AdamOptimizer(lr) + grads = optimizer.compute_gradients(loss_val, var_list=var_list) + + return optimizer.apply_gradients(grads, global_step=global_step) + + +def bdnn_prediction(bdnn_batch_size, logits, threshold=th): + + result = np.zeros((bdnn_batch_size, 1)) + indx = np.arange(bdnn_batch_size) + 1 + 
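+    # Aggregation note: every frame is covered by several overlapping bDNN prediction
+    # windows, so below the sigmoid outputs that refer to the same frame are averaged
+    # and then thresholded; the first and last w rows of `result` stay zero (padding)
+    # and are trimmed before returning.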
indx = indx.reshape((bdnn_batch_size, 1)) + indx = utils.bdnn_transform(indx, w, u) + indx = indx[w:(bdnn_batch_size-w), :] + indx_list = np.arange(w, bdnn_batch_size - w) + + for i in indx_list: + indx_temp = np.where((indx-1) == i) + pred = logits[indx_temp] + pred = np.sum(pred)/pred.shape[0] + result[i] = pred + + result = np.trim_zeros(result) + result = result >= threshold + + return result.astype(int) + + +def evaluation(m_valid, valid_data_set, sess, eval_batch_size, num_batches=eval_num_batches): + # num_samples = valid_data_set.num_samples + # num_batches = num_samples / batch_size + avg_valid_cost = 0. + avg_valid_accuracy = 0. + itr_sum = 0. + + accuracy_list = [0 for i in range(valid_data_set._file_len)] + cost_list = [0 for i in range(valid_data_set._file_len)] + itr_file = 0 + + while True: + + valid_inputs, valid_labels = valid_data_set.next_batch(eval_batch_size) + + if valid_data_set.file_change_checker(): + + accuracy_list[itr_file] = avg_valid_accuracy / itr_sum + cost_list[itr_file] = avg_valid_cost / itr_sum + avg_valid_accuracy = 0. + avg_valid_cost = 0. + itr_sum = 0 + itr_file += 1 + valid_data_set.file_change_initialize() + + if valid_data_set.eof_checker(): + valid_data_set.reader_initialize() + print('Valid data reader was initialized!') # initialize eof flag & num_file & start index + break + + feed_dict = {m_valid.inputs: valid_inputs, m_valid.labels: valid_labels, + m_valid.keep_probability: 1} + + valid_cost, valid_logits = sess.run([m_valid.cost, m_valid.logits], feed_dict=feed_dict) + valid_pred = bdnn_prediction(eval_batch_size, valid_logits, threshold=eval_th) + # print(np.sum(valid_pred)) + + raw_indx = int(np.floor(valid_labels.shape[1]/2)) + raw_labels = valid_labels[:, raw_indx] + raw_labels = raw_labels.reshape((-1, 1)) + valid_accuracy = np.equal(valid_pred, raw_labels) + valid_accuracy = valid_accuracy.astype(int) + valid_accuracy = np.sum(valid_accuracy)/eval_batch_size + avg_valid_cost += valid_cost + avg_valid_accuracy += valid_accuracy + itr_sum += 1 + + total_avg_valid_cost = np.asscalar(np.mean(np.asarray(cost_list))) + total_avg_valid_accuracy = np.asscalar(np.mean(np.asarray(accuracy_list))) + + return total_avg_valid_cost, total_avg_valid_accuracy, accuracy_list + + +def dense_to_one_hot(labels_dense, num_classes=2): + + """Convert class labels from scalars to one-hot vectors.""" + # copied from TensorFlow tutorial + num_labels = labels_dense.shape[0] + index_offset = np.arange(num_labels) * num_classes + labels_one_hot = np.zeros((num_labels, num_classes)) + labels_one_hot.flat[(index_offset + labels_dense.ravel()).astype(int)] = 1 + return labels_one_hot.astype(np.float32) + + +class Model(object): + + def __init__(self, is_training=True): + + self.keep_probability = tf.placeholder(tf.float32, name="keep_probabilty") + self.inputs = inputs = tf.placeholder(tf.float32, shape=[None, bdnn_inputsize], + name="inputs") + self.labels = labels = tf.placeholder(tf.float32, shape=[None, bdnn_outputsize], name="labels") + + # set inference graph + self.logits = logits = inference(inputs, self.keep_probability, is_training=is_training) # (batch_size, bdnn_outputsize) + # set objective function + # self.cost = cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)) + + self.cost = cost = tf.reduce_mean(tf.square(labels - logits)) + + # cost = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits) + # cost = tf.reduce_sum(tf.square(labels - logits), axis=1) + # self.cost = cost = 
tf.reduce_mean(cost) + + # self.sigm = tf.sigmoid(logits) + # set training strategy + + trainable_var = tf.trainable_variables() + self.train_op = train(cost, trainable_var) + + +def main(argv=None): + + # Graph Part # + + print("Graph initialization...") + with tf.device(device): + with tf.variable_scope("model", reuse=None): + m_train = Model(is_training=True) + with tf.variable_scope("model", reuse=True): + m_valid = Model(is_training=False) + + print("Done") + + # Summary Part # + + print("Setting up summary op...") + + cost_ph = tf.placeholder(dtype=tf.float32) + accuracy_ph = tf.placeholder(dtype=tf.float32) + + cost_summary_op = tf.summary.scalar("cost", cost_ph) + accuracy_summary_op = tf.summary.scalar("accuracy", accuracy_ph) + + train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/', max_queue=4) + valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/', max_queue=4) + print("Done") + + # Model Save Part # + + print("Setting up Saver...") + saver = tf.train.Saver() + ckpt = tf.train.get_checkpoint_state(logs_dir) + print("Done") + + # Session Part # + + sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + sess_config.gpu_options.allow_growth = True + sess = tf.Session(config=sess_config) + + if ckpt and ckpt.model_checkpoint_path: # model restore + print("Model restored...") + saver.restore(sess, ckpt.model_checkpoint_path) + print("Done") + else: + sess.run(tf.global_variables_initializer()) # if the checkpoint doesn't exist, do initialization + + train_data_set = dr.DataReader(input_dir, output_dir, norm_dir, w=w, u=u, name="train") # training data reader initialization + valid_data_set = dr.DataReader(valid_input_dir, valid_output_dir, norm_dir, w=w, u=u, name="valid") # validation data reader initialization + + if FLAGS.mode is 'train': + + epoch = 0 + for itr in range(max_epoch): + + train_inputs, train_labels = train_data_set.next_batch(batch_size) + + feed_dict = {m_train.inputs: train_inputs, m_train.labels: train_labels, + m_train.keep_probability: dropout_rate} + + sess.run(m_train.train_op, feed_dict=feed_dict) + + if itr % 50 == 0 and itr >= 0: + + train_cost, logits = sess.run([m_train.cost, m_train.logits], feed_dict=feed_dict) + + result = bdnn_prediction(batch_size, logits, threshold=th) + raw_indx = int(np.floor(train_labels.shape[1] / 2)) + raw_labels = train_labels[:, raw_indx] + raw_labels = raw_labels.reshape((-1, 1)) + train_accuracy = np.equal(result, raw_labels) + train_accuracy = train_accuracy.astype(int) + train_accuracy = np.sum(train_accuracy) / batch_size # change to mean... 
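+                # Note: bdnn_prediction trims the 2*w boundary frames, so `result` and
+                # `raw_labels` each hold roughly batch_size - 2*w rows; dividing by
+                # batch_size therefore understates the accuracy slightly. In line with the
+                # "change to mean" remark above, a hypothetical alternative would be:
+                #   train_accuracy = np.mean(np.equal(result, raw_labels).astype(np.float32))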
+ + print("Step: %d, train_cost: %.3f, train_accuracy=%3.3f" % (itr, train_cost, train_accuracy)) + + train_cost_summary_str = sess.run(cost_summary_op, feed_dict={cost_ph: train_cost}) + train_accuracy_summary_str = sess.run(accuracy_summary_op, feed_dict={accuracy_ph: train_accuracy}) + train_summary_writer.add_summary(train_cost_summary_str, itr) # write the train phase summary to event files + train_summary_writer.add_summary(train_accuracy_summary_str, itr) + + # if train_data_set.eof_checker(): + if itr % 200 == 0 and itr > 0: + + saver.save(sess, logs_dir + "/model.ckpt", itr) # model save + print('validation start!') + valid_cost, valid_accuracy, valid_list = evaluation(m_valid, valid_data_set, sess, valid_batch_size) + # + print('epoch : %d' % epoch) + print("avg_valid_cost: %.3f, avg_valid_accuracy: %.3f" % (valid_cost, valid_accuracy)) + print('valid_accuracy wrt SNR:') + print('SNR_-5 : %.3f, SNR_0 : %.3f, SNR_5 : %.3f, SNR_10 : %.3f' % (valid_list[0], valid_list[1], + valid_list[2], valid_list[3])) + valid_summary_str_cost = sess.run(cost_summary_op, feed_dict={cost_ph: valid_cost}) + valid_summary_str_accuracy = sess.run(accuracy_summary_op, feed_dict={accuracy_ph: valid_accuracy}) + valid_summary_writer.add_summary(valid_summary_str_cost, itr) + valid_summary_writer.add_summary(valid_summary_str_accuracy, itr) + # train_data_set.reader_initialize() + # print('Train data reader was initialized!') # initialize eof flag & num_file & start index + epoch += 1 + + elif FLAGS.mode is 'test': + _, valid_accuracy = evaluation(m_valid, valid_data_set, sess, valid_batch_size) + print("valid_accuracy = %.3f" % valid_accuracy) + +if __name__ == "__main__": + tf.app.run() + + diff --git a/VAD_modify.py b/VAD_modify.py new file mode 100644 index 0000000..e53efeb --- /dev/null +++ b/VAD_modify.py @@ -0,0 +1,566 @@ +import tensorflow as tf +import numpy as np +import utils as utils +import re +import data_reader_bDNN as dr +import os +from tensorflow.contrib import rnn + +FLAGS = tf.flags.FLAGS + +tf.flags.DEFINE_string('mode', "train", "mode : train/ test [default : train]") + +# file_dir = "/home/sbie/github/VAD_KJT/Datafolder/SE_TIMIT_MRCG_0328" +file_dir = "/home/sbie/storage/VAD_Database/SE_TIMIT_MRCG_0328" +input_dir = file_dir +output_dir = file_dir + "/Labels" + +valid_file_dir = "/home/sbie/storage/VAD_Database/NX_TIMIT_MRCG_small" +valid_input_dir = valid_file_dir + "/Babble" +valid_output_dir = valid_file_dir + "/Babble/Labels" + +norm_dir = input_dir + +logs_dir = "/home/sbie/github/VAD_bDNN_baseline/logs_proposed" + +reset = True # remove all existed logs and initialize log directories +device = '/gpu:0' + +if FLAGS.mode is 'test': + reset = False + +if reset: + + os.popen('rm -rf ' + logs_dir + '/*') + os.popen('mkdir ' + logs_dir + '/train') + os.popen('mkdir ' + logs_dir + '/valid') + + +SMALL_NUM = 1e-5 +learning_rate = 0.00005 +eval_num_batches = 2e5 +SMALL_NUM = 1e-4 +max_epoch = int(1e5) +dropout_rate = 0.5 +decay = 0.9 # batch normalization decay factor +w = 19 # w default = 19 +u = 9 # u default = 9 +eval_th = 0.6 +th = 0.6 +action_hidden_1 = 512 +action_hidden_2 = 512 + +effective_batch_size = int(4096 * 2) +train_batch_size = effective_batch_size + 2*w # batch_size = 32 +valid_batch_size = train_batch_size + +assert (w-1) % u == 0, "w-1 must be divisible by u" + +num_features = 768 # for MRCG feature = 768 +bdnn_winlen = (((w-1) / u) * 2) + 3 + +bdnn_inputsize = int(bdnn_winlen * num_features) +bdnn_outputsize = int(bdnn_winlen) + +glimpse_hidden = 256 +bp_hidden = 
128 +glimpse_out = bp_out = 128 +nGlimpses = 6 +lstm_cell_size = 128 + +global_step = tf.Variable(0, trainable=False) + +initLr = 1e-5 +lrDecayRate = .95 +lrDecayFreq = 200 +lr = tf.train.exponential_decay(initLr, global_step, lrDecayFreq, lrDecayRate, staircase=True) + + +def affine_transform(x, output_dim, name=None): + """ + affine transformation Wx+b + assumes x.shape = (batch_size, num_features) + """ + + w = tf.get_variable(name + "_w", [x.get_shape()[1], output_dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) + b = tf.get_variable(name + "_b", [output_dim], initializer=tf.constant_initializer(0.0)) + + return tf.matmul(x, w) + b + + +def sw_sensor(inputs, bp): + + bp = tf.expand_dims(bp, axis=2) + bp = tf.tile(bp, (1, 1, num_features)) + bp = tf.reshape(bp, (inputs.get_shape()[0].value, -1, 1)) + bp = tf.squeeze(bp) + + sw = bp * inputs + + return sw + + +def get_glimpse(inputs, bp, reuse=None, is_training=True): + + with tf.variable_scope("glimpse_net", reuse=reuse): + + glimpse_input = sw_sensor(inputs, bp) + + act_glimpse_hidden = tf.nn.relu(utils.batch_norm_affine_transform(glimpse_input, glimpse_hidden, decay=decay, + name='glimpse_hidden', is_training=is_training)) + act_bp_hidden = tf.nn.relu(utils.batch_norm_affine_transform(bp, bp_hidden, decay=decay, name='bp_hidden', + is_training=is_training)) + + glimpse_feature = tf.nn.relu(utils.batch_norm_affine_transform(act_glimpse_hidden, glimpse_out, decay=decay, + name='glimpse_out', is_training=is_training) + + utils.batch_norm_affine_transform(act_bp_hidden, bp_out, decay=decay, + name='bp_out', is_training=is_training)) + + return glimpse_feature + + +def bernoulli_pmf(mean, sample): + """ + calculate the probability of bernoulli process + :param mean: mean. shape = (batch_size, num_sbs) + :param sample: sample. shape = (batch_size, num_sbs) + :return: p_br: shape = (batch_size, num_sbs) + """ + + p_br = tf.pow(mean, sample) * tf.pow(1 - mean, 1 - sample) + return p_br + + +def train(loss_val, var_list): + + lrDecayRate = .96 + lrDecayFreq = 200 + momentumValue = .9 + + global_step = tf.Variable(0, trainable=False) + lr = tf.train.exponential_decay(learning_rate, global_step, lrDecayFreq, lrDecayRate, staircase=True) + + # define the optimizer + # optimizer = tf.train.MomentumOptimizer(lr, momentumValue) + # optimizer = tf.train.AdagradOptimizer(learning_rate) + # + optimizer = tf.train.AdamOptimizer(lr) + grads = optimizer.compute_gradients(loss_val, var_list=var_list) + + return optimizer.apply_gradients(grads, global_step=global_step) + + +def bdnn_prediction(bdnn_batch_size, logits, threshold=th): + + result = np.zeros((int(bdnn_batch_size), 1)) + indx = np.arange(int(bdnn_batch_size)) + 1 + indx = indx.reshape((int(bdnn_batch_size), 1)) + indx = utils.bdnn_transform(indx, w, u) + indx = indx[w:(int(bdnn_batch_size)-w), :] + indx_list = np.arange(w, int(bdnn_batch_size) - w) + + for i in indx_list: + indx_temp = np.where((indx-1) == i) + pred = logits[indx_temp] + pred = np.sum(pred)/pred.shape[0] + result[i] = pred + + result = np.trim_zeros(result) + result = result >= threshold + + return result.astype(np.float32) + + +def evaluation(m_valid, valid_data_set, sess, eval_batch_size): + # num_samples = valid_data_set.num_samples + # num_batches = num_samples / batch_size + avg_valid_cost = 0. + avg_valid_accuracy = 0. + itr_sum = 0. 
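+    # accuracy_list / cost_list below keep one running average per validation file;
+    # with the Babble set used here, main() reports them as the per-SNR accuracies
+    # (SNR_-5 / 0 / 5 / 10), so each file is assumed to correspond to one SNR condition.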
+ + accuracy_list = [0 for i in range(valid_data_set._file_len)] + cost_list = [0 for i in range(valid_data_set._file_len)] + itr_file = 0 + + while True: + + valid_inputs, valid_labels = valid_data_set.next_batch(eval_batch_size) + + if valid_data_set.file_change_checker(): + + accuracy_list[itr_file] = avg_valid_accuracy / itr_sum + cost_list[itr_file] = avg_valid_cost / itr_sum + avg_valid_accuracy = 0. + avg_valid_cost = 0. + itr_sum = 0 + itr_file += 1 + valid_data_set.file_change_initialize() + + if valid_data_set.eof_checker(): + valid_data_set.reader_initialize() + print('Valid data reader was initialized!') # initialize eof flag & num_file & start index + break + + feed_dict = {m_valid.inputs: valid_inputs, m_valid.labels: valid_labels, + m_valid.keep_probability: 1} + + valid_cost, valid_accuracy = sess.run([m_valid.cost, m_valid.reward], feed_dict=feed_dict) + + avg_valid_cost += valid_cost + avg_valid_accuracy += valid_accuracy + itr_sum += 1 + + total_avg_valid_cost = np.asscalar(np.mean(np.asarray(cost_list))) + total_avg_valid_accuracy = np.asscalar(np.mean(np.asarray(accuracy_list))) + + return total_avg_valid_cost, total_avg_valid_accuracy, accuracy_list + + +def dense_to_one_hot(labels_dense, num_classes=2): + + """Convert class labels from scalars to one-hot vectors.""" + # copied from TensorFlow tutorial + num_labels = labels_dense.shape[0] + index_offset = np.arange(num_labels) * num_classes + labels_one_hot = np.zeros((num_labels, num_classes)) + labels_one_hot.flat[(index_offset + labels_dense.ravel()).astype(int)] = 1 + return labels_one_hot.astype(np.float32) + + +class Model(object): + + def __init__(self, batch_size, reuse=None, is_training=True): + + self.batch_size = batch_size + self.keep_probability = tf.placeholder(tf.float32, name="keep_probabilty") + self.inputs = tf.placeholder(tf.float32, shape=[batch_size, bdnn_inputsize], + name="inputs") + self.labels = tf.placeholder(tf.float32, shape=[batch_size, bdnn_outputsize], name="labels") + self.is_training = is_training + self.mean_bps = [] + self.sampled_bps = [] + self.baselines = [] + + # set inference graph + cell_outputs = self.inference(reuse) # (batch_size, bdnn_outputsize) + # set objective function + + self.cost, self.reward, self.train_op, self.avg_b, self.rminusb, self.sampled_bps_tensor, self.lr\ + = self.calc_reward(cell_outputs) + + def inference(self, reuse=None): + + # initialization + raw_inputs = self.inputs + batch_size = self.batch_size + keep_prob = self.keep_probability + is_training = self.is_training + + lstm_cell = rnn.LayerNormBasicLSTMCell(lstm_cell_size, dropout_keep_prob=keep_prob, reuse=reuse) + initial_state = lstm_cell.zero_state(batch_size, tf.float32) + + tf.set_random_seed(1) # initialize the random seed at graph level + + init_sw = tf.random_uniform([batch_size, int(bdnn_winlen)], minval=0, maxval=1) + self.mean_bps.append(init_sw) + + init_sw = tf.cast(tf.greater(init_sw, 0.5), tf.float32) + self.sampled_bps.append(init_sw) + + reuse_recurrent = None + + init_glimpse = self.get_glimpse(raw_inputs, init_sw, reuse=reuse_recurrent) # (batch_size, glimpse_out) + + inputs = [0] * nGlimpses + outputs = [0] * nGlimpses + glimpse = init_glimpse + + for time_step in range(nGlimpses): + + if time_step == 0: + with tf.variable_scope("core_network", reuse=reuse_recurrent): + (cell_output, cell_state) = lstm_cell(glimpse, initial_state) + else: + reuse_recurrent = True + with tf.variable_scope("core_network", reuse=reuse_recurrent): + (cell_output, cell_state) = lstm_cell(glimpse, 
cell_state) + + inputs[time_step] = glimpse + outputs[time_step] = cell_output + + if time_step != nGlimpses - 1: # not final time_step + + glimpse = self.get_next_input(cell_output, reuse=reuse_recurrent) + + else: # final time_step + with tf.variable_scope("baseline", reuse=reuse_recurrent): + cell_output_no_grad = tf.stop_gradient(cell_output) + baseline = tf.sigmoid( + utils.batch_norm_affine_transform(cell_output_no_grad, 1, decay=decay, name='baseline', + is_training=is_training)) + self.baselines.append(baseline) + + return outputs + + def get_glimpse(self, inputs, bp, reuse=None): + + is_training = self.is_training + + with tf.variable_scope("glimpse_net", reuse=reuse): + glimpse_input = sw_sensor(inputs, bp) + + act_glimpse_hidden = tf.nn.relu( + utils.batch_norm_affine_transform(glimpse_input, glimpse_hidden, decay=decay, + name='glimpse_hidden', is_training=is_training)) + act_bp_hidden = tf.nn.relu(utils.batch_norm_affine_transform(bp, bp_hidden, decay=decay, name='bp_hidden', + is_training=is_training)) + + glimpse_feature = tf.nn.relu(utils.batch_norm_affine_transform(act_glimpse_hidden, glimpse_out, decay=decay, + name='glimpse_out', + is_training=is_training) + + utils.batch_norm_affine_transform(act_bp_hidden, bp_out, decay=decay, + name='bp_out', is_training=is_training)) + return glimpse_feature + + def get_next_input(self, cell_output, reuse=None): + + raw_inputs = self.inputs + is_training = self.is_training + cell_output_no_grad = tf.stop_gradient(cell_output) + + with tf.variable_scope("baseline", reuse=reuse): + + baseline = tf.sigmoid( + utils.batch_norm_affine_transform(cell_output_no_grad, 1, decay=decay, name='baseline', + is_training=is_training)) + self.baselines.append(baseline) + + with tf.variable_scope("selection_network", reuse=reuse): + mean_bp = tf.sigmoid( + utils.batch_norm_affine_transform(cell_output_no_grad, int(bdnn_winlen), decay=decay, name='selection', + is_training=is_training)) + self.mean_bps.append(mean_bp) + + rand_seq = tf.random_uniform(mean_bp.get_shape().as_list(), minval=0, maxval=1) + sampled_bp = tf.cast(tf.greater(mean_bp, rand_seq), tf.float32) + sampled_bp = tf.stop_gradient(sampled_bp) + self.sampled_bps.append(sampled_bp) + + return get_glimpse(raw_inputs, sampled_bp, is_training) + + def action_network(self, outputs): + is_training = self.is_training + with tf.variable_scope("action_network"): + + h1_out = tf.nn.relu(utils.batch_norm_affine_transform(outputs, action_hidden_1, + decay=decay, name='action_hidden_1', is_training=is_training)) + h1_out = tf.nn.dropout(h1_out, keep_prob=self.keep_probability) + h2_out = tf.nn.relu(utils.batch_norm_affine_transform(h1_out, action_hidden_2, + decay=decay, name='action_hidden_2', is_training=is_training)) + h2_out = tf.nn.dropout(h2_out, keep_prob=self.keep_probability) + + return h2_out + + def bdnn_prediction(self, logits, threshold=0.5): + + # batch_size = self.batch_size + # result = tf.zeros(shape=(batch_size, 1), dtype=tf.float32) + # indx = np.arange(batch_size) + 1 + # indx = indx.reshape((batch_size, 1)) + # indx = utils.bdnn_transform(indx, w, u) + # indx = indx[w:(batch_size - w), :] + # indx_list = np.arange(w, batch_size - w) + # + # for i in indx_list: + # indx_temp = np.where((indx - 1) == i) + # pred = logits[int(indx_temp)] + # pred = tf.reduce_mean(pred) + # result[i] = pred + # + # result = tf.py_func(self.np_trim_zeros, [result], tf.float32) + # result = tf.greater(result, threshold) + # result = tf.cast(result, tf.float32) + + batch_size_tensor = 
tf.constant(self.batch_size+2*w, dtype=tf.float32) + th_tenor = tf.constant(threshold, dtype=tf.float32) + + result = tf.py_func(bdnn_prediction, [batch_size_tensor, logits, th_tenor], tf.float32) + + return result + + @staticmethod + def np_trim_zeros(x): + return np.trim_zeros(x) + + def calc_reward(self, outputs): + + batch_size = self.batch_size + is_training = self.is_training + + # consider the action at the last time step + + outputs = outputs[-1] + outputs = tf.reshape(outputs, (batch_size, lstm_cell_size)) + + # get the baseline + + b = tf.stack(self.baselines) + b = tf.tile(b, [1, 1, int(bdnn_winlen)]) + b = tf.reshape(tf.transpose(b, [1, 0, 2]), [batch_size, nGlimpses * int(bdnn_winlen)]) + no_grad_b = tf.stop_gradient(b) + + # get the action + + action_out = self.action_network(outputs) + logits = tf.sigmoid(affine_transform(action_out, int(bdnn_outputsize), name="softmax")) + result = self.bdnn_prediction(logits, threshold=0.5) + + # convert list of tensors to one big tensor + + mean_bps = tf.concat(axis=0, values=self.mean_bps) + mean_bps = tf.reshape(mean_bps, (nGlimpses, self.batch_size, int(bdnn_winlen))) + mean_bps = tf.transpose(mean_bps, [1, 0, 2]) + + sampled_bps = tf.concat(axis=0, values=self.sampled_bps) + sampled_bps = tf.reshape(sampled_bps, (nGlimpses, self.batch_size, int(bdnn_winlen))) + sampled_bps = tf.transpose(sampled_bps, [1, 0, 2]) + + # reward for all examples in the batch + + raw_indx = int(np.floor(bdnn_outputsize / 2)) + raw_labels = self.labels[:, raw_indx] + raw_labels = tf.reshape(raw_labels, shape=(-1, 1)) + R = tf.cast(tf.equal(result, raw_labels), tf.float32) + R = tf.reshape(R, (batch_size, 1)) + R = tf.tile(R, [1, nGlimpses * int(bdnn_winlen)]) + reward = tf.reduce_mean(R) + + # select the window + + p_bps = bernoulli_pmf(mean_bps, sampled_bps) + p_bps = tf.reshape(p_bps, (self.batch_size, nGlimpses * int(bdnn_winlen))) + + # define the cost function + + sv_part = -tf.square(self.labels - logits) + rf_part = tf.log(p_bps + SMALL_NUM) * (R - no_grad_b) + + J = tf.concat(axis=1, values=[sv_part, rf_part]) + J = tf.reduce_sum(J, 1) + J = J - tf.reduce_mean(tf.square(R - b), 1) + J = tf.reduce_mean(J, 0) + cost = -J + + var_list = tf.trainable_variables() + grads = tf.gradients(cost, var_list) + grads, _ = tf.clip_by_global_norm(grads, 0.5) + optimizer = tf.train.AdamOptimizer(lr) + train_op = optimizer.apply_gradients(zip(grads, var_list), global_step=global_step) + + return cost, reward, train_op, tf.reduce_mean(b), tf.reduce_mean(R - b), sampled_bps, lr + + +def main(argv=None): + + # Graph Part # + + print("Graph initialization...") + with tf.device(device): + with tf.variable_scope("model", reuse=None): + m_train = Model(batch_size=effective_batch_size, reuse=None, is_training=True) + with tf.variable_scope("model", reuse=True): + m_valid = Model(batch_size=effective_batch_size, reuse=True, is_training=False) + + print("Done") + + # Summary Part # + + print("Setting up summary op...") + + cost_ph = tf.placeholder(dtype=tf.float32) + accuracy_ph = tf.placeholder(dtype=tf.float32) + + cost_summary_op = tf.summary.scalar("cost", cost_ph) + accuracy_summary_op = tf.summary.scalar("accuracy", accuracy_ph) + + train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/', max_queue=4) + valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/', max_queue=4) + print("Done") + + # Model Save Part # + + print("Setting up Saver...") + saver = tf.train.Saver() + ckpt = tf.train.get_checkpoint_state(logs_dir) + print("Done") + + # Session 
Part # + + sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + sess_config.gpu_options.allow_growth = True + sess = tf.Session(config=sess_config) + + if ckpt and ckpt.model_checkpoint_path: # model restore + print("Model restored...") + saver.restore(sess, ckpt.model_checkpoint_path) + print("Done") + else: + sess.run(tf.global_variables_initializer()) # if the checkpoint doesn't exist, do initialization + + train_data_set = dr.DataReader(input_dir, output_dir, norm_dir, w=w, u=u, name="train") # training data reader initialization + valid_data_set = dr.DataReader(valid_input_dir, valid_output_dir, norm_dir, w=w, u=u, name="valid") # validation data reader initialization + + if FLAGS.mode is 'train': + + epoch = 0 + for itr in range(max_epoch): + + train_inputs, train_labels = train_data_set.next_batch(train_batch_size) + + feed_dict = {m_train.inputs: train_inputs, m_train.labels: train_labels, + m_train.keep_probability: dropout_rate} + + sess.run(m_train.train_op, feed_dict=feed_dict) + + if itr % 50 == 0 and itr >= 0: + + train_cost, train_reward = sess.run([m_train.cost, m_train.reward], feed_dict=feed_dict) + sampled_bps_tensor = sess.run(m_train.sampled_bps_tensor, feed_dict=feed_dict) + sampled_bps_tensor = np.mean(sampled_bps_tensor[:, -1, :], axis=1) + + print("Step: %d, train_cost: %.3f, train_reward=%3.3f" % (itr, train_cost, train_reward)) + + train_cost_summary_str = sess.run(cost_summary_op, feed_dict={cost_ph: train_cost}) + train_accuracy_summary_str = sess.run(accuracy_summary_op, feed_dict={accuracy_ph: train_reward}) + train_summary_writer.add_summary(train_cost_summary_str, itr) # write the train phase summary to event files + train_summary_writer.add_summary(train_accuracy_summary_str, itr) + + # if train_data_set.eof_checker(): + + if itr % 200 == 0 and itr > 0: + + saver.save(sess, logs_dir + "/model.ckpt", itr) # model save + print('validation start!') + valid_cost, valid_accuracy, valid_list = evaluation(m_valid, valid_data_set, sess, valid_batch_size) + + print('epoch : %d' % epoch) + print("avg_valid_cost: %.3f, avg_valid_accuracy: %.3f" % (valid_cost, valid_accuracy)) + print('valid_accuracy wrt SNR:') + print('SNR_-5 : %.3f, SNR_0 : %.3f, SNR_5 : %.3f, SNR_10 : %.3f' % (valid_list[0], valid_list[1], + valid_list[2], valid_list[3])) + valid_summary_str_cost = sess.run(cost_summary_op, feed_dict={cost_ph: valid_cost}) + valid_summary_str_accuracy = sess.run(accuracy_summary_op, feed_dict={accuracy_ph: valid_accuracy}) + valid_summary_writer.add_summary(valid_summary_str_cost, itr) + valid_summary_writer.add_summary(valid_summary_str_accuracy, itr) + # train_data_set.reader_initialize() + # print('Train data reader was initialized!') # initialize eof flag & num_file & start index + epoch += 1 + + elif FLAGS.mode is 'test': + _, valid_accuracy = evaluation(m_valid, valid_data_set, sess, valid_batch_size) + print("valid_accuracy = %.3f" % valid_accuracy) + +if __name__ == "__main__": + tf.app.run() + + diff --git a/data_load.py b/data_load.py new file mode 100644 index 0000000..a95fc60 --- /dev/null +++ b/data_load.py @@ -0,0 +1,9 @@ +import scipy.io as sio +import numpy as np + +stft = sio.loadmat('nx_stft_003.mat') +stft = stft['s'] +label = np.fromfile('./label_400.bin',dtype = np.float32) + +print(np.shape(stft)) +print(np.shape(label)) diff --git a/data_reader_bDNN.py b/data_reader_bDNN.py new file mode 100644 index 0000000..10516e1 --- /dev/null +++ b/data_reader_bDNN.py @@ -0,0 +1,154 @@ +import numpy as np +import os 
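+# Expected data layout: each input directory holds *.bin float32 feature files whose shape
+# is given by a matching *.txt spec ("rows,cols") and reshaped in Fortran order, each label
+# directory holds *.bin float32 labels, and norm_dir provides global_normalize_factor.mat
+# with global_mean / global_std.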
+import glob +import utils +import scipy.io as sio + + +class DataReader(object): + + def __init__(self, input_dir, output_dir, norm_dir, w=19, u=9, name=None): + print(name + " data reader initialization...") + self._input_dir = input_dir + self._output_dir = output_dir + self._norm_dir = norm_dir + self._input_file_list = sorted(glob.glob(input_dir+'/*.bin')) + self._input_spec_list = sorted(glob.glob(input_dir+'/*.txt')) + self._output_file_list = sorted(glob.glob(output_dir+'/*.bin')) + self._file_len = len(self._input_file_list) + self._name = name + assert self._file_len == len(self._output_file_list), "# input files and output file is not matched" + + self._epoch = 1 + self._num_file = 0 + self._start_idx = 0 + self._inputs = self._read_input(self._input_file_list[self._num_file], self._input_spec_list[self._num_file]) + self._outputs = self._read_output(self._output_file_list[self._num_file]) + self._w = w + self._u = u + self.eof = False + self.file_change = False + assert np.shape(self._inputs)[0] == np.shape(self._outputs)[0], \ + ("# samples is not matched between input: %d and output: %d files" + % (np.shape(self._inputs)[0], np.shape(self._outputs)[0])) + + self.num_samples = np.shape(self._outputs)[0] + + norm_param = sio.loadmat(self._norm_dir+'/global_normalize_factor.mat') + self.train_mean = norm_param['global_mean'] + self.train_std = norm_param['global_std'] + + print("Done.") + print("BOF : " + self._name + " file_" + str(self._num_file).zfill(2)) + + def _binary_read_with_shape(self): + pass + + @staticmethod + def _read_input(input_file_dir, input_spec_dir): + + data = np.fromfile(input_file_dir, dtype=np.float32) # (# total frame, feature_size) + with open(input_spec_dir,'r') as f: + spec = f.readline() + size = spec.split(',') + data = data.reshape((int(size[0]), int(size[1])), order='F') + + return data + + @staticmethod + def _read_output(output_file_dir): + + data = np.fromfile(output_file_dir, dtype=np.float32) # data shape : (# total frame,) + data = data.reshape(-1, 1) # data shape : (# total frame, 1) + + return data + + def next_batch(self, batch_size): + + if self._start_idx + batch_size > self.num_samples: + + self._start_idx = 0 + self.file_change = True + self._num_file += 1 + + print("EOF : " + self._name + " file_" + str(self._num_file-1).zfill(2) + + " -> BOF : " + self._name + " file_" + str(self._num_file).zfill(2)) + + if self._num_file > self._file_len - 1: + self.eof = True + self._num_file = 0 + print("EOF : last " + self._name + " file. 
" + "-> BOF : " + self._name + " file_" + + str(self._num_file).zfill(2)) + + self._inputs = self._read_input(self._input_file_list[self._num_file], self._input_spec_list[self._num_file]) + self._outputs = self._read_output(self._output_file_list[self._num_file]) + + data_len = np.shape(self._inputs)[0] + self._outputs = self._outputs[0:data_len, :] + + assert np.shape(self._inputs)[0] == np.shape(self._outputs)[0], \ + ("# samples is not matched between input: %d and output: %d files" + % (np.shape(self._inputs)[0], np.shape(self._outputs)[0])) + + self.num_samples = np.shape(self._outputs)[0] + # print("current file number : %d, samples : %d" % (self._num_file + 1, self.num_samples)) + #print("Loaded " + self._name + " file number : %d" % (self._num_file + 1)) + else: + self.file_change = False + self.eof = False + + inputs = self._inputs[self._start_idx:self._start_idx + batch_size, :] + inputs = self.normalize(inputs) + inputs = utils.bdnn_transform(inputs, self._w, self._u) + inputs = inputs[self._w: (batch_size-self._w), :] + + outputs = self._outputs[self._start_idx:self._start_idx + batch_size, :] + outputs = utils.bdnn_transform(outputs, self._w, self._u) + outputs = outputs[self._w: (batch_size - self._w), :] + + self._start_idx += batch_size + # print(self._start_idx) + # print(self.num_samples) + return inputs, outputs + + def normalize(self, x): + x = (x - self.train_mean)/self.train_std + # a = (np.std(x, axis=0)) + return x + + def reader_initialize(self): + self._num_file = 0 + self._start_idx = 0 + self.eof = False + + def eof_checker(self): + return self.eof + + def file_change_checker(self): + return self.file_change + + def file_change_initialize(self): + self.file_change = False + + +def dense_to_one_hot(labels_dense, num_classes=2): + """Convert class labels from scalars to one-hot vectors.""" + # copied from TensorFlow tutorial + num_labels = labels_dense.shape[0] + index_offset = np.arange(num_labels) * num_classes + labels_one_hot = np.zeros((num_labels, num_classes)) + labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 + return labels_one_hot + + +# file_dir = "/home/sbie/github/VAD_KJT/Datamake/Database/Aurora2withSE" +# input_dir1 = file_dir + "/STFT2" +# output_dir1 = file_dir + "/Labels" +# dr = DataReader(input_dir1, output_dir1, input_dir1,name='test') +# +# for i in range(1000000): +# tt, pp = dr.next_batch(500) +# print("asdf") + + + diff --git a/py_func_test.py b/py_func_test.py new file mode 100644 index 0000000..c0c2f9c --- /dev/null +++ b/py_func_test.py @@ -0,0 +1,14 @@ +import tensorflow as tf +import numpy as np + + +def test_func(xx): + return np.mean(xx[1:3]) + +x = tf.constant(np.arange(10), tf.float32) +y = tf.py_func(test_func, [x], tf.float32) + +sess = tf.InteractiveSession() +yy = y.eval() + +print("aa") \ No newline at end of file diff --git a/test_bdnn.py b/test_bdnn.py new file mode 100644 index 0000000..33d7154 --- /dev/null +++ b/test_bdnn.py @@ -0,0 +1,14 @@ +import numpy as np +import utils + +a = np.arange(12) + 1 +a = a.reshape((-1, 3)) + +bb = utils.bdnn_transform(a, 2, 1) + +for i in range(bb.shape[0]): + print(bb[i, :]) + + + + diff --git a/test_bdnn2.py b/test_bdnn2.py new file mode 100644 index 0000000..d9de40d --- /dev/null +++ b/test_bdnn2.py @@ -0,0 +1,47 @@ +# prediction + + +import numpy as np +import utils + +batch_size = 20 + +idx = np.arange(batch_size) + 1 +idx = np.reshape(idx, (batch_size, 1)) + + +data = np.arange(batch_size*3) -1 +data = np.reshape(data, (batch_size, 3)) + +label = np.arange(batch_size) + 
10 +label = label.reshape((batch_size,1)) + +w = 5 +u = 2 + + +trans_idx = utils.bdnn_transform(idx, w, u) +trans_data = utils.bdnn_transform(data, w, u) +trans_label = utils.bdnn_transform(label, w, u) + +result = np.zeros((batch_size, 1)) + + +final_data = trans_data[w:(batch_size-w), :] + +numm = np.arange(w, batch_size-w) + +# for i in numm: +# idx_temp = np.where((trans_idx-1) == i) +# aa = trans_label[idx_temp] +# aa = np.sum(aa)/aa.shape[0] +# result[i] = aa + +for i in range(batch_size): + idx_temp = np.where((trans_idx-1) == i) + aa = trans_label[idx_temp] + aa = np.sum(aa)/aa.shape[0] + result[i] = aa + +ttt = np.trim_zeros(result) +print("done") diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..e869c83 --- /dev/null +++ b/utils.py @@ -0,0 +1,338 @@ +# Utils used with tensorflow implementation + +import tensorflow as tf +import numpy as np +import scipy.misc as misc +import os, sys +from six.moves import urllib +import tarfile +import zipfile +import scipy.io +import re + +__author__ = 'Charlie' + + +def get_model_data(dir_path, model_url): + maybe_download_and_extract(dir_path, model_url) + filename = model_url.split("/")[-1] + filepath = os.path.join(dir_path, filename) + if not os.path.exists(filepath): + raise IOError("VGG Model not found!") + data = scipy.io.loadmat(filepath) + return data + + +def maybe_download_and_extract(dir_path, url_name, is_tarfile=False, is_zipfile=False): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + filename = url_name.split('/')[-1] + filepath = os.path.join(dir_path, filename) + if not os.path.exists(filepath): + def _progress(count, block_size, total_size): + sys.stdout.write( + '\r>> Downloading %s %.1f%%' % (filename, float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + + filepath, _ = urllib.request.urlretrieve(url_name, filepath, reporthook=_progress) + print() + statinfo = os.stat(filepath) + print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') + if is_tarfile: + tarfile.open(filepath, 'r:gz').extractall(dir_path) + elif is_zipfile: + with zipfile.ZipFile(filepath) as zf: + zip_dir = zf.namelist()[0] + zf.extractall(dir_path) + + +def save_image(image, save_dir, name, mean=None): + """ + Save image by unprocessing if mean given else just save + :param mean: + :param image: + :param save_dir: + :param name: + :return: + """ + if mean: + image = unprocess_image(image, mean) + misc.imsave(os.path.join(save_dir, name + ".png"), image) + + +def get_variable(weights, name): + init = tf.constant_initializer(weights, dtype=tf.float32) + var = tf.get_variable(name=name, initializer=init, shape=weights.shape) + return var + + +def weight_variable(shape, stddev=0.02, name=None): + # print(shape) + initial = tf.truncated_normal(shape, stddev=stddev) + #initial = tf.contrib.layers.xavier_initializer_conv2d() + if name is None: + return tf.Variable(initial) + else: + return tf.get_variable(name, initializer=initial) + + +def bias_variable(shape, name=None): + initial = tf.constant(0.0, shape=shape) + if name is None: + return tf.Variable(initial) + else: + return tf.get_variable(name, initializer=initial) + + +def get_tensor_size(tensor): + from operator import mul + return reduce(mul, (d.value for d in tensor.get_shape()), 1) + + +def conv2d_basic(x, W, bias, stride=1): + conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding="SAME") + return tf.nn.bias_add(conv, bias) + + +def conv2d_basic_VALID(x, W, bias, stride=1): + conv = tf.nn.conv2d(x, W, strides=[1, stride, 
stride, 1], padding="VALID") + return tf.nn.bias_add(conv, bias) + + +def conv2d_strided(x, W, b): + conv = tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding="SAME") + return tf.nn.bias_add(conv, b) + + +def conv2d_transpose_strided(x, W, b, output_shape=None, stride = 2): + # print x.get_shape() + # print W.get_shape() + if output_shape is None: + output_shape = x.get_shape().as_list() + output_shape[1] *= 2 + output_shape[2] *= 2 + output_shape[3] = W.get_shape().as_list()[2] + # print output_shape + conv = tf.nn.conv2d_transpose(x, W, output_shape, strides=[1, stride, stride, 1], padding="SAME") + return tf.nn.bias_add(conv, b) + + +def leaky_relu(x, alpha=0.0, name=""): + return tf.maximum(alpha * x, x, name) + + +def max_pool_2x2(x): + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") + + +def max_pool_2x1(x): + return tf.nn.max_pool(x, ksize=[1, 2, 1, 1], strides=[1, 2, 1, 1], padding="SAME") + + +def avg_pool_2x2(x): + return tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") + + +def local_response_norm(x): + return tf.nn.lrn(x, depth_radius=5, bias=2, alpha=1e-4, beta=0.75) + + +def batch_norm(x, n_out, phase_train, scope='bn', decay=0.9, eps=1e-5): + """ + Code taken from http://stackoverflow.com/a/34634291/2267819 + """ + with tf.variable_scope(scope): + beta = tf.get_variable(name='beta', shape=[n_out], initializer=tf.constant_initializer(0.0) + , trainable=True) + gamma = tf.get_variable(name='gamma', shape=[n_out], initializer=tf.random_normal_initializer(1.0, 0.02), + trainable=True) + batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') + ema = tf.train.ExponentialMovingAverage(decay=decay) + + def mean_var_with_update(): + ema_apply_op = ema.apply([batch_mean, batch_var]) + with tf.control_dependencies([ema_apply_op]): + return tf.identity(batch_mean), tf.identity(batch_var) + + mean, var = tf.cond(phase_train, + mean_var_with_update, + lambda: (ema.average(batch_mean), ema.average(batch_var))) + normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps) + return normed + + +def process_image(image, mean_pixel): + return image - mean_pixel + + +def unprocess_image(image, mean_pixel): + return image + mean_pixel + + +def bottleneck_unit(x, out_chan1, out_chan2, down_stride=False, up_stride=False, name=None): + """ + Modified implementation from github ry?! 
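+    (presumably ry's tensorflow-resnet bottleneck block; it is not called by the VAD
+    scripts in this change, and the bn() defined below is a local-response-norm stand-in
+    that accepts only (tensor, name), although it is invoked here with an extra scale name)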
+ """ + + def conv_transpose(tensor, out_channel, shape, strides, name=None): + out_shape = tensor.get_shape().as_list() + in_channel = out_shape[-1] + kernel = weight_variable([shape, shape, out_channel, in_channel], name=name) + shape[-1] = out_channel + return tf.nn.conv2d_transpose(x, kernel, output_shape=out_shape, strides=[1, strides, strides, 1], + padding='SAME', name='conv_transpose') + + def conv(tensor, out_chans, shape, strides, name=None): + in_channel = tensor.get_shape().as_list()[-1] + kernel = weight_variable([shape, shape, in_channel, out_chans], name=name) + return tf.nn.conv2d(x, kernel, strides=[1, strides, strides, 1], padding='SAME', name='conv') + + def bn(tensor, name=None): + """ + :param tensor: 4D tensor input + :param name: name of the operation + :return: local response normalized tensor - not using batch normalization :( + """ + return tf.nn.lrn(tensor, depth_radius=5, bias=2, alpha=1e-4, beta=0.75, name=name) + + in_chans = x.get_shape().as_list()[3] + + if down_stride or up_stride: + first_stride = 2 + else: + first_stride = 1 + + with tf.variable_scope('res%s' % name): + if in_chans == out_chan2: + b1 = x + else: + with tf.variable_scope('branch1'): + if up_stride: + b1 = conv_transpose(x, out_chans=out_chan2, shape=1, strides=first_stride, + name='res%s_branch1' % name) + else: + b1 = conv(x, out_chans=out_chan2, shape=1, strides=first_stride, name='res%s_branch1' % name) + b1 = bn(b1, 'bn%s_branch1' % name, 'scale%s_branch1' % name) + + with tf.variable_scope('branch2a'): + if up_stride: + b2 = conv_transpose(x, out_chans=out_chan1, shape=1, strides=first_stride, name='res%s_branch2a' % name) + else: + b2 = conv(x, out_chans=out_chan1, shape=1, strides=first_stride, name='res%s_branch2a' % name) + b2 = bn(b2, 'bn%s_branch2a' % name, 'scale%s_branch2a' % name) + b2 = tf.nn.relu(b2, name='relu') + + with tf.variable_scope('branch2b'): + b2 = conv(b2, out_chans=out_chan1, shape=3, strides=1, name='res%s_branch2b' % name) + b2 = bn(b2, 'bn%s_branch2b' % name, 'scale%s_branch2b' % name) + b2 = tf.nn.relu(b2, name='relu') + + with tf.variable_scope('branch2c'): + b2 = conv(b2, out_chans=out_chan2, shape=1, strides=1, name='res%s_branch2c' % name) + b2 = bn(b2, 'bn%s_branch2c' % name, 'scale%s_branch2c' % name) + + x = b1 + b2 + return tf.nn.relu(x, name='relu') + + +def add_to_regularization_and_summary(var): + if var is not None: + tf.summary.histogram(var.op.name, var) + tf.add_to_collection("reg_loss", tf.nn.l2_loss(var)) + + +def add_activation_summary(var): + if var is not None: + tf.summary.histogram(var.op.name + "/activation", var) + tf.summary.scalar(var.op.name + "/sparsity", tf.nn.zero_fraction(var)) + + +def add_gradient_summary(grad, var): + if grad is not None: + tf.summary.histogram(var.op.name + "/gradient", grad) + + +def get_conv_shape(name): + spec = re.split(':|, |->', name) + kernel_size = int(spec[5]) + stride = int(spec[7]) + input_fm = int(spec[9]) + output_fm = int(spec[10]) + conv_shape = [kernel_size, kernel_size, input_fm, output_fm] + return conv_shape, stride + + +def get_1d_conv_shape(name): + spec = re.split(':|, |->', name) + kernel_size = int(spec[5]) + stride = int(spec[7]) + input_fm = int(spec[9]) + output_fm = int(spec[10]) + conv_shape = [kernel_size, 1, input_fm, output_fm] + return conv_shape, stride + + +def write_val_summary(graph, loss): + with graph.as_default(): + val_loss = tf.placeholder(tf.float32, shape=[1], name="loss") + tf.summary.scalar("entropy", val_loss) + summary_op = tf.summary.merge_all() + return 
summary_op + + +def conv2lstm_layer(inputs, num_fm): + """ + make the conv_out flat for rnn input + :param inputs: + :param num_fm: # final output feature maps. + :return: outputs: flattened output. shape = (batch_size, num_fm) + """ + shape = inputs.get_shape().as_list() + W = weight_variable([shape[1], shape[2], shape[3], num_fm], name="last_conv_w") + b = bias_variable([num_fm], name="last_conv_b") + conv_last = conv2d_basic_VALID(inputs, W, b) + outputs = tf.nn.relu(conv_last, name="last_relu") + return outputs + + +def batch_norm_affine_transform(x, output_dim, decay=0, name=None, is_training=True): + """ + affine transformation Wx+b + assumes x.shape = (batch_size, num_features) + """ + + w = tf.get_variable(name+"_w", [x.get_shape()[1], output_dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) + b = tf.get_variable(name+"_b", [output_dim], initializer=tf.constant_initializer(0.0)) + affine_result = tf.matmul(x, w) + b + batch_norm_result = tf.contrib.layers.batch_norm(affine_result, decay=decay, is_training=is_training, + updates_collections=None) + return batch_norm_result + + +def bdnn_transform(inputs, w, u): + + # """ + # :param inputs. shape = (batch_size, feature_size) + # :param w : decide neighbors + # :param u : decide neighbors + # :return: trans_inputs. shape = (batch_size, feature_size*len(neighbors)) + # """ + + neighbors_1 = np.arange(-w, -u, u) + neighbors_2 = np.array([-1, 0, 1]) + neighbors_3 = np.arange(1+u, w+1, u) + + neighbors = np.concatenate((neighbors_1, neighbors_2, neighbors_3), axis=0) + + pad_size = 2*w + inputs.shape[0] + pad_inputs = np.zeros((pad_size, inputs.shape[1])) + pad_inputs[0:inputs.shape[0], :] = inputs + + trans_inputs = [np.roll(pad_inputs, -1*neighbors[i], axis=0)[0:inputs.shape[0], :] + for i in range(neighbors.shape[0])] + + trans_inputs = np.asarray(trans_inputs) + trans_inputs = np.transpose(trans_inputs, [1, 0, 2]) + trans_inputs = np.reshape(trans_inputs, (trans_inputs.shape[0], -1)) + + return trans_inputs
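+
+
+if __name__ == "__main__":
+    # Minimal self-check of bdnn_transform (illustrative sketch; mirrors test_bdnn.py and is
+    # not part of the training pipeline). With the w=19, u=9 defaults from VAD_bDNN.py the
+    # neighbour offsets are [-19, -10, -1, 0, 1, 10, 19], i.e. ((w-1)/u)*2 + 3 = 7 frames,
+    # matching bdnn_winlen there; an MRCG frame of 768 features expands to 7 * 768 = 5376
+    # values, and callers later drop the first and last w rows, which only hold zero padding.
+    demo_feats = np.random.randn(100, 768).astype(np.float32)  # 100 frames of MRCG features
+    demo_trans = bdnn_transform(demo_feats, 19, 9)
+    assert demo_trans.shape == (100, 7 * 768)
+    print(demo_trans.shape)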