Skip to content

Commit

Permalink
shuffle task
Browse files: browse the repository at this point in the history
  • Loading branch information
Dylan Bourgeois committed Feb 12, 2019
1 parent ab9dfa2 commit 5e412e2
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import optimization
import tokenization
import tensorflow as tf
import numpy as np
import networkx as nx

flags = tf.flags

Expand Down Expand Up @@ -147,6 +149,8 @@

# Hardware selection flag: TPU when set, GPU/CPU otherwise.
flags.DEFINE_bool(
    name="use_tpu",
    default=False,
    help="Whether to use TPU or GPU/CPU.")

# Opt-in switch for the shuffle experiment; disabled by default.
flags.DEFINE_bool(
    name="shuffle",
    default=False,
    help="Whether to run shuffle experiment.")

tf.flags.DEFINE_string(
"tpu_name", None,
"The Cloud TPU to use for training. This should be either the name "
Expand Down Expand Up @@ -305,6 +309,17 @@ def _create_examples(self, lines, labels, set_type):
suffix = "_adj" if set_type=="train" else "_adj_val"
adj = self.get_adjacency(adj_file, suffix=suffix, idx=i)

if FLAGS.shuffle:
split_text = np.asarray(text.split(' '))
shuffle_idx = np.random.permutation(len(split_text))
# print(text, ' '.join(split_text[shuffle_idx]))
adj = np.asarray(adj)
G = nx.from_numpy_matrix(adj)
shuffled_adj = nx.adjacency_matrix(G, nodelist=shuffle_idx).todense()
padded_adj = np.eye(64)
padded_adj[:len(shuffle_idx), :len(shuffle_idx)] = shuffled_adj
#print(adj, list(padded_adj))

examples.append(
InputExample(guid=guid, text_a=text, adjacency=adj, label=label))
#train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
Expand Down Expand Up @@ -800,6 +815,7 @@ def main(_):
train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
if FLAGS.clean_data:
train_examples = processor.get_train_examples(FLAGS.train_file, FLAGS.train_labels)
print(len(train_examples))
num_train_steps = int(
len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
file_based_convert_examples_to_features(
Expand Down

0 comments on commit 5e412e2

Please sign in to comment.