From 4e939fb722054ec8513d826ef1b091a343fec698 Mon Sep 17 00:00:00 2001
From: "H. Jin" <jinhw1989@gmail.com>
Date: Wed, 18 Sep 2019 14:45:18 -0500
Subject: [PATCH 1/3] Update implementation compatible with python 3

---
 README.md      | 18 ++++++++----
 base_model.py  | 23 ++++++++-------
 ind_model.py   | 64 +++++++++++++++++++++--------------------
 layers.py      | 42 +++++++++++++++------------
 test_ind.py    | 77 ++++++++++++++++++++++++++++++--------------------
 test_trans.py  | 73 +++++++++++++++++++++++++++--------------------
 trans_model.py |  4 +--
 7 files changed, 173 insertions(+), 128 deletions(-)

diff --git a/README.md b/README.md
index 421d86e..cd0e714 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # Planetoid
 
+## Modifications
+
+* Port the implementation to Python 3
+
 ## Introduction
 
 This is an implementation of Planetoid, a graph-based semi-supervised learning method proposed in the following paper:
@@ -15,12 +19,14 @@ Please cite the above paper if you use the datasets or code in this repo.
 We include the Citeseer dataset in the directory `data`, where the data structures needed are pickled.
 
 To run the transductive version,
-```
+
+```shell
 python test_trans.py
 ```
 
 To run the inductive version,
-```
+
+```shell
 python test_ind.py
 ```
 
@@ -35,6 +41,7 @@ The models are implemented mainly in `trans_model.py` (transductive) and `ind_mo
 ### Transductive learning
 
 The input to the transductive model contains:
+
 - `x`, the feature vectors of the training instances,
 - `y`, the one-hot labels of the training instances,
 - `graph`, a `dict` in the format `{index: [index_of_neighbor_nodes]}`, where the neighbor nodes are organized as a list. The current version only supports binary graphs.
@@ -44,6 +51,7 @@ Let L be the number of training instances. The indices in `graph` from 0 to L -
 ### Inductive learning
 
 The input to the inductive model contains:
+
 - `x`, the feature vectors of the labeled training instances,
 - `y`, the one-hot labels of the labeled training instances,
 - `allx`, the feature vectors of both labeled and unlabeled training instances (a superset of `x`),
@@ -55,11 +63,12 @@ Let n be the number of both labeled and unlabeled training instances. These n in
 
 Datasets for Citeseet, Cora, and Pubmed are available in the directory `data`, in a preprocessed format stored as numpy/scipy files.
 
-The dataset for DIEL is available at http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz.
+The dataset for DIEL is available at <http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz>. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at <http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz>.
 
-The NELL dataset can be found here at http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz.
+The NELL dataset can be found here at <http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz>.
 
 In addition to `x`, `y`, `allx`, and `graph` as described above, the preprocessed datasets also include:
+
 - `tx`, the feature vectors of the test instances,
 - `ty`, the one-hot labels of the test instances,
 - `test.index`, the indices of test instances in `graph`, for the inductive setting,
@@ -72,4 +81,3 @@ You can use `cPickle.load(open(filename))` to load the numpy/scipy objects `x`,
 ## Hyper-parameter tuning
 
 Refer to `test_ind.py` and `test_trans.py` for the definition of different hyper-parameters (passed as arguments). Hyper-parameters are tuned by randomly shuffle the training/test split (i.e., randomly shuffling the indices in `x`, `y`, `tx`, `ty`, and `graph`). For the DIEL dataset, we tune the hyper-parameters on one of the ten runs, and then keep the same hyper-parameters for all the ten runs.
-
diff --git a/base_model.py b/base_model.py
index 9ae169f..e417139 100644
--- a/base_model.py
+++ b/base_model.py
@@ -1,9 +1,10 @@
 
 import lasagne
-import cPickle
+import pickle
 import random
 import numpy as np
 
+
 class base_model(object):
     """the base model for both transductive and inductive learning."""
 
@@ -11,15 +12,18 @@ def __init__(self, args):
         """
         args (an object): contains the arguments used for initalizing the model.
         """
+        np.random.seed(13)
+        random.seed(13)
+
         self.embedding_size = args.embedding_size
         self.learning_rate = args.learning_rate
         self.batch_size = args.batch_size
         self.neg_samp = args.neg_samp
         self.model_file = args.model_file
-        
+
         self.window_size = args.window_size
         self.path_size = args.path_size
-        
+
         self.g_batch_size = args.g_batch_size
         self.g_learning_rate = args.g_learning_rate
         self.g_sample_size = args.g_sample_size
@@ -28,10 +32,8 @@ def __init__(self, args):
         self.update_emb = args.update_emb
         self.layer_loss = args.layer_loss
 
+        # TODO: replace lasagne
         lasagne.random.set_rng(np.random)
-        np.random.seed(13)
-
-        random.seed(13)
 
         self.inst_generator = self.gen_train_inst()
         self.graph_generator = self.gen_graph()
@@ -42,9 +44,9 @@ def store_params(self):
         """
 
         for i, l in enumerate(self.l):
-            fout = open("{}.{}".format(self.model_file, i), 'w')
+            fout = open("{}.{}".format(self.model_file, i), 'wb')
             params = lasagne.layers.get_all_param_values(l)
-            cPickle.dump(params, fout, cPickle.HIGHEST_PROTOCOL)
+            pickle.dump(params, fout, pickle.HIGHEST_PROTOCOL)
             fout.close()
 
     def load_params(self):
@@ -52,7 +54,7 @@ def load_params(self):
         """
         for i, l in enumerate(self.l):
             fin = open("{}.{}".format(self.model_file, i))
-            params = cPickle.load(fin)
+            params = pickle.load(fin)
             lasagne.layers.set_all_param_values(l, params)
             fin.close()
 
@@ -71,6 +73,3 @@ def train(self, init_iter_label, init_iter_graph, max_iter, iter_graph, iter_ins
         """
         self.init_train(init_iter_label, init_iter_graph)
         self.step_train(max_iter, iter_graph, iter_inst, iter_label)
-
-
-
diff --git a/ind_model.py b/ind_model.py
index fd56666..cbd982a 100644
--- a/ind_model.py
+++ b/ind_model.py
@@ -9,6 +9,7 @@
 
 from base_model import base_model
 
+
 class ind_model(base_model):
     """Planetoid-I.
     """
@@ -28,38 +29,38 @@ def add_data(self, x, y, allx, graph):
     def build(self):
         """build the model. This method should be called after self.add_data.
         """
-        x_sym = sparse.csr_matrix('x', dtype = 'float32')
+        x_sym = sparse.csr_matrix('x', dtype='float32')
         self.x_sym = x_sym
         y_sym = T.imatrix('y')
-        gx_sym = sparse.csr_matrix('gx', dtype = 'float32')
+        gx_sym = sparse.csr_matrix('gx', dtype='float32')
         gy_sym = T.ivector('gy')
         gz_sym = T.vector('gz')
 
-        l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
-        l_gx_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = gx_sym)
-        l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)
+        l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
+        l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym)
+        l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)
 
-        l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
+        l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
         l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
         W = l_x_2.W
-        l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
+        l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
         if self.use_feature:
-            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis = 1)
-            l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
+            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
+            l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
         else:
             l_x = l_x_2
 
-        l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W = W)
+        l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
         if self.neg_samp > 0:
-            l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size = self.num_ver, output_size = self.embedding_size)
+            l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size=self.num_ver, output_size=self.embedding_size)
             l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
             pgy_sym = lasagne.layers.get_output(l_gx)
-            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gz_sym)).sum()
+            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
         else:
-            l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity = lasagne.nonlinearities.softmax)
+            l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity=lasagne.nonlinearities.softmax)
             pgy_sym = lasagne.layers.get_output(l_gx)
             g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()
-        
+
         self.l = [l_x, l_gx]
 
         py_sym = lasagne.layers.get_output(l_x)
@@ -73,12 +74,12 @@ def build(self):
         params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b]
         if self.update_emb:
             params = lasagne.layers.get_all_params(l_x)
-        updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)
-        self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates)
+        updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
+        self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)
 
         g_params = lasagne.layers.get_all_params(l_gx)
-        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
-        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates = g_updates, on_unused_input = 'ignore')
+        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
+        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates=g_updates, on_unused_input='ignore')
 
         self.test_fn = theano.function([x_sym], py_sym)
 
@@ -86,7 +87,7 @@ def gen_train_inst(self):
         """generator for batches for classification loss.
         """
         while True:
-            ind = np.array(np.random.permutation(self.x.shape[0]), dtype = np.int32)
+            ind = np.array(np.random.permutation(self.x.shape[0]), dtype=np.int32)
             i = 0
             while i < self.x.shape[0]:
                 j = min(ind.shape[0], i + self.batch_size)
@@ -103,21 +104,25 @@ def gen_graph(self):
                 g, gy = [], []
                 j = min(ind.shape[0], i + self.g_batch_size)
                 for k in ind[i: j]:
-                    if len(self.graph[k]) == 0: continue
+                    if len(self.graph[k]) == 0:
+                        continue
                     path = [k]
                     for _ in range(self.path_size):
                         path.append(random.choice(self.graph[path[-1]]))
                     for l in range(len(path)):
-                        if path[l] >= self.allx.shape[0]: continue
+                        if path[l] >= self.allx.shape[0]:
+                            continue
                         for m in range(l - self.window_size, l + self.window_size + 1):
-                            if m < 0 or m >= len(path): continue
-                            if path[m] >= self.allx.shape[0]: continue
+                            if m < 0 or m >= len(path):
+                                continue
+                            if path[m] >= self.allx.shape[0]:
+                                continue
                             g.append([path[l], path[m]])
                             gy.append(1.0)
                             for _ in range(self.neg_samp):
                                 g.append([path[l], random.randint(0, self.num_ver - 1)])
                                 gy.append(- 1.0)
-                g = np.array(g, dtype = np.int32)
+                g = np.array(g, dtype=np.int32)
                 yield self.allx[g[:, 0]], g[:, 1], gy
                 i = j
 
@@ -140,17 +145,17 @@ def gen_label_graph(self):
             for _ in range(self.g_sample_size):
                 x1 = random.randint(0, self.x.shape[0] - 1)
                 label = labels[x1]
-                if len(label2inst) == 1: continue
+                if len(label2inst) == 1:
+                    continue
                 x2 = random.choice(label2inst[label])
                 g.append([x1, x2])
                 gy.append(1.0)
                 for _ in range(self.neg_samp):
                     g.append([x1, random.choice(not_label[label])])
                     gy.append(- 1.0)
-            g = np.array(g, dtype = np.int32)
+            g = np.array(g, dtype=np.int32)
             yield self.allx[g[:, 0]], g[:, 1], gy
 
-
     def init_train(self, init_iter_label, init_iter_graph):
         """pre-training of graph embeddings.
         init_iter_label (int): # iterations for optimizing label context loss.
@@ -159,12 +164,12 @@ def init_train(self, init_iter_label, init_iter_graph):
         for i in range(init_iter_label):
             gx, gy, gz = next(self.label_generator)
             loss = self.g_fn(gx, gy, gz)
-            print 'iter label', i, loss
+            print('iter label', i, loss)
 
         for i in range(init_iter_graph):
             gx, gy, gz = next(self.graph_generator)
             loss = self.g_fn(gx, gy, gz)
-            print 'iter graph', i, loss
+            print('iter graph', i, loss)
 
     def step_train(self, max_iter, iter_graph, iter_inst, iter_label):
         """a training step. Iteratively sample batches for three loss functions.
@@ -191,4 +196,3 @@ def predict(self, tx):
         returns (numpy.ndarray, #instacnes * #classes): classification probabilities for dev instances.
         """
         return self.test_fn(tx)
-
diff --git a/layers.py b/layers.py
index bee5fdf..f1a6d96 100644
--- a/layers.py
+++ b/layers.py
@@ -7,9 +7,10 @@
 
 EXP_SOFTMAX = True
 
+
 class DenseLayer(lasagne.layers.Layer):
-    def __init__(self, incoming, num_units, W = lasagne.init.GlorotUniform(),
-                 b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify,
+    def __init__(self, incoming, num_units, W=lasagne.init.GlorotUniform(),
+                 b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify,
                  **kwargs):
         super(DenseLayer, self).__init__(incoming, **kwargs)
         self.nonlinearity = (nonlinearities.identity if nonlinearity is None
@@ -42,11 +43,12 @@ def get_output_for(self, input, **kwargs):
         if not EXP_SOFTMAX or self.nonlinearity != lasagne.nonlinearities.softmax:
             return self.nonlinearity(activation)
         else:
-            return T.exp(activation) / (T.exp(activation).sum(1, keepdims = True))
+            return T.exp(activation) / (T.exp(activation).sum(1, keepdims=True))
+
 
 class SparseLayer(lasagne.layers.Layer):
 
-    def __init__(self, incoming, num_units, W = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs):
+    def __init__(self, incoming, num_units, W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
         super(SparseLayer, self).__init__(incoming, **kwargs)
 
         self.num_units = num_units
@@ -67,7 +69,7 @@ def get_output_for(self, input, **kwargs):
         if not EXP_SOFTMAX or self.nonlinearity != lasagne.nonlinearities.softmax:
             return self.nonlinearity(act)
         else:
-            return T.exp(act) / (T.exp(act).sum(1, keepdims = True))
+            return T.exp(act) / (T.exp(act).sum(1, keepdims=True))
 
     def get_output_shape_for(self, input_shape):
         return (input_shape[0], self.num_units)
@@ -75,7 +77,7 @@ def get_output_shape_for(self, input_shape):
 
 class HybridLayer(lasagne.layers.MergeLayer):
 
-    def __init__(self, incomings, num_units, W1 = lasagne.init.GlorotUniform(), W2 = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs):
+    def __init__(self, incomings, num_units, W1=lasagne.init.GlorotUniform(), W2=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
         super(HybridLayer, self).__init__(incomings, **kwargs)
 
         self.num_units = num_units
@@ -84,19 +86,20 @@ def __init__(self, incomings, num_units, W1 = lasagne.init.GlorotUniform(), W2 =
         num_inputs_1 = self.input_shapes[0][1]
         num_inputs_2 = self.input_shapes[1][1]
 
-        self.W1 = self.add_param(W1, (num_inputs_1, num_units), name = "W1")
-        self.W2 = self.add_param(W2, (num_inputs_2, num_units), name = "W2")
-        self.b = self.add_param(b, (num_units, ), name = "b", regularizable = False)
+        self.W1 = self.add_param(W1, (num_inputs_1, num_units), name="W1")
+        self.W2 = self.add_param(W2, (num_inputs_2, num_units), name="W2")
+        self.b = self.add_param(b, (num_units, ), name="b", regularizable=False)
 
     def get_output_for(self, inputs, **kwargs):
         act = sparse.basic.structured_dot(inputs[0], self.W1) + T.dot(inputs[1], self.W2) + self.b.dimshuffle('x', 0)
         if EXP_SOFTMAX and self.nonlinearity == lasagne.nonlinearities.softmax:
-            return T.exp(act) / (T.exp(act).sum(1, keepdims = True))
+            return T.exp(act) / (T.exp(act).sum(1, keepdims=True))
         return self.nonlinearity(act)
 
     def get_output_shape_for(self, input_shapes):
         return (input_shapes[0][0], self.num_units)
 
+
 class EntropyLayer(lasagne.layers.Layer):
 
     def __init__(self, incoming, constW, **kwargs):
@@ -108,24 +111,27 @@ def get_output_for(self, input, **kwargs):
         return T.reshape(T.dot(input, self.constW), (input.shape[0] * input.shape[1] * input.shape[1], 1))
 
     def get_output_shape_for(self, input_shape):
-        if input_shape[0] is None or input_shape[1] is None: return (None, 1)
+        if input_shape[0] is None or input_shape[1] is None:
+            return (None, 1)
         return (input_shape[0] * input_shape[1] * input_shape[1], 1)
 
+
 class TensorLayer(lasagne.layers.Layer):
 
-    def __init__(self, incoming, num_units, V = lasagne.init.GlorotUniform(), W  = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs):
+    def __init__(self, incoming, num_units, V=lasagne.init.GlorotUniform(), W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
         super(TensorLayer, self).__init__(incoming, **kwargs)
         self.num_units = num_units
         self.nonlinearity = nonlinearity
 
         num_inputs = self.input_shape[1]
 
-        self.V = self.add_param(V, (self.num_units, num_inputs, num_inputs), name = "V")
-        self.W = self.add_param(W, (num_inputs, self.num_units), name = "W")
-        self.b = self.add_param(b, (self.num_units, ), name = "b")
+        self.V = self.add_param(V, (self.num_units, num_inputs, num_inputs), name="V")
+        self.W = self.add_param(W, (num_inputs, self.num_units), name="W")
+        self.b = self.add_param(b, (self.num_units, ), name="b")
 
     def get_output_for(self, input, **kwargs):
-        act = T.batched_dot(T.tensordot(input, self.V, axes = [1, 2]), input) + T.dot(input, self.W) + self.b.dimshuffle('x', 0)
+        act = T.batched_dot(T.tensordot(input, self.V, axes=[1, 2]),
+                            input) + T.dot(input, self.W) + self.b.dimshuffle('x', 0)
         return self.nonlinearity(act)
 
     def get_output_shape_for(self, input_shape):
@@ -138,14 +144,14 @@ def __init__(self, incomings, **kwargs):
         super(DotLayer, self).__init__(incomings, **kwargs)
 
     def get_output_for(self, inputs, **kwargs):
-        return T.sum(inputs[0] * inputs[1], axis = 1)
+        return T.sum(inputs[0] * inputs[1], axis=1)
 
     def get_output_shape_for(self, input_shapes):
         return (input_shapes[0][0], )
 
+
 class SigmoidLogLayer(lasagne.layers.Layer):
 
     def get_output_for(self, input, **kwargs):
         # return T.log(lasagne.nonlinearities.sigmoid(input))
         return lasagne.nonlinearities.sigmoid(input)
-
diff --git a/test_ind.py b/test_ind.py
index 0d8410b..eca4142 100644
--- a/test_ind.py
+++ b/test_ind.py
@@ -2,48 +2,63 @@
 from scipy import sparse as sp
 from ind_model import ind_model as model
 import argparse
-import cPickle
+import pickle as pkl
+import numpy as np
+
 
 DATASET = 'citeseer'
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--learning_rate', help = 'learning rate for supervised loss', type = float, default = 0.1)
-parser.add_argument('--embedding_size', help = 'embedding dimensions', type = int, default = 50)
-parser.add_argument('--window_size', help = 'window size in random walk sequences', type = int, default = 3)
-parser.add_argument('--path_size', help = 'length of random walk sequences', type = int, default = 10)
-parser.add_argument('--batch_size', help = 'batch size for supervised loss', type = int, default = 200)
-parser.add_argument('--g_batch_size', help = 'batch size for graph context loss', type = int, default = 20)
-parser.add_argument('--g_sample_size', help = 'batch size for label context loss', type = int, default = 20)
-parser.add_argument('--neg_samp', help = 'negative sampling rate; zero means using softmax', type = int, default = 0)
-parser.add_argument('--g_learning_rate', help = 'learning rate for unsupervised loss', type = float, default = 1e-3)
-parser.add_argument('--model_file', help = 'filename for saving models', type = str, default = 'ind.model')
-parser.add_argument('--use_feature', help = 'whether use input features', type = bool, default = True)
-parser.add_argument('--update_emb', help = 'whether update embedding when optimizing supervised loss', type = bool, default = True)
-parser.add_argument('--layer_loss', help = 'whether incur loss on hidden layers', type = bool, default = True)
+parser.add_argument('--learning_rate', help='learning rate for supervised loss', type=float, default=0.1)
+parser.add_argument('--embedding_size', help='embedding dimensions', type=int, default=50)
+parser.add_argument('--window_size', help='window size in random walk sequences', type=int, default=3)
+parser.add_argument('--path_size', help='length of random walk sequences', type=int, default=10)
+parser.add_argument('--batch_size', help='batch size for supervised loss', type=int, default=200)
+parser.add_argument('--g_batch_size', help='batch size for graph context loss', type=int, default=20)
+parser.add_argument('--g_sample_size', help='batch size for label context loss', type=int, default=20)
+parser.add_argument('--neg_samp', help='negative sampling rate; zero means using softmax', type=int, default=0)
+parser.add_argument('--g_learning_rate', help='learning rate for unsupervised loss', type=float, default=1e-3)
+parser.add_argument('--model_file', help='filename for saving models', type=str, default='ind.model')
+parser.add_argument('--use_feature', help='whether use input features', type=bool, default=True)
+parser.add_argument('--update_emb', help='whether update embedding when optimizing supervised loss',
+                    type=bool, default=True)
+parser.add_argument('--layer_loss', help='whether incur loss on hidden layers', type=bool, default=True)
 args = parser.parse_args()
 
+
 def comp_accu(tpy, ty):
-    import numpy as np
-    return (np.argmax(tpy, axis = 1) == np.argmax(ty, axis = 1)).sum() * 1.0 / tpy.shape[0]
+    return (np.argmax(tpy, axis=1) == np.argmax(ty, axis=1)).sum() * 1.0 / tpy.shape[0]
+
 
 # load the data: x, y, tx, ty, allx, graph
 NAMES = ['x', 'y', 'tx', 'ty', 'allx', 'graph']
-OBJECTS = []
-for i in range(len(NAMES)):
-    OBJECTS.append(cPickle.load(open("data/ind.{}.{}".format(DATASET, NAMES[i]))))
-x, y, tx, ty, allx, graph = tuple(OBJECTS)
-
-m = model(args)                                                 # initialize the model
-m.add_data(x, y, allx, graph)                                   # add data
-m.build()                                                       # build the model
-m.init_train(init_iter_label = 10000, init_iter_graph = 400)    # pre-training
+objects = {}
+for name in NAMES:
+    data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1')
+    objects[name] = data
+
+# initialize the model
+m = model(args)
+
+# add data
+m.add_data(objects['x'], objects['y'], objects['allx'], objects['graph'])
+
+# build the model
+m.build()
+m.init_train(init_iter_label=10000, init_iter_graph=400)    # pre-training
 iter_cnt, max_accu = 0, 0
-while True:
-    m.step_train(max_iter = 1, iter_graph = 0.1, iter_inst = 1, iter_label = 0) # perform a training step
-    tpy = m.predict(tx)                                                         # predict the dev set
-    accu = comp_accu(tpy, ty)                                                   # compute the accuracy on the dev set
-    print iter_cnt, accu, max_accu
+for _ in range(1000):
+    # perform a training step
+    m.step_train(max_iter=1, iter_graph=0.1, iter_inst=1, iter_label=0)
+
+    # predict the dev set
+    tpy = m.predict(objects['tx'])
+
+    # compute the accuracy on the dev set
+    accu = comp_accu(tpy, objects['ty'])
+    print(iter_cnt, accu, max_accu)
     iter_cnt += 1
     if accu > max_accu:
-        m.store_params()                                                        # store the model if better result is obtained
+        # store the model if better result is obtained
+        m.store_params()
         max_accu = max(max_accu, accu)
diff --git a/test_trans.py b/test_trans.py
index 4c2f2ce..2e7b1f9 100644
--- a/test_trans.py
+++ b/test_trans.py
@@ -2,48 +2,61 @@
 from scipy import sparse as sp
 from trans_model import trans_model as model
 import argparse
-import cPickle
+import pickle as pkl
 
 DATASET = 'citeseer'
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--learning_rate', help = 'learning rate for supervised loss', type = float, default = 0.1)
-parser.add_argument('--embedding_size', help = 'embedding dimensions', type = int, default = 50)
-parser.add_argument('--window_size', help = 'window size in random walk sequences', type = int, default = 3)
-parser.add_argument('--path_size', help = 'length of random walk sequences', type = int, default = 10)
-parser.add_argument('--batch_size', help = 'batch size for supervised loss', type = int, default = 200)
-parser.add_argument('--g_batch_size', help = 'batch size for graph context loss', type = int, default = 200)
-parser.add_argument('--g_sample_size', help = 'batch size for label context loss', type = int, default = 100)
-parser.add_argument('--neg_samp', help = 'negative sampling rate; zero means using softmax', type = int, default = 0)
-parser.add_argument('--g_learning_rate', help = 'learning rate for unsupervised loss', type = float, default = 1e-2)
-parser.add_argument('--model_file', help = 'filename for saving models', type = str, default = 'trans.model')
-parser.add_argument('--use_feature', help = 'whether use input features', type = bool, default = True)
-parser.add_argument('--update_emb', help = 'whether update embedding when optimizing supervised loss', type = bool, default = True)
-parser.add_argument('--layer_loss', help = 'whether incur loss on hidden layers', type = bool, default = True)
+parser.add_argument('--learning_rate', help='learning rate for supervised loss', type=float, default=0.1)
+parser.add_argument('--embedding_size', help='embedding dimensions', type=int, default=50)
+parser.add_argument('--window_size', help='window size in random walk sequences', type=int, default=3)
+parser.add_argument('--path_size', help='length of random walk sequences', type=int, default=10)
+parser.add_argument('--batch_size', help='batch size for supervised loss', type=int, default=200)
+parser.add_argument('--g_batch_size', help='batch size for graph context loss', type=int, default=200)
+parser.add_argument('--g_sample_size', help='batch size for label context loss', type=int, default=100)
+parser.add_argument('--neg_samp', help='negative sampling rate; zero means using softmax', type=int, default=0)
+parser.add_argument('--g_learning_rate', help='learning rate for unsupervised loss', type=float, default=1e-2)
+parser.add_argument('--model_file', help='filename for saving models', type=str, default='trans.model')
+parser.add_argument('--use_feature', help='whether use input features', type=bool, default=True)
+parser.add_argument('--update_emb', help='whether update embedding when optimizing supervised loss',
+                    type=bool, default=True)
+parser.add_argument('--layer_loss', help='whether incur loss on hidden layers', type=bool, default=True)
 args = parser.parse_args()
 
+
 def comp_accu(tpy, ty):
     import numpy as np
-    return (np.argmax(tpy, axis = 1) == np.argmax(ty, axis = 1)).sum() * 1.0 / tpy.shape[0]
+    return (np.argmax(tpy, axis=1) == np.argmax(ty, axis=1)).sum() * 1.0 / tpy.shape[0]
+
 
 # load the data: x, y, tx, ty, graph
 NAMES = ['x', 'y', 'tx', 'ty', 'graph']
-OBJECTS = []
-for i in range(len(NAMES)):
-    OBJECTS.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, NAMES[i]))))
-x, y, tx, ty, graph = tuple(OBJECTS)
-
-m = model(args)                                             # initialize the model
-m.add_data(x, y, graph)                                     # add data
-m.build()                                                   # build the model
-m.init_train(init_iter_label = 2000, init_iter_graph = 70)  # pre-training
+objects = {}
+for name in NAMES:
+    data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1')
+    objects[name] = data
+#     objects.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, name))))
+# x, y, tx, ty, graph = tuple(objects)
+
+# initialize the model
+m = model(args)
+# add data
+m.add_data(objects['x'], objects['y'], objects['graph'])
+# build the model
+m.build()
+# pre-training
+m.init_train(init_iter_label=2000, init_iter_graph=70)
 iter_cnt, max_accu = 0, 0
-while True:
-    m.step_train(max_iter = 1, iter_graph = 0, iter_inst = 1, iter_label = 0)   # perform a training step
-    tpy = m.predict(tx)                                                         # predict the dev set
-    accu = comp_accu(tpy, ty)                                                   # compute the accuracy on the dev set
-    print iter_cnt, accu, max_accu
+for _ in range(10000):
+    # perform a training step
+    m.step_train(max_iter=1, iter_graph=0, iter_inst=1, iter_label=0)
+    # predict the dev set
+    tpy = m.predict(objects['tx'])
+    # compute the accuracy on the dev set
+    accu = comp_accu(tpy, objects['ty'])
+    print(iter_cnt, accu, max_accu)
     iter_cnt += 1
     if accu > max_accu:
-        m.store_params()                                                        # store the model if better result is obtained
+        # store the model if a better result is obtained
+        m.store_params()                                                        
         max_accu = max(max_accu, accu)
diff --git a/trans_model.py b/trans_model.py
index 33c16da..3ba0274 100644
--- a/trans_model.py
+++ b/trans_model.py
@@ -164,12 +164,12 @@ def init_train(self, init_iter_label, init_iter_graph):
         for i in range(init_iter_label):
             gx, gy = next(self.label_generator)
             loss = self.g_fn(gx, gy)
-            print 'iter label', i, loss
+            print('iter label', i, loss) 
 
         for i in range(init_iter_graph):
             gx, gy = next(self.graph_generator)
             loss = self.g_fn(gx, gy)
-            print 'iter graph', i, loss
+            print('iter graph', i, loss) 
 
     def step_train(self, max_iter, iter_graph, iter_inst, iter_label):
         """a training step. Iteratively sample batches for three loss functions.

From 6efd5dd97549084120c7b33c93aa88fca6ae44c2 Mon Sep 17 00:00:00 2001
From: "H. Jin" <jinhw1989@gmail.com>
Date: Sun, 22 Sep 2019 16:49:44 -0500
Subject: [PATCH 2/3] FIX: dataset in transductive setting

---
 test_trans.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_trans.py b/test_trans.py
index 2e7b1f9..4d17a94 100644
--- a/test_trans.py
+++ b/test_trans.py
@@ -33,7 +33,7 @@ def comp_accu(tpy, ty):
 NAMES = ['x', 'y', 'tx', 'ty', 'graph']
 objects = {}
 for name in NAMES:
-    data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1')
+    data = pkl.load(open("data/trans.{}.{}".format(DATASET, name), 'rb'), encoding='latin1')
     objects[name] = data
 #     objects.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, name))))
 # x, y, tx, ty, graph = tuple(objects)

From 93fc5da571b580d23f5931b822b0b1e94d7b178b Mon Sep 17 00:00:00 2001
From: "H. Jin" <hjin25@uic.edu>
Date: Thu, 5 Mar 2020 15:02:19 -0600
Subject: [PATCH 3/3] add requirements

---
 requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5792040
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+lasagne==0.2.dev1
+theano
+scipy
+numpy
\ No newline at end of file