Update implementation to be compatible with Python 3 #12

Open
wants to merge 3 commits into base: master
18 changes: 13 additions & 5 deletions README.md
@@ -1,5 +1,9 @@
# Planetoid

## Modifications

* Port the implementation to Python 3

## Introduction

This is an implementation of Planetoid, a graph-based semi-supervised learning method proposed in the following paper:
@@ -15,12 +19,14 @@ Please cite the above paper if you use the datasets or code in this repo.
We include the Citeseer dataset in the directory `data`, where the data structures needed are pickled.

To run the transductive version,
```

```shell
python test_trans.py
```

To run the inductive version,
```

```shell
python test_ind.py
```

@@ -35,6 +41,7 @@ The models are implemented mainly in `trans_model.py` (transductive) and `ind_mo
### Transductive learning

The input to the transductive model contains:

- `x`, the feature vectors of the training instances,
- `y`, the one-hot labels of the training instances,
- `graph`, a `dict` in the format `{index: [index_of_neighbor_nodes]}`, where the neighbor nodes are organized as a list. The current version only supports binary graphs.
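
For concreteness, here is a minimal sketch of what these three inputs could look like; the shapes, feature values, and the toy graph are illustrative only, not taken from the bundled datasets:

```python
import numpy as np
import scipy.sparse as sp

# Three training instances with four features each; features are stored sparsely.
x = sp.csr_matrix(np.array([[1., 0., 0., 1.],
                            [0., 1., 0., 0.],
                            [0., 0., 1., 1.]], dtype=np.float32))

# One-hot labels for the same three instances (two classes in this toy example).
y = np.array([[1, 0],
              [0, 1],
              [1, 0]], dtype=np.int32)

# Binary graph: {index: [indices_of_neighbor_nodes]}.
graph = {0: [1, 2], 1: [0], 2: [0]}
```
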
@@ -44,6 +51,7 @@ Let L be the number of training instances. The indices in `graph` from 0 to L -
### Inductive learning

The input to the inductive model contains:

- `x`, the feature vectors of the labeled training instances,
- `y`, the one-hot labels of the labeled training instances,
- `allx`, the feature vectors of both labeled and unlabeled training instances (a superset of `x`),
@@ -55,11 +63,12 @@ Let n be the number of both labeled and unlabeled training instances. These n in

Datasets for Citeseer, Cora, and Pubmed are available in the directory `data`, in a preprocessed format stored as numpy/scipy files.

The dataset for DIEL is available at http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz.
The dataset for DIEL is available at <http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz>. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at <http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz>.

The NELL dataset can be found here at http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz.
The NELL dataset can be found here at <http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz>.

In addition to `x`, `y`, `allx`, and `graph` as described above, the preprocessed datasets also include:

- `tx`, the feature vectors of the test instances,
- `ty`, the one-hot labels of the test instances,
- `test.index`, the indices of test instances in `graph`, for the inductive setting,
@@ -72,4 +81,3 @@ You can use `cPickle.load(open(filename))` to load the numpy/scipy objects `x`,
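
Under Python 3 (the target of this PR), the `cPickle` call above no longer applies; a minimal sketch of the equivalent, assuming the files in `data` were pickled under Python 2 (the filename below is only illustrative):

```python
import pickle

# Illustrative path; each pickled object in `data` lives in its own file.
with open("data/ind.citeseer.x", "rb") as f:
    # Python 2 pickles of numpy/scipy objects generally need encoding='latin1' in Python 3.
    x = pickle.load(f, encoding="latin1")
```
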
## Hyper-parameter tuning

Refer to `test_ind.py` and `test_trans.py` for the definition of different hyper-parameters (passed as arguments). Hyper-parameters are tuned by randomly shuffling the training/test split (i.e., randomly shuffling the indices in `x`, `y`, `tx`, `ty`, and `graph`). For the DIEL dataset, we tune the hyper-parameters on one of the ten runs, and then keep the same hyper-parameters for all ten runs.
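
A rough sketch of that re-splitting step (the array sizes and the 1000-instance cut-off are placeholders, and re-mapping the indices in `graph` is omitted):

```python
import numpy as np

# Placeholder data standing in for the stacked features and labels of all labeled instances.
all_x = np.random.rand(1500, 128).astype(np.float32)
all_y = np.eye(3, dtype=np.int32)[np.random.randint(0, 3, size=1500)]

# Randomly re-split into a training portion (x, y) and a test portion (tx, ty).
perm = np.random.permutation(all_x.shape[0])
train_idx, test_idx = perm[:1000], perm[1000:]
x, y = all_x[train_idx], all_y[train_idx]
tx, ty = all_x[test_idx], all_y[test_idx]
```
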

23 changes: 11 additions & 12 deletions base_model.py
@@ -1,25 +1,29 @@

import lasagne
import cPickle
import pickle
import random
import numpy as np


class base_model(object):
"""the base model for both transductive and inductive learning."""

def __init__(self, args):
"""
args (an object): contains the arguments used for initializing the model.
"""
np.random.seed(13)
random.seed(13)

self.embedding_size = args.embedding_size
self.learning_rate = args.learning_rate
self.batch_size = args.batch_size
self.neg_samp = args.neg_samp
self.model_file = args.model_file

self.window_size = args.window_size
self.path_size = args.path_size

self.g_batch_size = args.g_batch_size
self.g_learning_rate = args.g_learning_rate
self.g_sample_size = args.g_sample_size
@@ -28,10 +32,8 @@ def __init__(self, args):
self.update_emb = args.update_emb
self.layer_loss = args.layer_loss

# TODO: replace lasagne
lasagne.random.set_rng(np.random)
np.random.seed(13)

random.seed(13)

self.inst_generator = self.gen_train_inst()
self.graph_generator = self.gen_graph()
@@ -42,17 +44,17 @@ def store_params(self):
"""

for i, l in enumerate(self.l):
fout = open("{}.{}".format(self.model_file, i), 'w')
fout = open("{}.{}".format(self.model_file, i), 'wb')
params = lasagne.layers.get_all_param_values(l)
cPickle.dump(params, fout, cPickle.HIGHEST_PROTOCOL)
pickle.dump(params, fout, pickle.HIGHEST_PROTOCOL)
fout.close()

def load_params(self):
"""load the model parameters from self.model_file.
"""
for i, l in enumerate(self.l):
fin = open("{}.{}".format(self.model_file, i), 'rb')
params = cPickle.load(fin)
params = pickle.load(fin)
lasagne.layers.set_all_param_values(l, params)
fin.close()

@@ -71,6 +73,3 @@ def train(self, init_iter_label, init_iter_graph, max_iter, iter_graph, iter_ins
"""
self.init_train(init_iter_label, init_iter_graph)
self.step_train(max_iter, iter_graph, iter_inst, iter_label)



64 changes: 34 additions & 30 deletions ind_model.py
@@ -9,6 +9,7 @@

from base_model import base_model


class ind_model(base_model):
"""Planetoid-I.
"""
@@ -28,38 +29,38 @@ def add_data(self, x, y, allx, graph):
def build(self):
"""build the model. This method should be called after self.add_data.
"""
x_sym = sparse.csr_matrix('x', dtype = 'float32')
x_sym = sparse.csr_matrix('x', dtype='float32')
self.x_sym = x_sym
y_sym = T.imatrix('y')
gx_sym = sparse.csr_matrix('gx', dtype = 'float32')
gx_sym = sparse.csr_matrix('gx', dtype='float32')
gy_sym = T.ivector('gy')
gz_sym = T.vector('gz')

l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
l_gx_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = gx_sym)
l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)
l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym)
l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
W = l_x_2.W
l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
if self.use_feature:
l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis = 1)
l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
else:
l_x = l_x_2

l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W = W)
l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
if self.neg_samp > 0:
l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size = self.num_ver, output_size = self.embedding_size)
l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size=self.num_ver, output_size=self.embedding_size)
l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
pgy_sym = lasagne.layers.get_output(l_gx)
g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gz_sym)).sum()
g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
else:
l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity = lasagne.nonlinearities.softmax)
l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity=lasagne.nonlinearities.softmax)
pgy_sym = lasagne.layers.get_output(l_gx)
g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()

self.l = [l_x, l_gx]

py_sym = lasagne.layers.get_output(l_x)
@@ -73,20 +74,20 @@ def build(self):
params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b]
if self.update_emb:
params = lasagne.layers.get_all_params(l_x)
updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)
self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates)
updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

g_params = lasagne.layers.get_all_params(l_gx)
g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates = g_updates, on_unused_input = 'ignore')
g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates=g_updates, on_unused_input='ignore')

self.test_fn = theano.function([x_sym], py_sym)

def gen_train_inst(self):
"""generator for batches for classification loss.
"""
while True:
ind = np.array(np.random.permutation(self.x.shape[0]), dtype = np.int32)
ind = np.array(np.random.permutation(self.x.shape[0]), dtype=np.int32)
i = 0
while i < self.x.shape[0]:
j = min(ind.shape[0], i + self.batch_size)
@@ -103,21 +104,25 @@ def gen_graph(self):
g, gy = [], []
j = min(ind.shape[0], i + self.g_batch_size)
for k in ind[i: j]:
if len(self.graph[k]) == 0: continue
if len(self.graph[k]) == 0:
continue
path = [k]
for _ in range(self.path_size):
path.append(random.choice(self.graph[path[-1]]))
for l in range(len(path)):
if path[l] >= self.allx.shape[0]: continue
if path[l] >= self.allx.shape[0]:
continue
for m in range(l - self.window_size, l + self.window_size + 1):
if m < 0 or m >= len(path): continue
if path[m] >= self.allx.shape[0]: continue
if m < 0 or m >= len(path):
continue
if path[m] >= self.allx.shape[0]:
continue
g.append([path[l], path[m]])
gy.append(1.0)
for _ in range(self.neg_samp):
g.append([path[l], random.randint(0, self.num_ver - 1)])
gy.append(- 1.0)
g = np.array(g, dtype = np.int32)
g = np.array(g, dtype=np.int32)
yield self.allx[g[:, 0]], g[:, 1], gy
i = j

@@ -140,17 +145,17 @@ def gen_label_graph(self):
for _ in range(self.g_sample_size):
x1 = random.randint(0, self.x.shape[0] - 1)
label = labels[x1]
if len(label2inst) == 1: continue
if len(label2inst) == 1:
continue
x2 = random.choice(label2inst[label])
g.append([x1, x2])
gy.append(1.0)
for _ in range(self.neg_samp):
g.append([x1, random.choice(not_label[label])])
gy.append(- 1.0)
g = np.array(g, dtype = np.int32)
g = np.array(g, dtype=np.int32)
yield self.allx[g[:, 0]], g[:, 1], gy


def init_train(self, init_iter_label, init_iter_graph):
"""pre-training of graph embeddings.
init_iter_label (int): # iterations for optimizing label context loss.
@@ -159,12 +164,12 @@ def init_train(self, init_iter_label, init_iter_graph):
for i in range(init_iter_label):
gx, gy, gz = next(self.label_generator)
loss = self.g_fn(gx, gy, gz)
print 'iter label', i, loss
print('iter label', i, loss)

for i in range(init_iter_graph):
gx, gy, gz = next(self.graph_generator)
loss = self.g_fn(gx, gy, gz)
print 'iter graph', i, loss
print('iter graph', i, loss)

def step_train(self, max_iter, iter_graph, iter_inst, iter_label):
"""a training step. Iteratively sample batches for three loss functions.
@@ -191,4 +196,3 @@ def predict(self, tx):
returns (numpy.ndarray, #instances * #classes): classification probabilities for dev instances.
"""
return self.test_fn(tx)
