From 4e939fb722054ec8513d826ef1b091a343fec698 Mon Sep 17 00:00:00 2001 From: "H. Jin" Date: Wed, 18 Sep 2019 14:45:18 -0500 Subject: [PATCH 1/3] Update implementation compatible with python 3 --- README.md | 18 ++++++++---- base_model.py | 23 ++++++++------- ind_model.py | 64 +++++++++++++++++++++-------------------- layers.py | 42 +++++++++++++++------------ test_ind.py | 77 ++++++++++++++++++++++++++++++-------------------- test_trans.py | 73 +++++++++++++++++++++++++++-------------------- trans_model.py | 4 +-- 7 files changed, 173 insertions(+), 128 deletions(-) diff --git a/README.md b/README.md index 421d86e..cd0e714 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # Planetoid +## Modifications + +* Port the implementation to Python 3 + ## Introduction This is an implementation of Planetoid, a graph-based semi-supervised learning method proposed in the following paper: @@ -15,12 +19,14 @@ Please cite the above paper if you use the datasets or code in this repo. We include the Citeseer dataset in the directory `data`, where the data structures needed are pickled. To run the transductive version, -``` + +```shell python test_trans.py ``` To run the inductive version, -``` + +```shell python test_ind.py ``` @@ -35,6 +41,7 @@ The models are implemented mainly in `trans_model.py` (transductive) and `ind_mo ### Transductive learning The input to the transductive model contains: + - `x`, the feature vectors of the training instances, - `y`, the one-hot labels of the training instances, - `graph`, a `dict` in the format `{index: [index_of_neighbor_nodes]}`, where the neighbor nodes are organized as a list. The current version only supports binary graphs. @@ -44,6 +51,7 @@ Let L be the number of training instances. 
The indices in `graph` from 0 to L - ### Inductive learning The input to the inductive model contains: + - `x`, the feature vectors of the labeled training instances, - `y`, the one-hot labels of the labeled training instances, - `allx`, the feature vectors of both labeled and unlabeled training instances (a superset of `x`), @@ -55,11 +63,12 @@ Let n be the number of both labeled and unlabeled training instances. These n in Datasets for Citeseet, Cora, and Pubmed are available in the directory `data`, in a preprocessed format stored as numpy/scipy files. -The dataset for DIEL is available at http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz. +The dataset for DIEL is available at <http://www.cs.cmu.edu/~lbing/data/emnlp-15-diel/emnlp-15-diel.tar.gz>. We also provide a much more succinct version of the dataset that only contains necessary files and some (not very well-organized) pre-processing code here at <http://cs.cmu.edu/~zhiliny/data/diel_data.tar.gz>. -The NELL dataset can be found here at http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz. +The NELL dataset can be found here at <http://www.cs.cmu.edu/~zhiliny/data/nell_data.tar.gz>. In addition to `x`, `y`, `allx`, and `graph` as described above, the preprocessed datasets also include: + - `tx`, the feature vectors of the test instances, - `ty`, the one-hot labels of the test instances, - `test.index`, the indices of test instances in `graph`, for the inductive setting, @@ -72,4 +81,3 @@ You can use `cPickle.load(open(filename))` to load the numpy/scipy objects `x`, ## Hyper-parameter tuning Refer to `test_ind.py` and `test_trans.py` for the definition of different hyper-parameters (passed as arguments). Hyper-parameters are tuned by randomly shuffle the training/test split (i.e., randomly shuffling the indices in `x`, `y`, `tx`, `ty`, and `graph`). 
For the DIEL dataset, we tune the hyper-parameters on one of the ten runs, and then keep the same hyper-parameters for all the ten runs. - diff --git a/base_model.py b/base_model.py index 9ae169f..e417139 100644 --- a/base_model.py +++ b/base_model.py @@ -1,9 +1,10 @@ import lasagne -import cPickle +import pickle import random import numpy as np + class base_model(object): """the base model for both transductive and inductive learning.""" @@ -11,15 +12,18 @@ def __init__(self, args): """ args (an object): contains the arguments used for initalizing the model. """ + np.random.seed(13) + random.seed(13) + self.embedding_size = args.embedding_size self.learning_rate = args.learning_rate self.batch_size = args.batch_size self.neg_samp = args.neg_samp self.model_file = args.model_file - + self.window_size = args.window_size self.path_size = args.path_size - + self.g_batch_size = args.g_batch_size self.g_learning_rate = args.g_learning_rate self.g_sample_size = args.g_sample_size @@ -28,10 +32,8 @@ def __init__(self, args): self.update_emb = args.update_emb self.layer_loss = args.layer_loss + # TODO: replace lasagne lasagne.random.set_rng(np.random) - np.random.seed(13) - - random.seed(13) self.inst_generator = self.gen_train_inst() self.graph_generator = self.gen_graph() @@ -42,9 +44,9 @@ def store_params(self): """ for i, l in enumerate(self.l): - fout = open("{}.{}".format(self.model_file, i), 'w') + fout = open("{}.{}".format(self.model_file, i), 'wb') params = lasagne.layers.get_all_param_values(l) - cPickle.dump(params, fout, cPickle.HIGHEST_PROTOCOL) + pickle.dump(params, fout, pickle.HIGHEST_PROTOCOL) fout.close() def load_params(self): @@ -52,7 +54,7 @@ def load_params(self): """ for i, l in enumerate(self.l): - fin = open("{}.{}".format(self.model_file, i)) - params = cPickle.load(fin) + fin = open("{}.{}".format(self.model_file, i), 'rb')  # pickle.load needs a binary file on Python 3 + params = pickle.load(fin) lasagne.layers.set_all_param_values(l, params) fin.close() @@ -71,6 +73,3 @@ def train(self, init_iter_label, init_iter_graph, max_iter, iter_graph, 
iter_ins """ self.init_train(init_iter_label, init_iter_graph) self.step_train(max_iter, iter_graph, iter_inst, iter_label) - - - diff --git a/ind_model.py b/ind_model.py index fd56666..cbd982a 100644 --- a/ind_model.py +++ b/ind_model.py @@ -9,6 +9,7 @@ from base_model import base_model + class ind_model(base_model): """Planetoid-I. """ @@ -28,38 +29,38 @@ def add_data(self, x, y, allx, graph): def build(self): """build the model. This method should be called after self.add_data. """ - x_sym = sparse.csr_matrix('x', dtype = 'float32') + x_sym = sparse.csr_matrix('x', dtype='float32') self.x_sym = x_sym y_sym = T.imatrix('y') - gx_sym = sparse.csr_matrix('gx', dtype = 'float32') + gx_sym = sparse.csr_matrix('gx', dtype='float32') gy_sym = T.ivector('gy') gz_sym = T.vector('gz') - l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym) - l_gx_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = gx_sym) - l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym) + l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym) + l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym) + l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym) - l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) + l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax) l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size) W = l_x_2.W - l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) + l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax) if self.use_feature: - l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis = 1) - l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) + l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1) + l_x = 
layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax) else: l_x = l_x_2 - l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W = W) + l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W) if self.neg_samp > 0: - l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size = self.num_ver, output_size = self.embedding_size) + l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size=self.num_ver, output_size=self.embedding_size) l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul) pgy_sym = lasagne.layers.get_output(l_gx) - g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gz_sym)).sum() + g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum() else: - l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity = lasagne.nonlinearities.softmax) + l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity=lasagne.nonlinearities.softmax) pgy_sym = lasagne.layers.get_output(l_gx) g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum() - + self.l = [l_x, l_gx] py_sym = lasagne.layers.get_output(l_x) @@ -73,12 +74,12 @@ def build(self): params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b] if self.update_emb: params = lasagne.layers.get_all_params(l_x) - updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate) - self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates) + updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate) + self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates) g_params = lasagne.layers.get_all_params(l_gx) - g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate) - self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates = g_updates, on_unused_input = 'ignore') + g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate) + self.g_fn = 
theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates=g_updates, on_unused_input='ignore') self.test_fn = theano.function([x_sym], py_sym) @@ -86,7 +87,7 @@ def gen_train_inst(self): """generator for batches for classification loss. """ while True: - ind = np.array(np.random.permutation(self.x.shape[0]), dtype = np.int32) + ind = np.array(np.random.permutation(self.x.shape[0]), dtype=np.int32) i = 0 while i < self.x.shape[0]: j = min(ind.shape[0], i + self.batch_size) @@ -103,21 +104,25 @@ def gen_graph(self): g, gy = [], [] j = min(ind.shape[0], i + self.g_batch_size) for k in ind[i: j]: - if len(self.graph[k]) == 0: continue + if len(self.graph[k]) == 0: + continue path = [k] for _ in range(self.path_size): path.append(random.choice(self.graph[path[-1]])) for l in range(len(path)): - if path[l] >= self.allx.shape[0]: continue + if path[l] >= self.allx.shape[0]: + continue for m in range(l - self.window_size, l + self.window_size + 1): - if m < 0 or m >= len(path): continue - if path[m] >= self.allx.shape[0]: continue + if m < 0 or m >= len(path): + continue + if path[m] >= self.allx.shape[0]: + continue g.append([path[l], path[m]]) gy.append(1.0) for _ in range(self.neg_samp): g.append([path[l], random.randint(0, self.num_ver - 1)]) gy.append(- 1.0) - g = np.array(g, dtype = np.int32) + g = np.array(g, dtype=np.int32) yield self.allx[g[:, 0]], g[:, 1], gy i = j @@ -140,17 +145,17 @@ def gen_label_graph(self): for _ in range(self.g_sample_size): x1 = random.randint(0, self.x.shape[0] - 1) label = labels[x1] - if len(label2inst) == 1: continue + if len(label2inst) == 1: + continue x2 = random.choice(label2inst[label]) g.append([x1, x2]) gy.append(1.0) for _ in range(self.neg_samp): g.append([x1, random.choice(not_label[label])]) gy.append(- 1.0) - g = np.array(g, dtype = np.int32) + g = np.array(g, dtype=np.int32) yield self.allx[g[:, 0]], g[:, 1], gy - def init_train(self, init_iter_label, init_iter_graph): """pre-training of graph embeddings. 
init_iter_label (int): # iterations for optimizing label context loss. @@ -159,12 +164,12 @@ def init_train(self, init_iter_label, init_iter_graph): for i in range(init_iter_label): gx, gy, gz = next(self.label_generator) loss = self.g_fn(gx, gy, gz) - print 'iter label', i, loss + print('iter label', i, loss) for i in range(init_iter_graph): gx, gy, gz = next(self.graph_generator) loss = self.g_fn(gx, gy, gz) - print 'iter graph', i, loss + print('iter graph', i, loss) def step_train(self, max_iter, iter_graph, iter_inst, iter_label): """a training step. Iteratively sample batches for three loss functions. @@ -191,4 +196,3 @@ def predict(self, tx): returns (numpy.ndarray, #instacnes * #classes): classification probabilities for dev instances. """ return self.test_fn(tx) - diff --git a/layers.py b/layers.py index bee5fdf..f1a6d96 100644 --- a/layers.py +++ b/layers.py @@ -7,9 +7,10 @@ EXP_SOFTMAX = True + class DenseLayer(lasagne.layers.Layer): - def __init__(self, incoming, num_units, W = lasagne.init.GlorotUniform(), - b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, + def __init__(self, incoming, num_units, W=lasagne.init.GlorotUniform(), + b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs): super(DenseLayer, self).__init__(incoming, **kwargs) self.nonlinearity = (nonlinearities.identity if nonlinearity is None @@ -42,11 +43,12 @@ def get_output_for(self, input, **kwargs): if not EXP_SOFTMAX or self.nonlinearity != lasagne.nonlinearities.softmax: return self.nonlinearity(activation) else: - return T.exp(activation) / (T.exp(activation).sum(1, keepdims = True)) + return T.exp(activation) / (T.exp(activation).sum(1, keepdims=True)) + class SparseLayer(lasagne.layers.Layer): - def __init__(self, incoming, num_units, W = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs): + def __init__(self, incoming, num_units, 
W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs): super(SparseLayer, self).__init__(incoming, **kwargs) self.num_units = num_units @@ -67,7 +69,7 @@ def get_output_for(self, input, **kwargs): if not EXP_SOFTMAX or self.nonlinearity != lasagne.nonlinearities.softmax: return self.nonlinearity(act) else: - return T.exp(act) / (T.exp(act).sum(1, keepdims = True)) + return T.exp(act) / (T.exp(act).sum(1, keepdims=True)) def get_output_shape_for(self, input_shape): return (input_shape[0], self.num_units) @@ -75,7 +77,7 @@ def get_output_shape_for(self, input_shape): class HybridLayer(lasagne.layers.MergeLayer): - def __init__(self, incomings, num_units, W1 = lasagne.init.GlorotUniform(), W2 = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs): + def __init__(self, incomings, num_units, W1=lasagne.init.GlorotUniform(), W2=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs): super(HybridLayer, self).__init__(incomings, **kwargs) self.num_units = num_units @@ -84,19 +86,20 @@ def __init__(self, incomings, num_units, W1 = lasagne.init.GlorotUniform(), W2 = num_inputs_1 = self.input_shapes[0][1] num_inputs_2 = self.input_shapes[1][1] - self.W1 = self.add_param(W1, (num_inputs_1, num_units), name = "W1") - self.W2 = self.add_param(W2, (num_inputs_2, num_units), name = "W2") - self.b = self.add_param(b, (num_units, ), name = "b", regularizable = False) + self.W1 = self.add_param(W1, (num_inputs_1, num_units), name="W1") + self.W2 = self.add_param(W2, (num_inputs_2, num_units), name="W2") + self.b = self.add_param(b, (num_units, ), name="b", regularizable=False) def get_output_for(self, inputs, **kwargs): act = sparse.basic.structured_dot(inputs[0], self.W1) + T.dot(inputs[1], self.W2) + self.b.dimshuffle('x', 0) if EXP_SOFTMAX and self.nonlinearity == lasagne.nonlinearities.softmax: - 
return T.exp(act) / (T.exp(act).sum(1, keepdims = True)) + return T.exp(act) / (T.exp(act).sum(1, keepdims=True)) return self.nonlinearity(act) def get_output_shape_for(self, input_shapes): return (input_shapes[0][0], self.num_units) + class EntropyLayer(lasagne.layers.Layer): def __init__(self, incoming, constW, **kwargs): @@ -108,24 +111,27 @@ def get_output_for(self, input, **kwargs): return T.reshape(T.dot(input, self.constW), (input.shape[0] * input.shape[1] * input.shape[1], 1)) def get_output_shape_for(self, input_shape): - if input_shape[0] is None or input_shape[1] is None: return (None, 1) + if input_shape[0] is None or input_shape[1] is None: + return (None, 1) return (input_shape[0] * input_shape[1] * input_shape[1], 1) + class TensorLayer(lasagne.layers.Layer): - def __init__(self, incoming, num_units, V = lasagne.init.GlorotUniform(), W = lasagne.init.GlorotUniform(), b = lasagne.init.Constant(0.), nonlinearity = lasagne.nonlinearities.rectify, **kwargs): + def __init__(self, incoming, num_units, V=lasagne.init.GlorotUniform(), W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs): super(TensorLayer, self).__init__(incoming, **kwargs) self.num_units = num_units self.nonlinearity = nonlinearity num_inputs = self.input_shape[1] - self.V = self.add_param(V, (self.num_units, num_inputs, num_inputs), name = "V") - self.W = self.add_param(W, (num_inputs, self.num_units), name = "W") - self.b = self.add_param(b, (self.num_units, ), name = "b") + self.V = self.add_param(V, (self.num_units, num_inputs, num_inputs), name="V") + self.W = self.add_param(W, (num_inputs, self.num_units), name="W") + self.b = self.add_param(b, (self.num_units, ), name="b") def get_output_for(self, input, **kwargs): - act = T.batched_dot(T.tensordot(input, self.V, axes = [1, 2]), input) + T.dot(input, self.W) + self.b.dimshuffle('x', 0) + act = T.batched_dot(T.tensordot(input, self.V, axes=[1, 2]), + input) + T.dot(input, 
self.W) + self.b.dimshuffle('x', 0) return self.nonlinearity(act) def get_output_shape_for(self, input_shape): @@ -138,14 +144,14 @@ def __init__(self, incomings, **kwargs): super(DotLayer, self).__init__(incomings, **kwargs) def get_output_for(self, inputs, **kwargs): - return T.sum(inputs[0] * inputs[1], axis = 1) + return T.sum(inputs[0] * inputs[1], axis=1) def get_output_shape_for(self, input_shapes): return (input_shapes[0][0], ) + class SigmoidLogLayer(lasagne.layers.Layer): def get_output_for(self, input, **kwargs): # return T.log(lasagne.nonlinearities.sigmoid(input)) return lasagne.nonlinearities.sigmoid(input) - diff --git a/test_ind.py b/test_ind.py index 0d8410b..eca4142 100644 --- a/test_ind.py +++ b/test_ind.py @@ -2,48 +2,63 @@ from scipy import sparse as sp from ind_model import ind_model as model import argparse -import cPickle +import pickle as pkl +import numpy as np + DATASET = 'citeseer' parser = argparse.ArgumentParser() -parser.add_argument('--learning_rate', help = 'learning rate for supervised loss', type = float, default = 0.1) -parser.add_argument('--embedding_size', help = 'embedding dimensions', type = int, default = 50) -parser.add_argument('--window_size', help = 'window size in random walk sequences', type = int, default = 3) -parser.add_argument('--path_size', help = 'length of random walk sequences', type = int, default = 10) -parser.add_argument('--batch_size', help = 'batch size for supervised loss', type = int, default = 200) -parser.add_argument('--g_batch_size', help = 'batch size for graph context loss', type = int, default = 20) -parser.add_argument('--g_sample_size', help = 'batch size for label context loss', type = int, default = 20) -parser.add_argument('--neg_samp', help = 'negative sampling rate; zero means using softmax', type = int, default = 0) -parser.add_argument('--g_learning_rate', help = 'learning rate for unsupervised loss', type = float, default = 1e-3) -parser.add_argument('--model_file', help = 'filename 
for saving models', type = str, default = 'ind.model') -parser.add_argument('--use_feature', help = 'whether use input features', type = bool, default = True) -parser.add_argument('--update_emb', help = 'whether update embedding when optimizing supervised loss', type = bool, default = True) -parser.add_argument('--layer_loss', help = 'whether incur loss on hidden layers', type = bool, default = True) +parser.add_argument('--learning_rate', help='learning rate for supervised loss', type=float, default=0.1) +parser.add_argument('--embedding_size', help='embedding dimensions', type=int, default=50) +parser.add_argument('--window_size', help='window size in random walk sequences', type=int, default=3) +parser.add_argument('--path_size', help='length of random walk sequences', type=int, default=10) +parser.add_argument('--batch_size', help='batch size for supervised loss', type=int, default=200) +parser.add_argument('--g_batch_size', help='batch size for graph context loss', type=int, default=20) +parser.add_argument('--g_sample_size', help='batch size for label context loss', type=int, default=20) +parser.add_argument('--neg_samp', help='negative sampling rate; zero means using softmax', type=int, default=0) +parser.add_argument('--g_learning_rate', help='learning rate for unsupervised loss', type=float, default=1e-3) +parser.add_argument('--model_file', help='filename for saving models', type=str, default='ind.model') +parser.add_argument('--use_feature', help='whether use input features', type=bool, default=True) +parser.add_argument('--update_emb', help='whether update embedding when optimizing supervised loss', + type=bool, default=True) +parser.add_argument('--layer_loss', help='whether incur loss on hidden layers', type=bool, default=True) args = parser.parse_args() + def comp_accu(tpy, ty): - import numpy as np - return (np.argmax(tpy, axis = 1) == np.argmax(ty, axis = 1)).sum() * 1.0 / tpy.shape[0] + return (np.argmax(tpy, axis=1) == np.argmax(ty, 
axis=1)).sum() * 1.0 / tpy.shape[0] + # load the data: x, y, tx, ty, allx, graph NAMES = ['x', 'y', 'tx', 'ty', 'allx', 'graph'] -OBJECTS = [] -for i in range(len(NAMES)): - OBJECTS.append(cPickle.load(open("data/ind.{}.{}".format(DATASET, NAMES[i])))) -x, y, tx, ty, allx, graph = tuple(OBJECTS) - -m = model(args) # initialize the model -m.add_data(x, y, allx, graph) # add data -m.build() # build the model -m.init_train(init_iter_label = 10000, init_iter_graph = 400) # pre-training +objects = {} +for name in NAMES: + data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1') + objects[name] = data + +# initialize the model +m = model(args) + +# add data +m.add_data(objects['x'], objects['y'], objects['allx'], objects['graph']) + +# build the model +m.build() +m.init_train(init_iter_label=10000, init_iter_graph=400) # pre-training iter_cnt, max_accu = 0, 0 -while True: - m.step_train(max_iter = 1, iter_graph = 0.1, iter_inst = 1, iter_label = 0) # perform a training step - tpy = m.predict(tx) # predict the dev set - accu = comp_accu(tpy, ty) # compute the accuracy on the dev set - print iter_cnt, accu, max_accu +for _ in range(1000): + # perform a training step + m.step_train(max_iter=1, iter_graph=0.1, iter_inst=1, iter_label=0) + + # predict the dev set + tpy = m.predict(objects['tx']) + + # compute the accuracy on the dev set + accu = comp_accu(tpy, objects['ty']) + print(iter_cnt, accu, max_accu) iter_cnt += 1 if accu > max_accu: - m.store_params() # store the model if better result is obtained + # store the model if better result is obtained + m.store_params() max_accu = max(max_accu, accu) diff --git a/test_trans.py b/test_trans.py index 4c2f2ce..2e7b1f9 100644 --- a/test_trans.py +++ b/test_trans.py @@ -2,48 +2,61 @@ from scipy import sparse as sp from trans_model import trans_model as model import argparse -import cPickle +import pickle as pkl DATASET = 'citeseer' parser = argparse.ArgumentParser() 
-parser.add_argument('--learning_rate', help = 'learning rate for supervised loss', type = float, default = 0.1) -parser.add_argument('--embedding_size', help = 'embedding dimensions', type = int, default = 50) -parser.add_argument('--window_size', help = 'window size in random walk sequences', type = int, default = 3) -parser.add_argument('--path_size', help = 'length of random walk sequences', type = int, default = 10) -parser.add_argument('--batch_size', help = 'batch size for supervised loss', type = int, default = 200) -parser.add_argument('--g_batch_size', help = 'batch size for graph context loss', type = int, default = 200) -parser.add_argument('--g_sample_size', help = 'batch size for label context loss', type = int, default = 100) -parser.add_argument('--neg_samp', help = 'negative sampling rate; zero means using softmax', type = int, default = 0) -parser.add_argument('--g_learning_rate', help = 'learning rate for unsupervised loss', type = float, default = 1e-2) -parser.add_argument('--model_file', help = 'filename for saving models', type = str, default = 'trans.model') -parser.add_argument('--use_feature', help = 'whether use input features', type = bool, default = True) -parser.add_argument('--update_emb', help = 'whether update embedding when optimizing supervised loss', type = bool, default = True) -parser.add_argument('--layer_loss', help = 'whether incur loss on hidden layers', type = bool, default = True) +parser.add_argument('--learning_rate', help='learning rate for supervised loss', type=float, default=0.1) +parser.add_argument('--embedding_size', help='embedding dimensions', type=int, default=50) +parser.add_argument('--window_size', help='window size in random walk sequences', type=int, default=3) +parser.add_argument('--path_size', help='length of random walk sequences', type=int, default=10) +parser.add_argument('--batch_size', help='batch size for supervised loss', type=int, default=200) +parser.add_argument('--g_batch_size', help='batch 
size for graph context loss', type=int, default=200) +parser.add_argument('--g_sample_size', help='batch size for label context loss', type=int, default=100) +parser.add_argument('--neg_samp', help='negative sampling rate; zero means using softmax', type=int, default=0) +parser.add_argument('--g_learning_rate', help='learning rate for unsupervised loss', type=float, default=1e-2) +parser.add_argument('--model_file', help='filename for saving models', type=str, default='trans.model') +parser.add_argument('--use_feature', help='whether use input features', type=bool, default=True) +parser.add_argument('--update_emb', help='whether update embedding when optimizing supervised loss', + type=bool, default=True) +parser.add_argument('--layer_loss', help='whether incur loss on hidden layers', type=bool, default=True) args = parser.parse_args() + def comp_accu(tpy, ty): import numpy as np - return (np.argmax(tpy, axis = 1) == np.argmax(ty, axis = 1)).sum() * 1.0 / tpy.shape[0] + return (np.argmax(tpy, axis=1) == np.argmax(ty, axis=1)).sum() * 1.0 / tpy.shape[0] + # load the data: x, y, tx, ty, graph NAMES = ['x', 'y', 'tx', 'ty', 'graph'] -OBJECTS = [] -for i in range(len(NAMES)): - OBJECTS.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, NAMES[i])))) -x, y, tx, ty, graph = tuple(OBJECTS) - -m = model(args) # initialize the model -m.add_data(x, y, graph) # add data -m.build() # build the model -m.init_train(init_iter_label = 2000, init_iter_graph = 70) # pre-training +objects = {} +for name in NAMES: + data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1') + objects[name] = data +# objects.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, name)))) +# x, y, tx, ty, graph = tuple(objects) + +# initialize the model +m = model(args) +# add data +m.add_data(objects['x'], objects['y'], objects['graph']) +# build the model +m.build() +# pre-training +m.init_train(init_iter_label=2000, init_iter_graph=70) iter_cnt, max_accu = 0, 
0 -while True: - m.step_train(max_iter = 1, iter_graph = 0, iter_inst = 1, iter_label = 0) # perform a training step - tpy = m.predict(tx) # predict the dev set - accu = comp_accu(tpy, ty) # compute the accuracy on the dev set - print iter_cnt, accu, max_accu +for _ in range(10000): + # perform a training step + m.step_train(max_iter=1, iter_graph=0, iter_inst=1, iter_label=0) + # predict the dev set + tpy = m.predict(objects['tx']) + # compute the accuracy on the dev set + accu = comp_accu(tpy, objects['ty']) + print(iter_cnt, accu, max_accu) iter_cnt += 1 if accu > max_accu: - m.store_params() # store the model if better result is obtained + # store the model if better result is obtained + m.store_params() max_accu = max(max_accu, accu) diff --git a/trans_model.py b/trans_model.py index 33c16da..3ba0274 100644 --- a/trans_model.py +++ b/trans_model.py @@ -164,12 +164,12 @@ def init_train(self, init_iter_label, init_iter_graph): for i in range(init_iter_label): gx, gy = next(self.label_generator) loss = self.g_fn(gx, gy) - print 'iter label', i, loss + print('iter label', i, loss) for i in range(init_iter_graph): gx, gy = next(self.graph_generator) loss = self.g_fn(gx, gy) - print 'iter graph', i, loss + print('iter graph', i, loss) def step_train(self, max_iter, iter_graph, iter_inst, iter_label): """a training step. Iteratively sample batches for three loss functions. From 6efd5dd97549084120c7b33c93aa88fca6ae44c2 Mon Sep 17 00:00:00 2001 From: "H. 
Jin" Date: Sun, 22 Sep 2019 16:49:44 -0500 Subject: [PATCH 2/3] FIX: dataset in transductive setting --- test_trans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_trans.py b/test_trans.py index 2e7b1f9..4d17a94 100644 --- a/test_trans.py +++ b/test_trans.py @@ -33,7 +33,7 @@ def comp_accu(tpy, ty): NAMES = ['x', 'y', 'tx', 'ty', 'graph'] objects = {} for name in NAMES: - data = pkl.load(open("data/ind.{}.{}".format(DATASET, name), 'rb'), encoding='latin1') + data = pkl.load(open("data/trans.{}.{}".format(DATASET, name), 'rb'), encoding='latin1') objects[name] = data # objects.append(cPickle.load(open("data/trans.{}.{}".format(DATASET, name)))) # x, y, tx, ty, graph = tuple(objects) From 93fc5da571b580d23f5931b822b0b1e94d7b178b Mon Sep 17 00:00:00 2001 From: "H. Jin" Date: Thu, 5 Mar 2020 15:02:19 -0600 Subject: [PATCH 3/3] add requirements --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5792040 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +lasagne==0.2.dev1 +theano +scipy +numpy \ No newline at end of file