Commit

most code

siddsax committed Nov 16, 2018
1 parent 8dd59ac commit 6c86d68
Showing 22 changed files with 1,402 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
code/saved_models
datasets
*.npy
*.npz
*.pyc
6 changes: 6 additions & 0 deletions README.md
@@ -1,2 +1,8 @@
# XML-CNN
PyTorch implementation of the paper "Deep Learning for Extreme Multi-label Text Classification" (Liu et al., SIGIR 2017): http://nyc.lti.cs.cmu.edu/yiming/Publications/jliu-sigir17.pdf

## Dependencies
* NLTK (stopwords)
* PyTorch
* Gensim
* Matplotlib
79 changes: 79 additions & 0 deletions code/cnn_cvae.py
@@ -0,0 +1,79 @@
from header import *
from cnn_train import *
from cnn_test import *
import pdb

# ------------------------ Params -------------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='XML-CNN training and evaluation options.')
parser.add_argument('--pca', dest='pca_flag', type=int, default=0, help='1 to do pca, 0 for not doing it')
parser.add_argument('--zd', dest='Z_dim', type=int, default=100, help='Latent layer dimension')
parser.add_argument('--mb', dest='mb_size', type=int, default=20, help='Size of minibatch, changing might result in latent layer variance overflow')
parser.add_argument('--hd', dest='h_dim', type=int, default=600, help='hidden layer dimension')
parser.add_argument('--Hd', dest='H_dim', type=int, default=512, help='hidden layer dimension')
parser.add_argument('--lr', dest='lr', type=float, default=1e-3, help='Learning rate')
parser.add_argument('--p', dest='plot_flg', type=int, default=0, help='1 to plot, 0 to not plot')
parser.add_argument('--e', dest='num_epochs', type=int, default=100, help='number of training epochs')
parser.add_argument('--b', dest='beta', type=float, default=1, help='factor multiplied to likelihood param')
parser.add_argument('--d', dest='disp_flg', type=int, default=0, help='display graphs')
parser.add_argument('--sve', dest='save', type=int, default=1, help='save models or not')
parser.add_argument('--ss', dest='save_step', type=int, default=10, help='gap between model saves')
parser.add_argument('--mn', dest='model_name', type=str, default='', help='model name')
parser.add_argument('--tr', dest='training', type=int, default=1, help='1 to train, 0 to test')
parser.add_argument('--lm', dest='load_model', type=str, default="", help='path of a saved model to load')
parser.add_argument('--ds', dest='data_set', type=str, default="rcv", help='dataset name')
parser.add_argument('--fl', dest='fin_layer', type=str, default="ReLU", help='final layer activation')
parser.add_argument('--pp', dest='pp_flg', type=int, default=0, help='1 is for min-max pp, 2 is for gaussian pp, 0 for none')
parser.add_argument('--loss', dest='loss_type', type=str, default="BCELoss", help='Loss')

parser.add_argument('--sequence_length',help='max sequence length of a document', type=int,default=500)
parser.add_argument('--embedding_dim', help='dimension of word embedding representation', type=int, default=300)
parser.add_argument('--model_variation', help='model variation: CNN-rand or CNN-pretrain', type=str, default='pretrain')
parser.add_argument('--pretrain_type', help='pretrain model: GoogleNews or glove', type=str, default='glove')
parser.add_argument('--vocab_size', help='size of vocabulary keeping the most frequent words', type=int, default=30000)
parser.add_argument('--drop_prob', help='Dropout probability', type=float, default=.3)
parser.add_argument('--load_data', help='Load Data or not', type=int, default=0)
parser.add_argument('--mg', dest='multi_gpu', type=int, default=0, help='1 for 2 gpus and 0 for normal')
parser.add_argument('--filter_sizes', help='filter sizes of the convolutional layers (a list of integers)', type=int, default=[2, 4, 8], nargs='+')
parser.add_argument('--num_filters', help='number of filters (i.e. kernels) in CNN model', type=int, default=32)
parser.add_argument('--pooling_units', help='number of pooling units in 1D pooling layer', type=int, default=32)
parser.add_argument('--pooling_type', help='max or average', type=str, default='max')
parser.add_argument('--model_type', help='glove or GoogleNews', type=str, default='glove')
parser.add_argument('--num_features', help='50, 100, 200, 300', type=int, default=300)
parser.add_argument('--dropouts', help='0 for not using, 1 for using', type=int, default=0)
parser.add_argument('--clip', help='gradient clipping', type=float, default=1000)
parser.add_argument('--dataset_gpu', help='load dataset in full to gpu', type=int, default=1)
parser.add_argument('--dp', dest='dataparallel', help='to train on multiple GPUs or not', type=int, default=0)


params = parser.parse_args()

if len(params.model_name) == 0:
    params.model_name = "Gen_data_CNN_Z_dim-{}_mb_size-{}_h_dim-{}_preproc-{}_beta-{}_final_ly-{}_loss-{}_sequence_length-{}_embedding_dim-{}_params.vocab_size={}".format(
        params.Z_dim, params.mb_size, params.h_dim, params.pp_flg, params.beta, params.fin_layer, params.loss_type,
        params.sequence_length, params.embedding_dim, params.vocab_size)

print('Saving Model to: ' + params.model_name)

# ------------------ data ----------------------------------------------
params.data_path = '../datasets/' + params.data_set
x_tr, x_te, y_tr, y_te, params.vocabulary, params.vocabulary_inv, params = save_load_data(params, save=params.load_data)

params = update_params(params)
# ----------------------- Loss ------------------------------------
params.loss_fn = getattr(loss(), params.loss_type)
# -------------------------- Params ---------------------------------------------
if params.model_variation == 'pretrain':
    embedding_weights = load_word2vec(params)
else:
    embedding_weights = None

if torch.cuda.is_available():
    params.dtype = torch.cuda.FloatTensor
else:
    params.dtype = torch.FloatTensor


if params.training:
    train(x_tr, y_tr, x_te, y_te, embedding_weights, params)
else:
    test_class(x_te, y_te, params, x_tr=x_tr, y_tr=y_tr, embedding_weights=embedding_weights)
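Note on the loss selection above: `params.loss_fn = getattr(loss(), params.loss_type)` resolves `--loss` by attribute name on the `loss` container class from `loss.py`, which is not part of this commit. A minimal sketch of that lookup pattern, with a hypothetical container wrapping PyTorch criteria:

```python
import torch.nn as nn

class loss_container:
    # Hypothetical stand-in for loss.loss; each attribute is a ready-to-use criterion.
    def __init__(self):
        self.BCELoss = nn.BCELoss()   # what --loss BCELoss would resolve to
        self.MSELoss = nn.MSELoss()

loss_type = "BCELoss"                           # plays the role of params.loss_type
loss_fn = getattr(loss_container(), loss_type)  # same getattr pattern as above
```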
70 changes: 70 additions & 0 deletions code/cnn_test.py
@@ -0,0 +1,70 @@
from header import *
from collections import OrderedDict
from sklearn.metrics import log_loss

def test_class(x_te, y_te, params, model=None, x_tr=None, y_tr=None, embedding_weights=None, verbose=True, save=True):

    if model is None:
        if embedding_weights is None:
            print("Error: Embedding weights needed!")
            exit()
        else:
            model = cnn_encoder_decoder(params, embedding_weights)
            # state_dict = torch.load(params.load_model + "/model_best", map_location=lambda storage, loc: storage)
            # new_state_dict = OrderedDict()
            # for k, v in state_dict.items():
            #     name = k[7:]
            #     new_state_dict[name] = v
            # model.load_state_dict(new_state_dict)
            # del new_state_dict
            model.load_state_dict(torch.load(params.load_model, map_location=lambda storage, loc: storage))
        if torch.cuda.is_available():
            params.dtype_f = torch.cuda.FloatTensor
            params.dtype_i = torch.cuda.LongTensor
            model = model.cuda()
        else:
            params.dtype_f = torch.FloatTensor
            params.dtype_i = torch.LongTensor

    # Optional evaluation on the training set
    if x_tr is not None and y_tr is not None:
        x_tr, _, _, _ = load_batch_cnn(x_tr, y_tr, params, batch=False)
        Y = np.zeros(y_tr.shape)
        rem = x_tr.shape[0] % params.mb_size
        if rem:  # leftover examples that do not fill a complete minibatch
            e_emb = model.embedding_layer.forward(x_tr[-rem:].view(rem, x_tr.shape[1]))
            H = model.encoder.forward(e_emb)
            Y[-rem:, :] = model.classifier(H).data
        for i in range(0, x_tr.shape[0] - rem, params.mb_size):
            print(i)
            e_emb = model.embedding_layer.forward(x_tr[i:i + params.mb_size].view(params.mb_size, x_tr.shape[1]))
            H = model.encoder.forward(e_emb)
            Y[i:i + params.mb_size, :] = model.classifier(H).data

        loss = log_loss(y_tr, Y)
        prec = precision_k(y_tr, Y, 5)
        print('Train set: Precision Scores [1->5] {}; Cross Entropy {};'.format(prec, loss))

    # Evaluation on the test set; the last label column is dropped before scoring
    y_te = y_te[:, :-1]
    x_te, _ = load_batch_cnn(x_te, y_te, params, batch=False)
    Y2 = np.zeros(y_te.shape)
    rem = x_te.shape[0] % params.mb_size
    for i in range(0, x_te.shape[0] - rem, params.mb_size):
        # print(i)
        e_emb = model.embedding_layer.forward(x_te[i:i + params.mb_size].view(params.mb_size, x_te.shape[1]))
        H2 = model.encoder.forward(e_emb)
        Y2[i:i + params.mb_size, :] = model.classifier(H2).data

    if rem:
        e_emb = model.embedding_layer.forward(x_te[-rem:].view(rem, x_te.shape[1]))
        H2 = model.encoder.forward(e_emb)
        Y2[-rem:, :] = model.classifier(H2).data

    loss = log_loss(y_te, Y2)  # sklearn's log_loss takes (y_true, y_pred) -- the reverse of the PyTorch criterion's order
    prec = precision_k(y_te, Y2, 5)
    print('Test set: Precision Scores [1->5] {}; Cross Entropy {};'.format(prec, loss))

    if save:
        Y_probabs2 = sparse.csr_matrix(Y2)
        sio.savemat('score_matrix.mat', {'score_matrix': Y_probabs2})

    return prec[0], loss
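`precision_k` is imported from `precision_k.py`, which this commit does not include. A rough sketch of the metric it is expected to compute, precision at 1 through k over dense score/label matrices (the name and signature below are assumptions):

```python
import numpy as np

def precision_at_k(y_true, y_score, k=5):
    # Rank labels per example by predicted score, highest first.
    top = np.argsort(-y_score, axis=1)[:, :k]
    precs = []
    for kk in range(1, k + 1):
        # Count true labels among the top-kk predictions of every example.
        hits = np.take_along_axis(y_true, top[:, :kk], axis=1).sum(axis=1)
        precs.append(float(np.mean(hits / kk)))
    return precs  # [P@1, ..., P@k], as in the print statements above
```

Called as `precision_at_k(y_te, Y2, 5)`, it would mirror the `precision_k(y_te, Y2, 5)` call in `test_class`.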
106 changes: 106 additions & 0 deletions code/cnn_train.py
@@ -0,0 +1,106 @@
from header import *
from cnn_test import *

# ---------------------------------------------------------------------------------

def train(x_tr, y_tr, x_te, y_te, embedding_weights, params):

    viz = Visdom()
    loss_best = float('Inf')
    bestTotalLoss = float('Inf')
    best_test_acc = 0

    num_mb = np.ceil(params.N / params.mb_size)

    model = xmlCNN(params, embedding_weights)
    if torch.cuda.is_available():
        print("--------------- Using GPU! ---------")
        model.params.dtype_f = torch.cuda.FloatTensor
        model.params.dtype_i = torch.cuda.LongTensor
        model = model.cuda()
    else:
        model.params.dtype_f = torch.FloatTensor
        model.params.dtype_i = torch.LongTensor
        print("=============== Using CPU =========")

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=params.lr)
    print(model); print("%" * 100)

    if params.dataparallel:
        model = nn.DataParallel(model)

    if len(params.load_model):
        print(params.load_model)
        model, optimizer, init = load_model(model, params.load_model, optimizer=optimizer)
    else:
        init = 0
    iteration = 0

    # =============================== TRAINING ====================================
    for epoch in range(init, params.num_epochs):
        totalLoss = 0.0

        for i in range(int(num_mb)):
            # ------------------ Load Batch Data -----------------------------------
            batch_x, batch_y = load_batch_cnn(x_tr, y_tr, params)
            # -----------------------------------------------------------------------
            loss, output = model.forward(batch_x, batch_y)
            loss = loss.mean().squeeze()

            totalLoss += loss.data

            if i % max(int(num_mb / 12), 1) == 0:
                print('Iter-{}; Loss: {:.4}; best_loss: {:.4}'.format(i, loss.data, loss_best))
                if not os.path.exists('saved_models/' + params.model_name):
                    os.makedirs('saved_models/' + params.model_name)
                save_model(model, optimizer, epoch, params.model_name + "/model_best_batch")
                if loss < loss_best:
                    loss_best = loss.data

            # ------------------------ Propagate loss ------------------------------
            loss.backward()
            loss = loss.data
            torch.nn.utils.clip_grad_norm(model.parameters(), params.clip)
            optimizer.step()
            optimizer.zero_grad()

            # ------------------------ Visdom live loss plot -----------------------
            if params.disp_flg:
                if iteration == 0:
                    loss_old = loss
                else:
                    viz.line(X=np.linspace(iteration - 1, iteration, 50), Y=np.linspace(loss_old, loss, 50), update='append', win=win)
                    loss_old = loss
                if iteration % 100 == 0:
                    win = viz.line(X=np.arange(iteration, iteration + .1), Y=np.arange(0, .1))
            iteration += 1

            if epoch == 0:
                break  # debug shortcut kept from the original: epoch 0 runs a single minibatch

        if totalLoss < bestTotalLoss:
            bestTotalLoss = totalLoss
            if not os.path.exists('saved_models/' + params.model_name):
                os.makedirs('saved_models/' + params.model_name)
            save_model(model, optimizer, epoch, params.model_name + "/model_best_epoch")

        print('End-of-Epoch: Loss: {:.4}; best_loss: {:.4};'.format(totalLoss, bestTotalLoss))

        test_prec_acc, test_ce_loss = test_class(x_te, y_te, params, model=model, verbose=False, save=False)
        model.train()

        if test_prec_acc > best_test_acc:
            best_test_loss = test_ce_loss
            best_test_acc = test_prec_acc
            print("This acc is better than the previously recorded test acc:- {} ; while CELoss:- {}".format(best_test_acc, best_test_loss))
            if not os.path.exists('saved_models/' + params.model_name):
                os.makedirs('saved_models/' + params.model_name)
            save_model(model, optimizer, epoch, params.model_name + "/model_best_test")

        if params.save:
            save_model(model, optimizer, epoch, params.model_name + "/model_" + str(epoch))
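`save_model` and `load_model` come from `futils`, which is outside this commit. A minimal sketch, under the assumption that they persist and restore the model/optimizer state dicts together with the epoch counter used by `range(init, params.num_epochs)`:

```python
import os
import torch

def save_model(model, optimizer, epoch, name):
    # Persist everything needed to resume training under saved_models/<name>.
    torch.save({'epoch': epoch,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()},
               os.path.join('saved_models', name))

def load_model(model, path, optimizer=None):
    # Restore the state dicts and return (model, optimizer, epoch) as train() expects.
    ckpt = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(ckpt['model'])
    if optimizer is not None:
        optimizer.load_state_dict(ckpt['optimizer'])
    return model, optimizer, ckpt['epoch']
```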



32 changes: 32 additions & 0 deletions code/header.py
@@ -0,0 +1,32 @@
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
import sys
import numpy as np
sys.path.append('../utils/')
sys.path.append('models')
import data_helpers

from w2v import *
from embedding_layer import embedding_layer
from cnn_decoder import cnn_decoder
from cnn_encoder import cnn_encoder
from classifier import classifier
from sklearn import preprocessing
from sklearn.decomposition import PCA
import scipy.io as sio
from scipy import sparse
import argparse
from visdom import Visdom
from sklearn.externals import joblib
from futils import *
from loss import loss
from xmlCNN import xmlCNN
import timeit
from precision_k import precision_k
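`load_word2vec` is pulled in via `from w2v import *` from the `utils/` directory, which is not part of this commit. A hedged sketch of the embedding matrix it is assumed to build, one row per vocabulary word with pretrained vectors where available (the function name, gensim usage, and random fallback are assumptions):

```python
import numpy as np
from gensim.models import KeyedVectors

def build_embedding_weights(vocabulary_inv, w2v_path, embedding_dim=300):
    # vocabulary_inv: list mapping word index -> word, as produced by data_helpers.
    w2v = KeyedVectors.load_word2vec_format(w2v_path, binary=True)
    weights = np.random.uniform(-0.25, 0.25, (len(vocabulary_inv), embedding_dim))
    for idx, word in enumerate(vocabulary_inv):
        if word in w2v:
            weights[idx] = w2v[word]  # copy the pretrained vector when the word is known
    return weights.astype(np.float32)
```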
19 changes: 19 additions & 0 deletions code/models/classifier.py
@@ -0,0 +1,19 @@
from header import *
class classifier(nn.Module):
    def __init__(self, params):
        super(classifier, self).__init__()
        self.params = params
        if self.params.dropouts:
            self.drp = nn.Dropout(.5)
        self.l1 = nn.Linear(params.h_dim, params.H_dim)
        self.l2 = nn.Linear(params.H_dim, params.y_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        torch.nn.init.xavier_uniform_(self.l1.weight)

    def forward(self, H):
        H = self.l1(H)
        H = self.relu(H)
        if self.params.dropouts:
            H = self.drp(H)  # apply the dropout layer that __init__ creates
        H = self.l2(H)
        H = self.sigmoid(H)
        return H
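A small usage sketch of this classifier head: it maps the encoder's `h_dim` features through a hidden layer of width `H_dim` to `y_dim` per-label sigmoid probabilities (the dummy `params` namespace and label count below are illustrative):

```python
import torch
from argparse import Namespace

params = Namespace(h_dim=600, H_dim=512, y_dim=103, dropouts=0)  # e.g. RCV1 has 103 labels
clf = classifier(params)
H = torch.randn(20, params.h_dim)   # a minibatch of encoder features
probs = clf(H)                      # shape (20, 103), each entry in (0, 1)
```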
48 changes: 48 additions & 0 deletions code/models/cnn_decoder.py
@@ -0,0 +1,48 @@
from header import *

class cnn_decoder(nn.Module):
    def __init__(self, params):
        super(cnn_decoder, self).__init__()
        self.params = params
        self.out_size = self.params.decoder_kernels[-1][0]

        self.bn_1 = nn.BatchNorm1d(self.params.sequence_length + 1)
        self.drp = nn.Dropout(p=params.drop_prob)
        self.conv_layers = nn.ModuleList()
        self.bn_x = nn.ModuleList()
        self.relu = nn.ReLU()
        for i in range(len(params.decoder_kernels)):
            [out_chan, in_chan, width] = params.decoder_kernels[i]
            conv_layer = nn.Conv1d(in_chan, out_chan, width,
                                   dilation=self.params.decoder_dilations[i],
                                   padding=self.params.decoder_paddings[i])
            torch.nn.init.xavier_uniform_(conv_layer.weight)
            bn_layer = nn.BatchNorm1d(out_chan)
            self.conv_layers.append(conv_layer)
            self.bn_x.append(bn_layer)

        # self.bn_2 = nn.BatchNorm1d(self.out_size)
        self.fc = nn.Linear(self.out_size, self.params.vocab_size)
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, decoder_input, z, batch_y):
        [batch_size, seq_len, embed_size] = decoder_input.size()
        # Condition every timestep on the latent code and the label representation
        z = torch.cat([z, batch_y], 1)
        z = torch.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.Z_dim + self.params.H_dim)
        x = torch.cat([decoder_input, z], 2)
        x = x.transpose(1, 2).contiguous()
        x = self.drp(x)
        for i in range(len(self.params.decoder_kernels)):
            x = self.conv_layers[i](x)
            # Trim the trailing timesteps added by the causal padding
            x_width = x.size()[2]
            x = x[:, :, :(x_width - self.params.decoder_paddings[i])].contiguous()
            x = self.relu(x)
            x = self.bn_x[i](x)
        x = x.transpose(1, 2).contiguous()
        if self.params.multi_gpu:
            x = x.cuda(2)
            x = self.fc(x)  # .cuda(1)
        else:
            x = self.fc(x)
        x = x.view(-1, seq_len, self.params.vocab_size)
        return x
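`decoder_kernels`, `decoder_dilations`, and `decoder_paddings` are configured elsewhere (presumably in `update_params`, not shown). The slicing in `forward` relies on the relation padding = dilation * (width - 1): a stride-1 Conv1d then emits exactly `padding` extra trailing timesteps, and trimming them keeps the sequence length fixed and the convolution causal. A self-contained check of that bookkeeping with an assumed layer configuration:

```python
import torch
import torch.nn as nn

seq_len, in_ch, out_ch, width, dilation = 500, 400, 128, 3, 2
padding = dilation * (width - 1)   # 4 extra timesteps appear at the end

conv = nn.Conv1d(in_ch, out_ch, width, dilation=dilation, padding=padding)
x = torch.randn(8, in_ch, seq_len)
y = conv(x)                                  # shape: (8, 128, seq_len + padding)
y = y[:, :, :y.size(2) - padding]            # trim, back to (8, 128, seq_len)
assert y.shape == (8, out_ch, seq_len)
```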
(Diff truncated: the remaining changed files are not shown.)