Showing 22 changed files with 1,402 additions and 0 deletions.

@@ -0,0 +1,5 @@
code/saved_models
datasets
*.npy
*.npz
*.pyc

@@ -1,2 +1,8 @@
# XML-CNN
PyTorch implementation of the paper http://nyc.lti.cs.cmu.edu/yiming/Publications/jliu-sigir17.pdf

## Dependencies
* NLTK (stopwords)
* PyTorch
* Gensim
* Matplotlib
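
The NLTK stopwords corpus is not bundled with the NLTK package itself; a minimal one-time setup sketch (assuming a standard NLTK install):

    import nltk
    nltk.download('stopwords')               # fetch the stopword lists used during preprocessing

    from nltk.corpus import stopwords
    print(stopwords.words('english')[:5])    # sanity check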

@@ -0,0 +1,79 @@
from header import *
from cnn_train import *
from cnn_test import *
import pdb

# ------------------------ Params -------------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--pca', dest='pca_flag', type=int, default=0, help='1 to do pca, 0 for not doing it')
parser.add_argument('--zd', dest='Z_dim', type=int, default=100, help='Latent layer dimension')
parser.add_argument('--mb', dest='mb_size', type=int, default=20, help='Size of minibatch, changing might result in latent layer variance overflow')
parser.add_argument('--hd', dest='h_dim', type=int, default=600, help='hidden layer dimension')
parser.add_argument('--Hd', dest='H_dim', type=int, default=512, help='hidden layer dimension')
parser.add_argument('--lr', dest='lr', type=float, default=1e-3, help='Learning rate')
parser.add_argument('--p', dest='plot_flg', type=int, default=0, help='1 to plot, 0 to not plot')
parser.add_argument('--e', dest='num_epochs', type=int, default=100, help='number of training epochs')
parser.add_argument('--b', dest='beta', type=float, default=1, help='factor multiplied to likelihood param')
parser.add_argument('--d', dest='disp_flg', type=int, default=0, help='display graphs')
parser.add_argument('--sve', dest='save', type=int, default=1, help='save models or not')
parser.add_argument('--ss', dest='save_step', type=int, default=10, help='gap between model saves')
parser.add_argument('--mn', dest='model_name', type=str, default='', help='model name')
parser.add_argument('--tr', dest='training', type=int, default=1, help='1 to train, 0 to test')
parser.add_argument('--lm', dest='load_model', type=str, default="", help='path of model to load')
parser.add_argument('--ds', dest='data_set', type=str, default="rcv", help='dataset name')
parser.add_argument('--fl', dest='fin_layer', type=str, default="ReLU", help='final layer activation')
parser.add_argument('--pp', dest='pp_flg', type=int, default=0, help='1 is for min-max pp, 2 is for gaussian pp, 0 for none')
parser.add_argument('--loss', dest='loss_type', type=str, default="BCELoss", help='Loss')

parser.add_argument('--sequence_length', help='max sequence length of a document', type=int, default=500)
parser.add_argument('--embedding_dim', help='dimension of word embedding representation', type=int, default=300)
parser.add_argument('--model_variation', help='model variation: CNN-rand or CNN-pretrain', type=str, default='pretrain')
parser.add_argument('--pretrain_type', help='pretrain model: GoogleNews or glove', type=str, default='glove')
parser.add_argument('--vocab_size', help='size of vocabulary keeping the most frequent words', type=int, default=30000)
parser.add_argument('--drop_prob', help='Dropout probability', type=float, default=.3)
parser.add_argument('--load_data', help='Load Data or not', type=int, default=0)
parser.add_argument('--mg', dest='multi_gpu', type=int, default=0, help='1 for 2 gpus and 0 for normal')
parser.add_argument('--filter_sizes', help='number of filter sizes (could be a list of integer)', type=int, default=[2, 4, 8], nargs='+')
parser.add_argument('--num_filters', help='number of filters (i.e. kernels) in CNN model', type=int, default=32)
parser.add_argument('--pooling_units', help='number of pooling units in 1D pooling layer', type=int, default=32)
parser.add_argument('--pooling_type', help='max or average', type=str, default='max')
parser.add_argument('--model_type', help='glove or GoogleNews', type=str, default='glove')
parser.add_argument('--num_features', help='50, 100, 200, 300', type=int, default=300)
parser.add_argument('--dropouts', help='0 for not using, 1 for using', type=int, default=0)
parser.add_argument('--clip', help='gradient clipping', type=float, default=1000)
parser.add_argument('--dataset_gpu', help='load dataset in full to gpu', type=int, default=1)
parser.add_argument('--dp', dest='dataparallel', help='to train on multiple GPUs or not', type=int, default=0)

params = parser.parse_args()

if len(params.model_name) == 0:
    params.model_name = "Gen_data_CNN_Z_dim-{}_mb_size-{}_h_dim-{}_preproc-{}_beta-{}_final_ly-{}_loss-{}_sequence_length-{}_embedding_dim-{}_params.vocab_size={}".format(
        params.Z_dim, params.mb_size, params.h_dim, params.pp_flg, params.beta, params.fin_layer, params.loss_type,
        params.sequence_length, params.embedding_dim, params.vocab_size)

print('Saving Model to: ' + params.model_name)

# ------------------ data ----------------------------------------------
params.data_path = '../datasets/' + params.data_set
x_tr, x_te, y_tr, y_te, params.vocabulary, params.vocabulary_inv, params = save_load_data(params, save=params.load_data)

params = update_params(params)
# ----------------------- Loss ------------------------------------
params.loss_fn = getattr(loss(), params.loss_type)
# -------------------------- Params ---------------------------------------------
if params.model_variation == 'pretrain':
    embedding_weights = load_word2vec(params)
else:
    embedding_weights = None

if torch.cuda.is_available():
    params.dtype = torch.cuda.FloatTensor
else:
    params.dtype = torch.FloatTensor

if params.training:
    train(x_tr, y_tr, x_te, y_te, embedding_weights, params)
else:
    test_class(x_te, y_te, params, x_tr=x_tr, y_tr=y_tr, embedding_weights=embedding_weights)
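
For reference, hypothetical invocations using only the flags defined above; the script name, model path, and chosen values are assumptions, not part of this commit:

    # train on the default rcv dataset for 100 epochs
    python main.py --ds rcv --e 100 --tr 1
    # evaluate a previously saved model
    python main.py --ds rcv --tr 0 --lm <path to saved model>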

@@ -0,0 +1,70 @@
from header import *
from collections import OrderedDict
from sklearn.metrics import log_loss

def test_class(x_te, y_te, params, model=None, x_tr=None, y_tr=None, embedding_weights=None, verbose=True, save=True):

    if model is None:
        if embedding_weights is None:
            print("Error: Embedding weights needed!")
            exit()
        else:
            model = cnn_encoder_decoder(params, embedding_weights)
            # state_dict = torch.load(params.load_model + "/model_best", map_location=lambda storage, loc: storage)
            # new_state_dict = OrderedDict()
            # for k, v in state_dict.items():
            #     name = k[7:]
            #     new_state_dict[name] = v
            # model.load_state_dict(new_state_dict)
            # del new_state_dict
            model.load_state_dict(torch.load(params.load_model, map_location=lambda storage, loc: storage))
        if torch.cuda.is_available():
            params.dtype_f = torch.cuda.FloatTensor
            params.dtype_i = torch.cuda.LongTensor
            model = model.cuda()
        else:
            params.dtype_f = torch.FloatTensor
            params.dtype_i = torch.LongTensor

    if x_tr is not None and y_tr is not None:
        x_tr, _, _, _ = load_batch_cnn(x_tr, y_tr, params, batch=False)
        Y = np.zeros(y_tr.shape)
        rem = x_tr.shape[0] % params.mb_size
        if rem:
            e_emb = model.embedding_layer.forward(x_tr[-rem:].view(rem, x_te.shape[1]))
            H = model.encoder.forward(e_emb)
            Y[-rem:, :] = model.classifier(H).data
        for i in range(0, x_tr.shape[0] - rem, params.mb_size):
            print(i)
            e_emb = model.embedding_layer.forward(x_tr[i:i+params.mb_size].view(params.mb_size, x_te.shape[1]))
            H = model.encoder.forward(e_emb)
            Y[i:i+params.mb_size, :] = model.classifier(H).data

        loss = log_loss(y_tr, Y)
        prec = precision_k(y_tr, Y, 5)
        print('Train Loss; Precision Scores [1->5] {} Cross Entropy {};'.format(prec, loss))

    y_te = y_te[:, :-1]
    x_te, _ = load_batch_cnn(x_te, y_te, params, batch=False)
    Y2 = np.zeros(y_te.shape)
    rem = x_te.shape[0] % params.mb_size
    for i in range(0, x_te.shape[0] - rem, params.mb_size):
        # print(i)
        e_emb = model.embedding_layer.forward(x_te[i:i+params.mb_size].view(params.mb_size, x_te.shape[1]))
        H2 = model.encoder.forward(e_emb)
        Y2[i:i+params.mb_size, :] = model.classifier(H2).data

    if rem:
        e_emb = model.embedding_layer.forward(x_te[-rem:].view(rem, x_te.shape[1]))
        H2 = model.encoder.forward(e_emb)
        Y2[-rem:, :] = model.classifier(H2).data

    loss = log_loss(y_te, Y2)  # Reverse of pytorch
    prec = precision_k(y_te, Y2, 5)  # Reverse of pytorch
    print('Test Loss; Precision Scores [1->5] {} Cross Entropy {};'.format(prec, loss))

    if save:
        Y_probabs2 = sparse.csr_matrix(Y2)
        sio.savemat('score_matrix.mat', {'score_matrix': Y_probabs2})

    return prec[0], loss
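
precision_k itself is not part of this commit; it appears to return precision@k scores for k = 1..5 over the score matrix. For reference, a minimal single-k sketch of the metric (a hypothetical helper; dense numpy arrays of binary labels and real-valued scores are assumed):

    import numpy as np

    def precision_at_k(y_true, y_score, k=5):
        # indices of the k highest-scoring labels for each sample
        top_k = np.argsort(-y_score, axis=1)[:, :k]
        # 1 where a top-k label is actually relevant
        hits = np.take_along_axis(y_true, top_k, axis=1)
        # mean fraction of relevant labels among the top k
        return hits.mean(axis=1).mean()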

@@ -0,0 +1,106 @@
from header import *
from cnn_test import *

# ---------------------------------------------------------------------------------

def train(x_tr, y_tr, x_te, y_te, embedding_weights, params):

    viz = Visdom()
    loss_best = float('Inf')
    bestTotalLoss = float('Inf')
    best_test_acc = 0

    num_mb = np.ceil(params.N / params.mb_size)

    model = xmlCNN(params, embedding_weights)
    if torch.cuda.is_available():
        print("--------------- Using GPU! ---------")
        model.params.dtype_f = torch.cuda.FloatTensor
        model.params.dtype_i = torch.cuda.LongTensor
        model = model.cuda()
    else:
        model.params.dtype_f = torch.FloatTensor
        model.params.dtype_i = torch.LongTensor
        print("=============== Using CPU =========")

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=params.lr)
    print(model); print("%" * 100)

    if params.dataparallel:
        model = nn.DataParallel(model)

    if len(params.load_model):
        print(params.load_model)
        model, optimizer, init = load_model(model, params.load_model, optimizer=optimizer)
    else:
        init = 0
    iteration = 0
    # =============================== TRAINING ====================================
    for epoch in range(init, params.num_epochs):
        totalLoss = 0.0

        for i in range(int(num_mb)):
            # ------------------ Load Batch Data ---------------------------------------------------------
            batch_x, batch_y = load_batch_cnn(x_tr, y_tr, params)
            # -----------------------------------------------------------------------------------
            loss, output = model.forward(batch_x, batch_y)
            loss = loss.mean().squeeze()
            # --------------------------------------------------------------------

            totalLoss += loss.data

            if i % int(num_mb / 12) == 0:
                print('Iter-{}; Loss: {:.4}; best_loss: {:.4}'.format(i, loss.data, loss_best))
                if not os.path.exists('saved_models/' + params.model_name):
                    os.makedirs('saved_models/' + params.model_name)
                save_model(model, optimizer, epoch, params.model_name + "/model_best_batch")
                if loss < loss_best:
                    loss_best = loss.data

            # ------------------------ Propagate loss -----------------------------------
            loss.backward()
            loss = loss.data
            torch.nn.utils.clip_grad_norm(model.parameters(), params.clip)
            optimizer.step()
            optimizer.zero_grad()

            # ----------------------------------------------------------------------------
            if params.disp_flg:
                if iteration == 0:
                    loss_old = loss
                else:
                    viz.line(X=np.linspace(iteration - 1, iteration, 50), Y=np.linspace(loss_old, loss, 50), update='append', win=win)
                    loss_old = loss
                if iteration % 100 == 0:
                    win = viz.line(X=np.arange(iteration, iteration + .1), Y=np.arange(0, .1))
            iteration += 1

            if epoch == 0:
                break

        if totalLoss < bestTotalLoss:
            bestTotalLoss = totalLoss
            if not os.path.exists('saved_models/' + params.model_name):
                os.makedirs('saved_models/' + params.model_name)
            save_model(model, optimizer, epoch, params.model_name + "/model_best_epoch")

        print('End-of-Epoch: Loss: {:.4}; best_loss: {:.4};'.format(totalLoss, bestTotalLoss))

        test_prec_acc, test_ce_loss = test_class(x_te, y_te, params, model=model, verbose=False, save=False)
        model.train()

        if test_prec_acc > best_test_acc:
            best_test_loss = test_ce_loss
            best_test_acc = test_prec_acc
            print("This acc is better than the previous recorded test acc:- {} ; while CELoss:- {}".format(best_test_acc, best_test_loss))
            if not os.path.exists('saved_models/' + params.model_name):
                os.makedirs('saved_models/' + params.model_name)
            save_model(model, optimizer, epoch, params.model_name + "/model_best_test")

        if params.save:
            save_model(model, optimizer, epoch, params.model_name + "/model_" + str(epoch))
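
Note: torch.nn.utils.clip_grad_norm used above is the spelling from older PyTorch releases; newer versions deprecate it in favor of the in-place variant. A drop-in sketch, assuming a recent PyTorch:

    torch.nn.utils.clip_grad_norm_(model.parameters(), params.clip)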

@@ -0,0 +1,32 @@
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
import sys
sys.path.append('../utils/')
sys.path.append('models')
import data_helpers

from w2v import *
from embedding_layer import embedding_layer
from cnn_decoder import cnn_decoder
from cnn_encoder import cnn_encoder
from classifier import classifier
from sklearn import preprocessing
from sklearn.decomposition import PCA
import scipy.io as sio
from scipy import sparse
import argparse
from visdom import Visdom
from sklearn.externals import joblib
from futils import *
from loss import loss
from xmlCNN import xmlCNN
import timeit
from precision_k import precision_k

@@ -0,0 +1,19 @@
from header import *

class classifier(nn.Module):
    def __init__(self, params):
        super(classifier, self).__init__()
        self.params = params
        if self.params.dropouts:
            self.drp = nn.Dropout(.5)
        self.l1 = nn.Linear(params.h_dim, params.H_dim)
        self.l2 = nn.Linear(params.H_dim, params.y_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        torch.nn.init.xavier_uniform_(self.l1.weight)

    def forward(self, H):
        H = self.l1(H)
        H = self.relu(H)
        H = self.l2(H)
        H = self.sigmoid(H)
        return H
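
A minimal smoke-test sketch for the classifier head, assuming only the attribute names read in __init__ (h_dim, H_dim, y_dim, dropouts); the concrete dimensions below, including y_dim, are hypothetical:

    import torch
    from argparse import Namespace
    from classifier import classifier

    params = Namespace(h_dim=600, H_dim=512, y_dim=103, dropouts=0)
    clf = classifier(params)
    H = torch.randn(4, params.h_dim)   # a batch of 4 encoder outputs
    probs = clf(H)                     # per-label sigmoid scores
    print(probs.shape)                 # torch.Size([4, 103])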

@@ -0,0 +1,48 @@
from header import *

class cnn_decoder(nn.Module):
    def __init__(self, params):
        super(cnn_decoder, self).__init__()
        self.params = params
        self.out_size = self.params.decoder_kernels[-1][0]

        self.bn_1 = nn.BatchNorm1d(self.params.sequence_length + 1)
        self.drp = nn.Dropout(p=params.drop_prob)
        self.conv_layers = nn.ModuleList()
        self.bn_x = nn.ModuleList()
        self.relu = nn.ReLU()
        for layer in range(len(params.decoder_kernels)):
            [out_chan, in_chan, width] = params.decoder_kernels[layer]
            conv_layer = nn.Conv1d(in_chan, out_chan, width,
                                   dilation=self.params.decoder_dilations[layer],
                                   padding=self.params.decoder_paddings[layer])
            torch.nn.init.xavier_uniform_(conv_layer.weight)
            bn_layer = nn.BatchNorm1d(out_chan)
            self.conv_layers.append(conv_layer)
            self.bn_x.append(bn_layer)

        # self.bn_2 = nn.BatchNorm1d(self.out_size)
        self.fc = nn.Linear(self.out_size, self.params.vocab_size)
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, decoder_input, z, batch_y):
        [batch_size, seq_len, embed_size] = decoder_input.size()
        z = torch.cat([z, batch_y], 1)
        z = torch.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.Z_dim + self.params.H_dim)
        x = torch.cat([decoder_input, z], 2)
        x = x.transpose(1, 2).contiguous()
        x = self.drp(x)
        for layer in range(len(self.params.decoder_kernels)):
            x = self.conv_layers[layer](x)
            x_width = x.size()[2]
            x = x[:, :, :(x_width - self.params.decoder_paddings[layer])].contiguous()
            x = self.relu(x)
            x = self.bn_x[layer](x)
        x = x.transpose(1, 2).contiguous()
        if self.params.multi_gpu:
            x = x.cuda(2)
            x = self.fc(x)  # .cuda(1)
        else:
            x = self.fc(x)
        x = x.view(-1, seq_len, self.params.vocab_size)
        return x
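
The slicing inside the decoder's convolution loop trims the extra right-hand timesteps introduced by padding, which keeps each output position from seeing future inputs. A minimal sketch of that trick in isolation, assuming padding = dilation * (kernel_size - 1) as in the usual causal-CNN setup (the concrete sizes are illustrative):

    import torch
    import torch.nn as nn

    pad = 4                                   # = dilation * (kernel_size - 1)
    conv = nn.Conv1d(in_channels=8, out_channels=8, kernel_size=3, dilation=2, padding=pad)
    x = torch.randn(1, 8, 10)                 # (batch, channels, time)
    y = conv(x)                               # time dimension grows to 14
    y = y[:, :, :y.size(2) - pad]             # trim the right padding -> back to 10, causal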