From 3b65e11186a9d834e7040d222a72b6d649d92501 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Sat, 4 Jul 2020 10:39:17 -0400 Subject: [PATCH 1/7] update char_cnn and fasttext --- models/char_cnn/model.py | 31 ++++++++++++++++++++----------- models/fasttext/model.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index 4f7bdc2d..d0d2c946 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -14,17 +14,27 @@ def __init__(self, config): output_channel = config.output_channel num_affine_neurons = config.num_affine_neurons target_class = config.target_class - input_channel = 68 - - self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=7) - self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=7) - self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=3) + # we can add these parameters in the config + input_channel = 68 #number of characters + first_kernel_size = 7 + second_kernel_size = 3 + pool_size = 3 + max_sentence_length = 1014 #maximum number of characters per sentence + + self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) + self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) + self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + + # due to reduction based on the convolutional neural network + temp = first_kernel_size - 
1 + pool_size * (first_kernel_size - 1) + ( + pool_size ** 2 * 4 * (second_kernel_size - 1)) + linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters self.dropout = nn.Dropout(config.dropout) - self.fc1 = nn.Linear(output_channel, num_affine_neurons) + self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) self.fc3 = nn.Linear(num_affine_neurons, target_class) @@ -39,9 +49,8 @@ def forward(self, x, **kwargs): x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) - x = F.relu(self.conv6(x)) + x = F.max_pool1d(F.relu(self.conv6(x)), 3) - x = F.max_pool1d(x, x.size(2)).squeeze(2) x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index 450561a1..b3dedd9c 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -36,7 +36,7 @@ def forward(self, x, **kwargs): elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - x = F.avg_pool2d(x, (x.shape[1], 1)).squeeze(1) # (batch, embed_dim) + x = F.avg_pool1d(x, x.shape[1]).squeeze(1) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit From cda846ffa55bcfa6729e6fbabd8fb6433a065b94 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 1 Sep 2020 21:44:46 +0000 Subject: [PATCH 2/7] updating models --- models/char_cnn/args.py | 8 ++++++++ models/char_cnn/model.py | 14 ++++++-------- models/fasttext/model.py | 4 +--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/models/char_cnn/args.py b/models/char_cnn/args.py index 62c517f3..1340798a 100644 --- a/models/char_cnn/args.py +++ b/models/char_cnn/args.py @@ -14,6 +14,14 @@ def get_args(): parser.add_argument('--epoch-decay', type=int, default=15) parser.add_argument('--weight-decay', type=float, default=0) + parser.add_argument('--number_of_characters', type=float, 
default=68) + parser.add_argument('--first_kernel', type=int, default=7) + parser.add_argument('--second_kernel', type=int, default=3) + parser.add_argument('--pool_size', type=int, default=3) + parser.add_argument('--max_sentence_length', type=int, default=1000) + + + parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) parser.add_argument('--word-vectors-file', default='GoogleNews-vectors-negative300.txt') parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'char_cnn')) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index d0d2c946..5a123050 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -14,12 +14,13 @@ def __init__(self, config): output_channel = config.output_channel num_affine_neurons = config.num_affine_neurons target_class = config.target_class + #added paremeters in the config + input_channel = config.number_of_characters #number of characters + first_kernel_size = config.first_kernel + second_kernel_size = config.second_kernel + pool_size = config.pool_size # we can add these parameters in the config - input_channel = 68 #number of characters - first_kernel_size = 7 - second_kernel_size = 3 - pool_size = 3 - max_sentence_length = 1014 #maximum number of characters per sentence + max_sentence_length = config.max_sentence_length #maximum number of characters per sentence self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) @@ -32,7 +33,6 @@ def __init__(self, config): temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( pool_size ** 2 * 4 * (second_kernel_size - 1)) linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters - self.dropout = nn.Dropout(config.dropout) self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = 
nn.Linear(num_affine_neurons, num_affine_neurons) @@ -43,14 +43,12 @@ def forward(self, x, **kwargs): x = x.transpose(1, 2).type(torch.cuda.FloatTensor) else: x = x.transpose(1, 2).type(torch.FloatTensor) - x = F.max_pool1d(F.relu(self.conv1(x)), 3) x = F.max_pool1d(F.relu(self.conv2(x)), 3) x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) x = F.max_pool1d(F.relu(self.conv6(x)), 3) - x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index b3dedd9c..920d49d4 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -35,9 +35,7 @@ def forward(self, x, **kwargs): x = self.static_embed(x) # (batch, sent_len, embed_dim) elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - - x = F.avg_pool1d(x, x.shape[1]).squeeze(1) # (batch, embed_dim) - + x = F.avg_pool1d(x.transpose(1,2), x.shape[1]).squeeze(2) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit From 15ea3ebc2cd19802bafd95ea8703dca230d85ced Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 1 Sep 2020 17:48:41 -0400 Subject: [PATCH 3/7] fixing comments --- models/char_cnn/model.py | 11 ++++++----- models/fasttext/model.py | 4 +--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index 5a123050..e6a86357 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -11,16 +11,16 @@ def __init__(self, config): self.is_cuda_enabled = config.cuda num_conv_filters = config.num_conv_filters - output_channel = config.output_channel + output_channel = config.output_channel #this parameter is not used anymore for conv6 num_affine_neurons = config.num_affine_neurons target_class = config.target_class - #added paremeters in the config - input_channel = config.number_of_characters #number of characters + # added paremeters in the config + 
input_channel = config.number_of_characters # number of characters first_kernel_size = config.first_kernel second_kernel_size = config.second_kernel pool_size = config.pool_size - # we can add these parameters in the config - max_sentence_length = config.max_sentence_length #maximum number of characters per sentence + + max_sentence_length = config.max_sentence_length # maximum number of characters per sentence self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) @@ -33,6 +33,7 @@ def __init__(self, config): temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( pool_size ** 2 * 4 * (second_kernel_size - 1)) linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + self.dropout = nn.Dropout(config.dropout) self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index 920d49d4..ec7c0253 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -35,8 +35,6 @@ def forward(self, x, **kwargs): x = self.static_embed(x) # (batch, sent_len, embed_dim) elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - x = F.avg_pool1d(x.transpose(1,2), x.shape[1]).squeeze(2) # (batch, embed_dim) + x = F.avg_pool1d(x.transpose(1, 2), x.shape[1]).squeeze(2) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit - - From 52ec7b5f0d1db4566c198600e1dbd5a816436201 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 15 Sep 2020 00:49:03 -0400 Subject: [PATCH 4/7] updating the char_cnn model and readme --- models/char_cnn/README.md | 1 + models/char_cnn/args.py | 2 ++ models/char_cnn/model.py | 25 +++++++++++++++++-------- models/fasttext/README.md | 27 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 8 
deletions(-) create mode 100644 models/fasttext/README.md diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 982c17eb..6a2d8385 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -28,6 +28,7 @@ We experiment the model on the following datasets. - Reuters (ModApte) - AAPD +**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters ## Settings Adam is used for training. diff --git a/models/char_cnn/args.py b/models/char_cnn/args.py index 1340798a..84f5ddee 100644 --- a/models/char_cnn/args.py +++ b/models/char_cnn/args.py @@ -20,6 +20,8 @@ def get_args(): parser.add_argument('--pool_size', type=int, default=3) parser.add_argument('--max_sentence_length', type=int, default=1000) + parser.add_argument('--using_fixed', type=bool, default=False) + parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index e6a86357..b8edd569 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -19,6 +19,8 @@ def __init__(self, config): first_kernel_size = config.first_kernel second_kernel_size = config.second_kernel pool_size = config.pool_size + #whether we are using the fix version of the paper + self.using_fixed = config.using_fixed max_sentence_length = config.max_sentence_length # maximum number of characters per sentence @@ -27,15 +29,18 @@ def __init__(self, config): self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) - self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, 
kernel_size=second_kernel_size) - - # due to reduction based on the convolutional neural network - temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( - pool_size ** 2 * 4 * (second_kernel_size - 1)) - linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + if self.using_fixed: + self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + # due to reduction based on the convolutional neural network + temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( + pool_size ** 2 * 4 * (second_kernel_size - 1)) + linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) + else: + self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=second_kernel_size) + self.fc1 = nn.Linear(output_channel, num_affine_neurons) self.dropout = nn.Dropout(config.dropout) - self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) self.fc3 = nn.Linear(num_affine_neurons, target_class) @@ -49,7 +54,11 @@ def forward(self, x, **kwargs): x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) - x = F.max_pool1d(F.relu(self.conv6(x)), 3) + if self.using_fixed: + x = F.max_pool1d(F.relu(self.conv6(x)), 3) + else: + x = F.relu(self.conv6(x)) + x = F.max_pool1d(x, x.size(2)).squeeze(2) x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/README.md b/models/fasttext/README.md new file mode 100644 index 00000000..f7527b76 --- /dev/null +++ b/models/fasttext/README.md @@ -0,0 +1,27 @@ +## Bag of Tricks for Efficient Text Classification + +Implementation of [FastText (2016)](https://arxiv.org/pdf/1607.01759.pdf) + +## Quick Start + +To run the model on Reuters dataset, just run the following from the Castor working directory: + +``` +python -m 
models.fasttext --dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +``` + +The best model weights will be saved in + +``` +models/fasttext/saves/Reuters/best_model.pt +``` + +To test the model, you can use the following command. + +``` +python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model modelsfasttext/saves/Reuters/best_model.pt --seed 3435 +``` + +## Settings + +Adam is used for training. From cf49ec2b71edb578b30c05f83a51a1af9231c20d Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 15 Sep 2020 02:11:27 -0400 Subject: [PATCH 5/7] updating fasttext readme --- models/fasttext/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/fasttext/README.md b/models/fasttext/README.md index f7527b76..4b3f7ae9 100644 --- a/models/fasttext/README.md +++ b/models/fasttext/README.md @@ -19,7 +19,7 @@ models/fasttext/saves/Reuters/best_model.pt To test the model, you can use the following command. ``` -python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model modelsfasttext/saves/Reuters/best_model.pt --seed 3435 +python -m models.fasttext --dataset Reuters --batch_size 32 --trained-model models/fasttext/saves/Reuters/best_model.pt --seed 3435 ``` ## Settings From 60494a43f6401eb0ca6af7751911c64ed427381a Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 22 Sep 2020 15:18:27 -0400 Subject: [PATCH 6/7] fixing readme and args for fasttext and char_cnn --- models/char_cnn/README.md | 14 ++++++++++++-- models/fasttext/README.md | 13 +------------ models/fasttext/args.py | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 6a2d8385..7b1334d8 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -7,7 +7,13 @@ Implementation of [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-l To run the model on Reuters dataset, just run the following from the Castor working directory: ``` -python -m models.char_cnn 
--dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 +``` + +in order to run the indetical implementation of the paper, run the following from the Castor working directory: + +``` +python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 --using_fixed True ``` The best model weights will be saved in @@ -28,7 +34,11 @@ We experiment the model on the following datasets. - Reuters (ModApte) - AAPD -**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters +**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters unless you run the model with the following (strict) parameters: +``` +python3 -m models.char_cnn --dataset Reuters --batch-size 1 --lr 0.1 --seed 3435 --using_fixed True --epochs 30 --patience 30 +``` + ## Settings Adam is used for training. diff --git a/models/fasttext/README.md b/models/fasttext/README.md index 4b3f7ae9..524e3f21 100644 --- a/models/fasttext/README.md +++ b/models/fasttext/README.md @@ -7,20 +7,9 @@ Implementation of [FastText (2016)](https://arxiv.org/pdf/1607.01759.pdf) To run the model on Reuters dataset, just run the following from the Castor working directory: ``` -python -m models.fasttext --dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +python -m models.fasttext --dataset Reuters --batch-size 128 --lr 0.01 --seed 3435 --epochs 30 ``` -The best model weights will be saved in - -``` -models/fasttext/saves/Reuters/best_model.pt -``` - -To test the model, you can use the following command. 
- -``` -python -m models.fasttext --dataset Reuters --batch_size 32 --trained-model models/fasttext/saves/Reuters/best_model.pt --seed 3435 -``` ## Settings diff --git a/models/fasttext/args.py b/models/fasttext/args.py index 85c7ce53..5928df6e 100644 --- a/models/fasttext/args.py +++ b/models/fasttext/args.py @@ -15,7 +15,7 @@ def get_args(): parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) parser.add_argument('--word-vectors-file', default='GoogleNews-vectors-negative300.txt') - parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'kim_cnn')) + parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'fasttext')) parser.add_argument('--resume-snapshot', type=str) parser.add_argument('--trained-model', type=str) From f0bf8bb598baef315f5a092003e08a47a4641ae4 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 22 Sep 2020 15:23:27 -0400 Subject: [PATCH 7/7] fixing readme and args for fasttext and char_cnn --- models/char_cnn/README.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 7b1334d8..69584d1a 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -10,7 +10,7 @@ To run the model on Reuters dataset, just run the following from the Castor work python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 ``` -in order to run the indetical implementation of the paper, run the following from the Castor working directory: +in order to run the implementation of the paper, run the following from the Castor working directory: ``` python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 --using_fixed True @@ -27,14 +27,9 @@ To test the model, you can use the following command. 
``` python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model models/char_cnn/saves/Reuters/best_model.pt --seed 3435 ``` -## Dataset -We experiment the model on the following datasets. -- Reuters (ModApte) -- AAPD - -**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters unless you run the model with the following (strict) parameters: +**It should be noted** that the version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) needs to be run with the following (or similar) parameters; otherwise it produces a dev F1 of 0 on Reuters: ``` python3 -m models.char_cnn --dataset Reuters --batch-size 1 --lr 0.1 --seed 3435 --using_fixed True --epochs 30 --patience 30 ```