From 3b65e11186a9d834e7040d222a72b6d649d92501 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Sat, 4 Jul 2020 10:39:17 -0400 Subject: [PATCH 1/7] update char_cnn and fasttext --- models/char_cnn/model.py | 31 ++++++++++++++++++++----------- models/fasttext/model.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index 4f7bdc2d..d0d2c946 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -14,17 +14,27 @@ def __init__(self, config): output_channel = config.output_channel num_affine_neurons = config.num_affine_neurons target_class = config.target_class - input_channel = 68 - - self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=7) - self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=7) - self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3) - self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=3) + # we can add these parameters in the config + input_channel = 68 #number of characters + first_kernel_size = 7 + second_kernel_size = 3 + pool_size = 3 + max_sentence_length = 1014 #maximum number of characters per sentence + + self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) + self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) + self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + + # due to reduction based on the convolutional neural network + temp = first_kernel_size - 
1 + pool_size * (first_kernel_size - 1) + ( + pool_size ** 2 * 4 * (second_kernel_size - 1)) + linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters self.dropout = nn.Dropout(config.dropout) - self.fc1 = nn.Linear(output_channel, num_affine_neurons) + self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) self.fc3 = nn.Linear(num_affine_neurons, target_class) @@ -39,9 +49,8 @@ def forward(self, x, **kwargs): x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) - x = F.relu(self.conv6(x)) + x = F.max_pool1d(F.relu(self.conv6(x)), 3) - x = F.max_pool1d(x, x.size(2)).squeeze(2) x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index 450561a1..b3dedd9c 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -36,7 +36,7 @@ def forward(self, x, **kwargs): elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - x = F.avg_pool2d(x, (x.shape[1], 1)).squeeze(1) # (batch, embed_dim) + x = F.avg_pool1d(x, x.shape[1]).squeeze(1) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit From cda846ffa55bcfa6729e6fbabd8fb6433a065b94 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 1 Sep 2020 21:44:46 +0000 Subject: [PATCH 2/7] updating models --- models/char_cnn/args.py | 8 ++++++++ models/char_cnn/model.py | 14 ++++++-------- models/fasttext/model.py | 4 +--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/models/char_cnn/args.py b/models/char_cnn/args.py index 62c517f3..1340798a 100644 --- a/models/char_cnn/args.py +++ b/models/char_cnn/args.py @@ -14,6 +14,14 @@ def get_args(): parser.add_argument('--epoch-decay', type=int, default=15) parser.add_argument('--weight-decay', type=float, default=0) + parser.add_argument('--number_of_characters', type=float, 
default=68) + parser.add_argument('--first_kernel', type=int, default=7) + parser.add_argument('--second_kernel', type=int, default=3) + parser.add_argument('--pool_size', type=int, default=3) + parser.add_argument('--max_sentence_length', type=int, default=1000) + + + parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) parser.add_argument('--word-vectors-file', default='GoogleNews-vectors-negative300.txt') parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'char_cnn')) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index d0d2c946..5a123050 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -14,12 +14,13 @@ def __init__(self, config): output_channel = config.output_channel num_affine_neurons = config.num_affine_neurons target_class = config.target_class + #added paremeters in the config + input_channel = config.number_of_characters #number of characters + first_kernel_size = config.first_kernel + second_kernel_size = config.second_kernel + pool_size = config.pool_size # we can add these parameters in the config - input_channel = 68 #number of characters - first_kernel_size = 7 - second_kernel_size = 3 - pool_size = 3 - max_sentence_length = 1014 #maximum number of characters per sentence + max_sentence_length = config.max_sentence_length #maximum number of characters per sentence self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) @@ -32,7 +33,6 @@ def __init__(self, config): temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( pool_size ** 2 * 4 * (second_kernel_size - 1)) linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters - self.dropout = nn.Dropout(config.dropout) self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = 
nn.Linear(num_affine_neurons, num_affine_neurons) @@ -43,14 +43,12 @@ def forward(self, x, **kwargs): x = x.transpose(1, 2).type(torch.cuda.FloatTensor) else: x = x.transpose(1, 2).type(torch.FloatTensor) - x = F.max_pool1d(F.relu(self.conv1(x)), 3) x = F.max_pool1d(F.relu(self.conv2(x)), 3) x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) x = F.max_pool1d(F.relu(self.conv6(x)), 3) - x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index b3dedd9c..920d49d4 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -35,9 +35,7 @@ def forward(self, x, **kwargs): x = self.static_embed(x) # (batch, sent_len, embed_dim) elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - - x = F.avg_pool1d(x, x.shape[1]).squeeze(1) # (batch, embed_dim) - + x = F.avg_pool1d(x.transpose(1,2), x.shape[1]).squeeze(2) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit From 15ea3ebc2cd19802bafd95ea8703dca230d85ced Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 1 Sep 2020 17:48:41 -0400 Subject: [PATCH 3/7] fixing comments --- models/char_cnn/model.py | 11 ++++++----- models/fasttext/model.py | 4 +--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index 5a123050..e6a86357 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -11,16 +11,16 @@ def __init__(self, config): self.is_cuda_enabled = config.cuda num_conv_filters = config.num_conv_filters - output_channel = config.output_channel + output_channel = config.output_channel #this parameter is not used anymore for conv6 num_affine_neurons = config.num_affine_neurons target_class = config.target_class - #added paremeters in the config - input_channel = config.number_of_characters #number of characters + # added paremeters in the config + 
input_channel = config.number_of_characters # number of characters first_kernel_size = config.first_kernel second_kernel_size = config.second_kernel pool_size = config.pool_size - # we can add these parameters in the config - max_sentence_length = config.max_sentence_length #maximum number of characters per sentence + + max_sentence_length = config.max_sentence_length # maximum number of characters per sentence self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=first_kernel_size) self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=first_kernel_size) @@ -33,6 +33,7 @@ def __init__(self, config): temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( pool_size ** 2 * 4 * (second_kernel_size - 1)) linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + self.dropout = nn.Dropout(config.dropout) self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) diff --git a/models/fasttext/model.py b/models/fasttext/model.py index 920d49d4..ec7c0253 100644 --- a/models/fasttext/model.py +++ b/models/fasttext/model.py @@ -35,8 +35,6 @@ def forward(self, x, **kwargs): x = self.static_embed(x) # (batch, sent_len, embed_dim) elif self.mode == 'non-static': x = self.non_static_embed(x) # (batch, sent_len, embed_dim) - x = F.avg_pool1d(x.transpose(1,2), x.shape[1]).squeeze(2) # (batch, embed_dim) + x = F.avg_pool1d(x.transpose(1, 2), x.shape[1]).squeeze(2) # (batch, embed_dim) logit = self.fc1(x) # (batch, target_size) return logit - - From 52ec7b5f0d1db4566c198600e1dbd5a816436201 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 15 Sep 2020 00:49:03 -0400 Subject: [PATCH 4/7] updating the char_cnn model and readme --- models/char_cnn/README.md | 1 + models/char_cnn/args.py | 2 ++ models/char_cnn/model.py | 25 +++++++++++++++++-------- models/fasttext/README.md | 27 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 8 
deletions(-) create mode 100644 models/fasttext/README.md diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 982c17eb..6a2d8385 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -28,6 +28,7 @@ We experiment the model on the following datasets. - Reuters (ModApte) - AAPD +**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters ## Settings Adam is used for training. diff --git a/models/char_cnn/args.py b/models/char_cnn/args.py index 1340798a..84f5ddee 100644 --- a/models/char_cnn/args.py +++ b/models/char_cnn/args.py @@ -20,6 +20,8 @@ def get_args(): parser.add_argument('--pool_size', type=int, default=3) parser.add_argument('--max_sentence_length', type=int, default=1000) + parser.add_argument('--using_fixed', type=bool, default=False) + parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) diff --git a/models/char_cnn/model.py b/models/char_cnn/model.py index e6a86357..b8edd569 100644 --- a/models/char_cnn/model.py +++ b/models/char_cnn/model.py @@ -19,6 +19,8 @@ def __init__(self, config): first_kernel_size = config.first_kernel second_kernel_size = config.second_kernel pool_size = config.pool_size + #whether we are using the fix version of the paper + self.using_fixed = config.using_fixed max_sentence_length = config.max_sentence_length # maximum number of characters per sentence @@ -27,15 +29,18 @@ def __init__(self, config): self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) - self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, 
kernel_size=second_kernel_size) - - # due to reduction based on the convolutional neural network - temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( - pool_size ** 2 * 4 * (second_kernel_size - 1)) - linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + if self.using_fixed: + self.conv6 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=second_kernel_size) + # due to reduction based on the convolutional neural network + temp = first_kernel_size - 1 + pool_size * (first_kernel_size - 1) + ( + pool_size ** 2 * 4 * (second_kernel_size - 1)) + linear_size_temp = int((max_sentence_length - temp) / (pool_size ** 3)) * num_conv_filters + self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) + else: + self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=second_kernel_size) + self.fc1 = nn.Linear(output_channel, num_affine_neurons) self.dropout = nn.Dropout(config.dropout) - self.fc1 = nn.Linear(linear_size_temp, num_affine_neurons) self.fc2 = nn.Linear(num_affine_neurons, num_affine_neurons) self.fc3 = nn.Linear(num_affine_neurons, target_class) @@ -49,7 +54,11 @@ def forward(self, x, **kwargs): x = F.relu(self.conv3(x)) x = F.relu(self.conv4(x)) x = F.relu(self.conv5(x)) - x = F.max_pool1d(F.relu(self.conv6(x)), 3) + if self.using_fixed: + x = F.max_pool1d(F.relu(self.conv6(x)), 3) + else: + x = F.relu(self.conv6(x)) + x = F.max_pool1d(x, x.size(2)).squeeze(2) x = F.relu(self.fc1(x.view(x.size(0), -1))) x = self.dropout(x) x = F.relu(self.fc2(x)) diff --git a/models/fasttext/README.md b/models/fasttext/README.md new file mode 100644 index 00000000..f7527b76 --- /dev/null +++ b/models/fasttext/README.md @@ -0,0 +1,27 @@ +## Bag of Tricks for Efficient Text Classification + +Implementation of [FastText (2016)](https://arxiv.org/pdf/1607.01759.pdf) + +## Quick Start + +To run the model on Reuters dataset, just run the following from the Castor working directory: + +``` +python -m 
models.fasttext --dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +``` + +The best model weights will be saved in + +``` +models/fasttext/saves/Reuters/best_model.pt +``` + +To test the model, you can use the following command. + +``` +python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model modelsfasttext/saves/Reuters/best_model.pt --seed 3435 +``` + +## Settings + +Adam is used for training. From cf49ec2b71edb578b30c05f83a51a1af9231c20d Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 15 Sep 2020 02:11:27 -0400 Subject: [PATCH 5/7] updating fasttext readme --- models/fasttext/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/fasttext/README.md b/models/fasttext/README.md index f7527b76..4b3f7ae9 100644 --- a/models/fasttext/README.md +++ b/models/fasttext/README.md @@ -19,7 +19,7 @@ models/fasttext/saves/Reuters/best_model.pt To test the model, you can use the following command. ``` -python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model modelsfasttext/saves/Reuters/best_model.pt --seed 3435 +python -m models.fasttext --dataset Reuters --batch_size 32 --trained-model models/fasttext/saves/Reuters/best_model.pt --seed 3435 ``` ## Settings From 60494a43f6401eb0ca6af7751911c64ed427381a Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 22 Sep 2020 15:18:27 -0400 Subject: [PATCH 6/7] fixing readme and args for fasttext and char_cnn --- models/char_cnn/README.md | 14 ++++++++++++-- models/fasttext/README.md | 13 +------------ models/fasttext/args.py | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 6a2d8385..7b1334d8 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -7,7 +7,13 @@ Implementation of [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-l To run the model on Reuters dataset, just run the following from the Castor working directory: ``` -python -m models.char_cnn 
--dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 +``` + +in order to run the indetical implementation of the paper, run the following from the Castor working directory: + +``` +python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 --using_fixed True ``` The best model weights will be saved in @@ -28,7 +34,11 @@ We experiment the model on the following datasets. - Reuters (ModApte) - AAPD -**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters +**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters unless you run the model with the following (strict) parameters: +``` +python3 -m models.char_cnn --dataset Reuters --batch-size 1 --lr 0.1 --seed 3435 --using_fixed True --epochs 30 --patience 30 +``` + ## Settings Adam is used for training. diff --git a/models/fasttext/README.md b/models/fasttext/README.md index 4b3f7ae9..524e3f21 100644 --- a/models/fasttext/README.md +++ b/models/fasttext/README.md @@ -7,20 +7,9 @@ Implementation of [FastText (2016)](https://arxiv.org/pdf/1607.01759.pdf) To run the model on Reuters dataset, just run the following from the Castor working directory: ``` -python -m models.fasttext --dataset Reuters --batch-size 128 --lr 0.001 --seed 3435 +python -m models.fasttext --dataset Reuters --batch-size 128 --lr 0.01 --seed 3435 --epochs 30 ``` -The best model weights will be saved in - -``` -models/fasttext/saves/Reuters/best_model.pt -``` - -To test the model, you can use the following command. 
- -``` -python -m models.fasttext --dataset Reuters --batch_size 32 --trained-model models/fasttext/saves/Reuters/best_model.pt --seed 3435 -``` ## Settings diff --git a/models/fasttext/args.py b/models/fasttext/args.py index 85c7ce53..5928df6e 100644 --- a/models/fasttext/args.py +++ b/models/fasttext/args.py @@ -15,7 +15,7 @@ def get_args(): parser.add_argument('--word-vectors-dir', default=os.path.join(os.pardir, 'hedwig-data', 'embeddings', 'word2vec')) parser.add_argument('--word-vectors-file', default='GoogleNews-vectors-negative300.txt') - parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'kim_cnn')) + parser.add_argument('--save-path', type=str, default=os.path.join('model_checkpoints', 'fasttext')) parser.add_argument('--resume-snapshot', type=str) parser.add_argument('--trained-model', type=str) From f0bf8bb598baef315f5a092003e08a47a4641ae4 Mon Sep 17 00:00:00 2001 From: gmichalo Date: Tue, 22 Sep 2020 15:23:27 -0400 Subject: [PATCH 7/7] fixing readme and args for fasttext and char_cnn --- models/char_cnn/README.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/models/char_cnn/README.md b/models/char_cnn/README.md index 7b1334d8..69584d1a 100644 --- a/models/char_cnn/README.md +++ b/models/char_cnn/README.md @@ -10,7 +10,7 @@ To run the model on Reuters dataset, just run the following from the Castor work python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 ``` -in order to run the indetical implementation of the paper, run the following from the Castor working directory: +in order to run the implementation of the paper, run the following from the Castor working directory: ``` python -m models.char_cnn --dataset Reuters --batch-size 32 --lr 0.01 --seed 3435 --using_fixed True @@ -27,14 +27,9 @@ To test the model, you can use the following command. 
``` python -m models.char_cnn --dataset Reuters --batch_size 32 --trained-model models/char_cnn/saves/Reuters/best_model.pt --seed 3435 ``` -## Dataset -We experiment the model on the following datasets. -- Reuters (ModApte) -- AAPD - -**It should be noted** that if version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) produces an dev F1 of 0 on Reuters unless you run the model with the following (strict) parameters: +**It should be noted** that the version that follows the implementation of the [Char-CNN (2015)](http://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf) needs to be run with the following (or similar) parameters; otherwise it produces a dev F1 of 0 on Reuters: ``` python3 -m models.char_cnn --dataset Reuters --batch-size 1 --lr 0.1 --seed 3435 --using_fixed True --epochs 30 --patience 30 ```