From 073195aa23275fb61ce982cfb83788e715165d03 Mon Sep 17 00:00:00 2001
From: Daniel Liu
Date: Sun, 5 Jun 2022 15:20:28 -0400
Subject: [PATCH] training loop

---
 README.md        |  2 +-
 dataset.py       |  7 +++++--
 main.py          | 41 +++++++++++++++++++++++++++++++++++++++--
 nn.py            | 23 +++++++++++++++++++++++
 requirements.txt |  3 +++
 5 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 nn.py

diff --git a/README.md b/README.md
index 7e6f3e8..2f84ec5 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ very simple python chatbot to suck less at nlp
 First create venv and install dependencies
 
 ```
-$ virtualenv venv
+$ virtualenv --python=python3 venv
 $ source venv/bin/activate
 $ pip install -r requirements.txt
 ```
diff --git a/dataset.py b/dataset.py
index f3a0a15..316cae0 100644
--- a/dataset.py
+++ b/dataset.py
@@ -1,5 +1,8 @@
-class IntentDataset:
+from torch.utils.data import Dataset
+
+
+class IntentDataset(Dataset):
 
     def __init__(self, x, y):
         self.x_data = x
         self.y_data = y
@@ -8,5 +11,5 @@ def __init__(self, x, y):
     def __len__(self):
         return len(self.x_data)
 
-    def __get_item__(self, index):
+    def __getitem__(self, index):
         return self.x_data[index], self.y_data[index]
diff --git a/main.py b/main.py
index 234941b..92e4e1f 100644
--- a/main.py
+++ b/main.py
@@ -2,12 +2,13 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import DataLoader
 
 from pipeop import pipes
 
 import preprocess
 from dataset import IntentDataset
+from nn import NeuralNet
 
 test_data = [
     ("advice", "In my younger and more vulnerable years my father gave me some advice that I've been turning over in my mind ever since."),
@@ -15,6 +16,7 @@
     ("communication", "He didn't say any more but we've always been unusually communicative in a reserved way, and I understood that he meant a great deal more than that.")
 ]
 
+
 @pipes
 def run():
     word_dict = []
@@ -37,9 +39,11 @@ def run():
     x_data = np.array([
         preprocess.bag_words(tokenized, word_dict) for (tokenized, tag) in xy
     ])
-    y_data = np.array([tag for (tokenized, tag) in xy])
+    tags = sorted({tag for (tokenized, tag) in xy})
+    y_data = np.array([tags.index(tag) for (tokenized, tag) in xy])
 
     dataset = IntentDataset(x_data, y_data)
 
+    # build dataloader
     batch_size = 8
     num_workers = 2
     loader = DataLoader(
@@ -49,4 +53,37 @@
         dataset,
         batch_size=batch_size,
         shuffle=True,
         num_workers=num_workers
     )
 
+    # build neural net
+    input_size = len(word_dict)
+    hidden_size = 8
+    output_size = len(tags)
+    device = 'cpu'
+    model = NeuralNet(input_size, hidden_size, output_size).to(device)
+
+    # start training
+    learning_rate = 0.001
+    training_epochs = 1000
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+    for epoch in range(training_epochs):
+        for (words, labels) in loader:
+            words = words.to(device, dtype=torch.float32)  # model weights are float32
+            labels = labels.to(device, dtype=torch.long)   # CrossEntropyLoss expects int64
+
+            # forward pass
+            outputs = model(words)
+            loss = criterion(outputs, labels)
+
+            # backward pass
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        if epoch % 100 == 0:
+            print(f'epoch={epoch}/{training_epochs} loss={loss.item():.4f}')
+
+    print(f'final loss={loss.item():.4f}')
+
 
 run()
diff --git a/nn.py b/nn.py
new file mode 100644
index 0000000..d98ff9c
--- /dev/null
+++ b/nn.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+from pipeop import pipes
+
+
+class NeuralNet(nn.Module):
+
+    def __init__(self, input_size, hidden_size, output_size):
+        super(NeuralNet, self).__init__()
+        self.l1 = nn.Linear(input_size, hidden_size)
+        self.l2 = nn.Linear(hidden_size, hidden_size)
+        self.l3 = nn.Linear(hidden_size, output_size)
+        self.relu = nn.ReLU()
+
+    @pipes
+    def forward(self, x):
+        return (
+            x
+            >> self.l1 >> self.relu
+            >> self.l2 >> self.relu
+            >> self.l3
+        )
diff --git a/requirements.txt b/requirements.txt
index 13c9b31..7385237 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,9 @@
 click==8.1.3
 joblib==1.1.0
 nltk==3.7
+numpy==1.22.4
 pipeop==0.3.0
 regex==2022.6.2
+torch==1.11.0
 tqdm==4.64.0
+typing_extensions==4.2.0
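
A quick sanity check for the new NeuralNet (a minimal sketch, not part of the
diff; the sizes below are toy stand-ins for the values main.py computes from
the data, i.e. input_size = len(word_dict) and output_size = len(tags)):

import torch

from nn import NeuralNet

input_size, hidden_size, output_size = 20, 8, 3  # toy sizes for this sketch
model = NeuralNet(input_size, hidden_size, output_size)

batch = torch.rand(4, input_size)  # four fake bag-of-words vectors
logits = model(batch)              # pipeop expands x >> l1 >> relu >> ... >> l3
assert logits.shape == (4, output_size)
print(logits.argmax(dim=1))        # predicted tag index per row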