From 073195aa23275fb61ce982cfb83788e715165d03 Mon Sep 17 00:00:00 2001
From: Daniel Liu
Date: Sun, 5 Jun 2022 15:20:28 -0400
Subject: [PATCH] training loop

---
 README.md        |  2 +-
 dataset.py       |  7 +++++--
 main.py          | 41 +++++++++++++++++++++++++++++++++++++++--
 nn.py            | 23 +++++++++++++++++++++++
 requirements.txt |  3 +++
 5 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 nn.py

diff --git a/README.md b/README.md
index 7e6f3e8..2f84ec5 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ very simple python chatbot to suck less at nlp
 First create venv and install dependencies
 
 ```
-$ virtualenv venv
+$ virtualenv --python=python3 venv
 $ source venv/bin/activate
 $ pip install -r requirements.txt
 ```
diff --git a/dataset.py b/dataset.py
index f3a0a15..316cae0 100644
--- a/dataset.py
+++ b/dataset.py
@@ -1,5 +1,8 @@
-class IntentDataset:
+from torch.utils.data import Dataset
+
+
+class IntentDataset(Dataset):
 
     def __init__(self, x, y):
         self.x_data = x
         self.y_data = y
@@ -8,5 +11,5 @@ def __init__(self, x, y):
     def __len__(self):
         return len(self.x_data)
 
-    def __get_item__(self, index):
+    def __getitem__(self, index):
         return self.x_data[index], self.y_data[index]
diff --git a/main.py b/main.py
index 234941b..92e4e1f 100644
--- a/main.py
+++ b/main.py
@@ -2,12 +2,13 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import DataLoader
 
 from pipeop import pipes
 
 import preprocess
 from dataset import IntentDataset
+from nn import NeuralNet
 
 test_data = [
     ("advice", "In my younger and more vulnerable years my father gave me some advice that I've been turning over in my mind ever since."),
@@ -15,6 +16,7 @@
     ("communication", "He didn't say any more but we've always been unusually communicative in a reserved way, and I understood that he meant a great deal more than that.")
 ]
 
+
 @pipes
 def run():
     word_dict = []
@@ -37,9 +39,11 @@ def run():
     x_data = np.array([
         preprocess.bag_words(tokenized, word_dict) for (tokenized, tag) in xy
     ])
-    y_data = np.array([tag for (tokenized, tag) in xy])
+    tags = sorted({tag for (tokenized, tag) in xy})
+    y_data = np.array([tags.index(tag) for (tokenized, tag) in xy])
 
     dataset = IntentDataset(x_data, y_data)
 
+    # build dataloader
     batch_size = 8
     num_workers = 2
     loader = DataLoader(
@@ -49,4 +53,37 @@
         dataset,
         batch_size=batch_size,
         shuffle=True,
         num_workers=num_workers
     )
 
+    # build neural net
+    input_size = len(word_dict)
+    hidden_size = 8
+    output_size = len(tags)
+    device = 'cpu'
+    model = NeuralNet(input_size, hidden_size, output_size).to(device)
+
+    # start training
+    learning_rate = 0.001
+    training_epochs = 1000
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+    for epoch in range(training_epochs):
+        for (words, labels) in loader:
+            words = words.to(device, dtype=torch.float32)  # model weights are float32
+            labels = labels.to(device, dtype=torch.long)   # CrossEntropyLoss expects int64
+
+            # forward pass
+            outputs = model(words)
+            loss = criterion(outputs, labels)
+
+            # backward pass
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        if epoch % 100 == 0:
+            print(f'epoch={epoch}/{training_epochs} loss={loss.item():.4f}')
+
+    print(f'final loss={loss.item():.4f}')
+
 
 run()
diff --git a/nn.py b/nn.py
new file mode 100644
index 0000000..d98ff9c
--- /dev/null
+++ b/nn.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+from pipeop import pipes
+
+
+class NeuralNet(nn.Module):
+
+    def __init__(self, input_size, hidden_size, output_size):
+        super(NeuralNet, self).__init__()
+        self.l1 = nn.Linear(input_size, hidden_size)
+        self.l2 = nn.Linear(hidden_size, hidden_size)
+        self.l3 = nn.Linear(hidden_size, output_size)
+        self.relu = nn.ReLU()
+
+    @pipes
+    def forward(self, x):
+        return (
+            x
+            >> self.l1 >> self.relu
+            >> self.l2 >> self.relu
+            >> self.l3
+        )
diff --git a/requirements.txt b/requirements.txt
index 13c9b31..7385237 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,9 @@
 click==8.1.3
 joblib==1.1.0
 nltk==3.7
+numpy==1.22.4
 pipeop==0.3.0
 regex==2022.6.2
+torch==1.11.0
 tqdm==4.64.0
+typing_extensions==4.2.0
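
A quick sanity check for the new NeuralNet (a minimal sketch, not part of the
diff; the sizes below are toy stand-ins for the values main.py computes from
the data, i.e. input_size = len(word_dict) and output_size = len(tags)):

import torch

from nn import NeuralNet

input_size, hidden_size, output_size = 20, 8, 3  # toy sizes for this sketch
model = NeuralNet(input_size, hidden_size, output_size)

batch = torch.rand(4, input_size)  # four fake bag-of-words vectors
logits = model(batch)              # pipeop expands x >> l1 >> relu >> ... >> l3
assert logits.shape == (4, output_size)
print(logits.argmax(dim=1))        # predicted tag index per row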