Added train split and normalization of data so the bug with the insane error is fixed

Also tested the DataLoader; now set up for a proper model implementation
Gerhardsa0 committed Nov 14, 2023
1 parent a434a1a commit ad75240
Showing 3 changed files with 47 additions and 28 deletions.
22 changes: 13 additions & 9 deletions src/safeds/data/tabular/containers/_timeseries_table.py
@@ -112,6 +112,7 @@ def _from_table(

# Create Time Series Object
result = object.__new__(TimeSeries)
result._feature_names = feature_names
result._data = table._data
result._schema = table._schema
result._features = table.keep_only_columns(feature_names)
@@ -214,7 +215,7 @@ def features(self) -> Table:
return self._features

@property
def target(self) -> Column:
"""
Get the target column of the tagged table.
@@ -435,34 +436,37 @@ def _as_table(self: TimeSeries) -> Table:
# for testing purposes they are here
# ------------------------------------------------------------------------------------------------------------------

def _create_all_windows_for_column(self):
def _create_all_windows_for_column(self, train_size: float):
#this generator generates all windows for all feature columns
def in_yield(col: Column):
ser = col._data
testsplit_index = int(col.__len__()*train_size)
#get only the training data of a column and normalize it
ser = (col._data[:testsplit_index]-col.mean())/col.standard_deviation()
for i in range(len(ser) - self._window_size):
yield list(ser.iloc[i : i + self._window_size])
for col_name in self._feature_names:
col = self._features.get_column(col_name)
yield list(in_yield(col))



def _create_all_labels_for_target_column(self):
def _create_all_labels_for_target_column(self, train_size: float):
#this generator generates all forecast horizons for the target column
def _generate_label_windows( ):
ser = self._target._data
testsplit_index = int(self.target.__len__()*train_size)
#get only the training data of a column and normalize it
ser = (self._target._data[:testsplit_index]-self._target.mean())/self._target.standard_deviation()
for i in range(len(ser) - self._window_size):
yield list(ser.iloc[i + self._window_size : i + self._window_size + self._forecast_horizon])
return list((_generate_label_windows()))



def into_DataLoader(self):
def into_train_DataLoader(self, train_size: float):
#code below concatenate the column like the following
#f1:[w1, w2, w3] f2[w1, w2, w3] -> [w1+w1, w2+w2, w3+w3]
x_train = np.concatenate(list(self._create_all_windows_for_column()), axis=1)
x_train = np.concatenate(list(self._create_all_windows_for_column(train_size)), axis=1)
#for target this will be created: [ t1, t2, t3]
y_train = np.array(self._create_all_labels_for_target_column())
y_train = np.array(self._create_all_labels_for_target_column(train_size))
#load them into PyTorch
dataset = TimeSeriesDataset(x_train,y_train)
return DataLoader(dataset, batch_size=1)
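As a side note, the sliding-window and normalization logic added above can be sketched in plain pandas/NumPy, outside the safe-ds Column API. This is only an illustration: the helper name `make_windows` and the example values are made up here, and the sketch normalizes with statistics of the training slice, whereas the commit uses the full column's mean and standard deviation.

```python
import numpy as np
import pandas as pd


def make_windows(series: pd.Series, train_size: float, window_size: int, forecast_horizon: int):
    """Cut the training part of a series into overlapping input windows and label windows."""
    split_index = int(len(series) * train_size)
    # keep only the training slice and normalize it
    train = series.iloc[:split_index]
    train = (train - train.mean()) / train.std()
    inputs, labels = [], []
    for i in range(len(train) - window_size):
        inputs.append(train.iloc[i : i + window_size].to_list())
        labels.append(train.iloc[i + window_size : i + window_size + forecast_horizon].to_list())
    return np.array(inputs), np.array(labels)


# example: 10 values, 80 % train split, window of 3, horizon of 1 -> 5 windows
x, y = make_windows(pd.Series(range(10), dtype=float), train_size=0.8, window_size=3, forecast_horizon=1)
print(x.shape, y.shape)  # (5, 3) (5, 1)
```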
32 changes: 25 additions & 7 deletions src/safeds/ml/nn/_model.py
@@ -1,12 +1,12 @@
import pandas as pd
import numpy as np
import torch
import time
import torch.nn as nn
from torch.utils.data import DataLoader
from safeds.ml.nn import RNN_Layer
from safeds.data.tabular.containers import Column, Table, TaggedTable, TimeSeries
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError
class Model():
def __init__(self, layers : list):
self._model = PyTorchModel(layers)
@@ -23,18 +23,36 @@ def model_forward(self, data : DataLoader):
self._model(inputs)


def train(self,x):
pass
def train(self, train_loader: DataLoader, epochs: int, learningrate : float):
start_time = time.time()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(self._model.parameters(), lr = learningrate)

for epoch in range(epochs):
for batch in iter(train_loader):
inputs, labels = batch
optimizer.zero_grad()

labels = labels.to(torch.float32)
inputs = inputs.to(torch.float32)
outputs = self._model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
print(f'Epoch: {epoch+1:2} Loss {loss.item():10.8f}')
print(f'\nDuration: {time.time()-start_time:.0f} seconds')

class PyTorchModel(nn.Module):
def __init__(self, LayerListe :list[RNN_Layer]):
super(PyTorchModel, self).__init__()
self.layerliste = []
layers = []
for layer in LayerListe:
self.layerliste.append(layer._create_pytorch_layer())
layers.append(layer._create_pytorch_layer())

self._layerliste = nn.ModuleList(layers)

def forward(self, x):
out = x
for layer in self.layerliste:
for layer in self._layerliste:
out = layer(out)
return out
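A note on the switch from a plain Python list to `nn.ModuleList` above: only registered sub-modules expose their weights through `model.parameters()`, which the Adam optimizer added in `train` relies on; with a plain Python list, `parameters()` stays empty. A minimal sketch of that PyTorch behaviour, independent of the safe-ds classes:

```python
import torch.nn as nn


class PlainList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 4)]  # plain list: the layer's parameters are NOT registered


class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4)])  # ModuleList: parameters ARE registered


print(len(list(PlainList().parameters())))       # 0 -> an optimizer would get an empty parameter list
print(len(list(WithModuleList().parameters())))  # 2 (weight and bias of the linear layer)
```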
@@ -3,6 +3,7 @@
import numpy as np
import torch
import torch.nn as nn
import time
from torch.utils.data import DataLoader
from safeds.data.tabular.containers import Column, Table, TaggedTable, TimeSeries
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError
@@ -12,31 +13,27 @@


def test_create_timeseries() -> None:

table = Table(data={"f1": [1, 2, 3, 4, 6, 7], "target": [7,2, 3, 1, 3, 7], "f2": [4,7, 5, 5, 5, 7]})
ts = TimeSeries(data={"f1": [1, 2, 3, 4, 6, 7], "target": [7,2, 3, 1, 3, 7], "f2": [4,7, 5, 5, 5, 7]},
target_name="target",
date_name="f1",
window_size=2,
forecast_horizon=1,
feature_names=["f1", "f2", "target"])
table = Table.from_csv_file(r"tests\resources\Alcohol_Sales (1).csv")
ts = TimeSeries._from_table(table,target_name="S4248SM144NCEN", date_name="DATE", window_size=12, forecast_horizon=1, feature_names=["S4248SM144NCEN"])



# creating a model is not yet defined in safeDS, so this is done low level in PyTorch
# 2 is the number of feature columns here
input_dim = ts._window_size * len(ts._feature_names)
hidden_dim = 1
hidden_dim = 256
output_dim = ts._forecast_horizon
layer1 = RNN_Layer(input_dim, hidden_dim)
layer2 = RNN_Layer(hidden_dim, output_dim)
model = Model([layer1, layer2])
#model.train(ts.into_DataLoader(), 5, 0.01)

#so the dataset can be loaded low level, into_DataLoader is used here
model.model_forward(ts.into_DataLoader())





#if this runs through, the Table was correctly loaded into the DataLoader
#assert False
assert False


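Continuing from the test setup above, the end-to-end usage hinted at by the commented-out `model.train(...)` call, combined with the new `into_train_DataLoader` and `Model.train` signatures, would roughly look as follows; the `train_size`, epoch count, and learning rate are illustrative values, not part of the commit:

```python
# sketch only, reusing ts, Model and the RNN_Layer objects defined in the test above
train_loader = ts.into_train_DataLoader(train_size=0.8)  # windows and labels built from the first 80 % of the series
model = Model([layer1, layer2])
model.train(train_loader, epochs=5, learningrate=0.01)   # MSE loss and Adam, prints the loss per epoch
```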