Commit
Merge branch 'dengai' into develop
Conflicts:
	CHANGELOG.md
opcode81 committed Feb 19, 2024
2 parents 91eed35 + 7d7e27c commit ad4957a
Showing 6 changed files with 814 additions and 82 deletions.
26 changes: 22 additions & 4 deletions CHANGELOG.md
@@ -5,13 +5,28 @@
### Improvements/Changes

* `vectoriser`:
-  * `SequenceVectoriser`: Allow to inject a sequence item identifier provider
-    (instance of new class `ItemIdentifierProvider`) in order to determine the set of
-    relevant unique items when using fitting mode UNIQUE
+  * `SequenceVectoriser`:
+    * Allow injection of a sequence item identifier provider (an instance of the
+      new class `ItemIdentifierProvider`) in order to determine the set of
+      relevant unique items when using fitting mode UNIQUE
+    * Allow sharing of vectorisers between instances such that a previously
+      fitted vectoriser can be reused in its fitted state, which can be
+      particularly useful for encoder-decoder settings where the decoding stage
+      uses some of the same features (vectorisers) as the encoding stage.
+  * Make Vectorisers aware of their 'fitted' status.
+* `torch`:
+  * `TorchVectorRegressionModel`: Add support for auto-regressive predictions
+    by adding class `TorchAutoregressiveResultHandler` and method
+    `with_autoregressive_result_handler`
+  * `LSTNetwork`:
+    * Add new mode 'encoder', where the output of the complex path
+      prior to the dense layer is returned
+    * Changed the constructor interface to comply with PEP 8
+  * Add package `seq` for encoder-decoder-style sequence models, including the
+    highly flexible vector model implementation
+    `EncoderDecoderVectorRegressionModel` and a multitude of low-level encoder
+    and decoder modules
* `data`:
  * Add `DataFrameSplitterColumnEquivalenceClass`, which splits a data frame
    based on equivalence classes of a given column
@@ -25,10 +40,13 @@

### Fixes

* `TagBuilder`: Fix return value of `with_component`
* `ModelEvaluation`: `create_plots` did not track plots with the given tracking context
  if `show_plots`=False and `result_writer`=None.
* `ParametersMetricsCollection`: `csv_path` could not be None
+* `LSTNetworkVectorClassificationModel` is now functional in v1,
+  improving the representation (no more dictionaries).
+  This breaks compatibility with sensAI v0.x representations of this class.

## v1.0.0 (2023-12-06)

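The `DataFrameSplitterColumnEquivalenceClass` added above is only named in this diff, so its actual interface is not shown here. The following plain-pandas sketch illustrates just the underlying idea (all rows sharing a value in the given column form one equivalence class and always land in the same partition); the function name and parameters are hypothetical and do not reflect the sensAI API:

```python
import pandas as pd

def split_by_column_equivalence_class(df: pd.DataFrame, column: str,
        first_fraction: float, seed: int = 42):
    """Partition df such that rows sharing a value in `column` stay together."""
    # shuffle the distinct values (the equivalence classes), not the individual rows
    classes = df[column].drop_duplicates().sample(frac=1.0, random_state=seed)
    num_first = round(first_fraction * len(classes))
    first_classes = set(classes.iloc[:num_first])
    in_first = df[column].isin(first_classes)
    return df[in_first], df[~in_first]

# usage: an 80/20 split that never tears a group apart
df = pd.DataFrame({"group_id": [1, 1, 2, 2, 3], "x": [0.1, 0.2, 0.3, 0.4, 0.5]})
train_df, test_df = split_by_column_equivalence_class(df, "group_id", 0.8)
```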
76 changes: 45 additions & 31 deletions src/sensai/torch/torch_models/lstnet/lstnet_models.py
@@ -11,7 +11,6 @@
from ...torch_data import TorchDataSetProviderFromDataUtil, TensorScalerIdentity, TensorScaler, DataUtil
from ...torch_enums import ActivationFunction
from ...torch_opt import NNOptimiserParams
-from ....util.string import object_repr

log: logging.Logger = logging.getLogger(__name__)

@@ -51,48 +50,63 @@ def __init__(self, num_input_time_slices, input_dim_per_time_slice, num_classes:
        :param output_activation: the output activation function
        :param nn_optimiser_params: parameters for NNOptimiser to use for training
        """
-       self.numClasses = num_classes
-       lstnet_args = dict(numInputTimeSlices=num_input_time_slices, inputDimPerTimeSlice=input_dim_per_time_slice, numOutputTimeSlices=1,
-           outputDimPerTimeSlice=num_classes, numConvolutions=num_convolutions, numCnnTimeSlices=num_cnn_time_slices, hidRNN=hid_rnn,
-           hwWindow=hw_window, hwCombine=hw_combine, dropout=dropout, outputActivation=output_activation,
-           skip=skip, hidSkip=hid_skip, isClassification=True)
+       self.num_input_time_slices = num_input_time_slices
+       self.input_dim_per_time_slice = input_dim_per_time_slice
+       self.num_convolutions = num_convolutions
+       self.num_cnn_time_slices = num_cnn_time_slices
+       self.hid_rnn = hid_rnn
+       self.skip = skip
+       self.hid_skip = hid_skip
+       self.hw_window = hw_window
+       self.hw_combine = hw_combine
+       self.dropout = dropout
+       self.cuda = cuda
+       self.output_activation = output_activation
+       self.num_classes = num_classes
        output_mode = ClassificationOutputMode.for_activation_fn(ActivationFunction.torch_function_from_any(output_activation))
-       super().__init__(output_mode, self._LSTNetworkModel, model_args=[self.cuda], model_kwargs=lstnet_args,
-           nn_optimiser_params=nn_optimiser_params)
+       super().__init__(output_mode, self._create_lst_network_model, nn_optimiser_params=nn_optimiser_params)
+
+   def _create_lst_network_model(self):
+       return self._LSTNetworkModel(self)

    class _LSTNetworkModel(VectorTorchModel):
-       def __init__(self, cuda, **lstnet_args):
-           """
-           :param cuda: flag indicating whether cuda is used
-           :param inputDim: the total number of inputs per data point
-           :param numClasses: the number of classes to predict
-           :param lstnet_args: arguments with which to construct the underlying LSTNetwork instance
-           """
-           super().__init__(cuda)
-           self.lstnetArgs = lstnet_args
+       def __init__(self, parent: "LSTNetworkVectorClassificationModel"):
+           super().__init__(parent.cuda)
+           self.parent = parent

        def create_torch_module_for_dims(self, input_dim, output_dim):
-           expected_input_dim = self.lstnetArgs["numInputTimeSlices"] * self.lstnetArgs["inputDimPerTimeSlice"]
+           p = self.parent
+           expected_input_dim = p.num_input_time_slices * p.input_dim_per_time_slice
            if expected_input_dim != input_dim:
                raise ValueError(f"Unexpected input size {input_dim}, expected {expected_input_dim}")
-           if self.lstnetArgs["outputDimPerTimeSlice"] is None:
-               self.lstnetArgs["outputDimPerTimeSlice"] = output_dim
+           if p.num_classes is None:
+               output_dim_per_time_slice = output_dim
            else:
-               if self.lstnetArgs["outputDimPerTimeSlice"] != output_dim:
-                   raise ValueError(f"Unexpected output size {output_dim}, expected {self.lstnetArgs['outputDimPerTimeSlice']}")
-           return LSTNetwork(**self.lstnetArgs)
-
-       def __str__(self):
-           return object_repr(self, self.lstnetArgs)
+               output_dim_per_time_slice = p.num_classes
+               if p.num_classes != output_dim:
+                   raise ValueError(f"Unexpected output dim {output_dim}, expected {p.num_classes}")
+           return LSTNetwork(num_input_time_slices=p.num_input_time_slices,
+               input_dim_per_time_slice=p.input_dim_per_time_slice,
+               num_output_time_slices=1,
+               output_dim_per_time_slice=output_dim_per_time_slice,
+               num_convolutions=p.num_convolutions,
+               num_cnn_time_slices=p.num_cnn_time_slices,
+               hid_rnn=p.hid_rnn,
+               hw_window=p.hw_window,
+               hw_combine=p.hw_combine,
+               dropout=p.dropout,
+               output_activation=p.output_activation,
+               skip=p.skip,
+               hid_skip=p.hid_skip,
+               mode=LSTNetwork.Mode.CLASSIFICATION)

    def _create_data_set_provider(self, inputs: pd.DataFrame, outputs: pd.DataFrame) -> TorchDataSetProviderFromDataUtil:
-       if self.numClasses is None:
-           self.numClasses = len(self._labels)
-       elif self.numClasses != len(self._labels):
-           raise ValueError(f"Output dimension {self.numClasses} per time slice was specified, while the training data contains "
-               f"{len(self._labels)} classes")
-       return TorchDataSetProviderFromDataUtil(self.DataUtil(inputs, outputs, self.numClasses), self.cuda)
+       if self.num_classes is None:
+           self.num_classes = len(self._labels)
+       elif self.num_classes != len(self._labels):
+           raise ValueError(f"Output dimension {self.num_classes} per time slice was specified, while the training data contains "
+               f"{len(self._labels)} classes")
+       return TorchDataSetProviderFromDataUtil(self.DataUtil(inputs, outputs, self.num_classes), self.cuda)

    def _predict_outputs_for_input_data_frame(self, inputs: pd.DataFrame) -> torch.Tensor:
        log.info(f"Predicting outputs for {len(inputs)} inputs")
118 changes: 79 additions & 39 deletions src/sensai/torch/torch_models/lstnet/lstnet_modules.py
@@ -1,9 +1,11 @@
+from enum import Enum
from typing import Union, Callable

import torch
from torch import nn
from torch.nn import functional as F

+from sensai.util.pickle import setstate
from ...torch_base import MCDropoutCapableNNModule
from ...torch_enums import ActivationFunction

@@ -30,7 +32,8 @@ class LSTNetwork(MCDropoutCapableNNModule):
    * Dense layer
-   * Direct regression dense layer (so-called "highway" path).
+   * Direct regression dense layer (so-called "highway" path), which uses the features of the last hwWindow time slices to
+     directly make a prediction

    The model ultimately combines the outputs of these two paths via a combination function.
    Many parts of the model are optional and can be completely disabled.
@@ -39,55 +42,74 @@ class LSTNetwork(MCDropoutCapableNNModule):
    The model expects as input a tensor of size (batchSize, numInputTimeSlices, inputDimPerTimeSlice).
    As output, the model will produce a tensor of size (batchSize, numOutputTimeSlices, outputDimPerTimeSlice)
-   if isClassification==False (default) and a tensor of size (batchSize, outputDimPerTimeSlice=numClasses, numOutputTimeSlices)
-   if isClassification==True; the latter shape matches what is required by the multi-dimensional case of loss function
+   if mode==REGRESSION and a tensor of size (batchSize, outputDimPerTimeSlice=numClasses, numOutputTimeSlices)
+   if mode==CLASSIFICATION; the latter shape matches what is required by the multi-dimensional case of loss function
    CrossEntropyLoss, for example, and therefore is suitable for classification use cases.
+   For mode==ENCODER, the model will produce a tensor of size (batch_size, hidRNN + skip * hidSkip).
    """
-   def __init__(self, numInputTimeSlices, inputDimPerTimeSlice, numOutputTimeSlices=1, outputDimPerTimeSlice=1,
-           numConvolutions: int = 100, numCnnTimeSlices: int = 6, hidRNN: int = 100, skip: int = 0, hidSkip: int = 5,
-           hwWindow: int = 0, hwCombine: str = "plus", dropout=0.2, outputActivation: Union[str, ActivationFunction, Callable] = "sigmoid",
-           isClassification=False):
+   class Mode(Enum):
+       REGRESSION = "regression"
+       CLASSIFICATION = "classification"
+       ENCODER = "encoder"
+
+   def __init__(self,
+           num_input_time_slices: int,
+           input_dim_per_time_slice: int,
+           num_output_time_slices: int = 1,
+           output_dim_per_time_slice: int = 1,
+           num_convolutions: int = 100,
+           num_cnn_time_slices: int = 6,
+           hid_rnn: int = 100,
+           skip: int = 0,
+           hid_skip: int = 5,
+           hw_window: int = 0,
+           hw_combine: str = "plus",
+           dropout=0.2,
+           output_activation: Union[str, ActivationFunction, Callable] = "sigmoid",
+           mode: Mode = Mode.REGRESSION):
"""
:param numInputTimeSlices: the number of input time slices
:param inputDimPerTimeSlice: the dimension of the input data per time slice
:param numOutputTimeSlices: the number of time slices predicted by the model
:param outputDimPerTimeSlice: the number of dimensions per output time slice. While this is the number of
:param num_input_time_slices: the number of input time slices
:param input_dim_per_time_slice: the dimension of the input data per time slice
:param num_output_time_slices: the number of time slices predicted by the model
:param output_dim_per_time_slice: the number of dimensions per output time slice. While this is the number of
target variables per time slice for regression problems, this must be the number of classes for classification problems.
:param numCnnTimeSlices: the number of time slices considered by each convolution (i.e. it is one of the dimensions of the matrix used for
:param num_cnn_time_slices: the number of time slices considered by each convolution (i.e. it is one of the dimensions of the matrix used for
convolutions, the other dimension being inputDimPerTimeSlice), a.k.a. "Ck"
:param numConvolutions: the number of separate convolutions to apply, i.e. the number of independent convolution matrices, a.k.a "hidC";
:param num_convolutions: the number of separate convolutions to apply, i.e. the number of independent convolution matrices, a.k.a "hidC";
if it is 0, then the entire complex processing path is not applied.
:param hidRNN: the number of hidden output dimensions for the RNN stage
:param hid_rnn: the number of hidden output dimensions for the RNN stage
:param skip: the number of time slices to skip for the skip-RNN. If it is 0, then the skip-RNN is not used.
:param hidSkip: the number of output dimensions of each of the skip parallel RNNs
:param hwWindow: the number of time slices from the end of the input time series to consider as input for the highway component.
:param hid_skip: the number of output dimensions of each of the skip parallel RNNs
:param hw_window: the number of time slices from the end of the input time series to consider as input for the highway component.
If it is 0, the highway component is not used.
:param hwCombine: {"plus", "product", "bilinear"} the function with which the highway component's output is combined with the complex path's output
:param hw_combine: {"plus", "product", "bilinear"} the function with which the highway component's output is combined with the complex path's output
:param dropout: the dropout probability to use during training (dropouts are applied after every major step in the evaluation path)
:param outputActivation: the output activation function
:param isClassification: whether the model is to serve as a classifier, in which case the output tensor dimension ordering is adapted
to suit loss functions such as CrossEntropyLoss
:param output_activation: the output activation function
:param mode: the prediction mode. For `CLASSIFICATION`, the output tensor dimension ordering is adapted to suit loss functions such
as CrossEntropyLoss. When set to `ENCODER`, will output the latent representation prior to the dense layer in the complex path
of the network (see class docstring).
"""
-       if numConvolutions == 0 and hwWindow == 0:
+       if num_convolutions == 0 and hw_window == 0:
            raise ValueError("No processing paths remain")
-       if numInputTimeSlices < numCnnTimeSlices or (hwWindow != 0 and hwWindow < numInputTimeSlices):
+       if num_input_time_slices < num_cnn_time_slices or (hw_window != 0 and hw_window < num_input_time_slices):
            raise Exception("Inconsistent numbers of time slices provided")

        super().__init__()
-       self.inputDimPerTimeSlice = inputDimPerTimeSlice
-       self.timeSeriesDimPerTimeSlice = outputDimPerTimeSlice
-       self.totalOutputDim = self.timeSeriesDimPerTimeSlice * numOutputTimeSlices
-       self.numOutputTimeSlices = numOutputTimeSlices
-       self.window = numInputTimeSlices
-       self.hidRNN = hidRNN
-       self.numConv = numConvolutions
-       self.hidSkip = hidSkip
-       self.Ck = numCnnTimeSlices  # the "height" of the CNN filter/kernel; the "width" being inputDimPerTimeSlice
+       self.inputDimPerTimeSlice = input_dim_per_time_slice
+       self.timeSeriesDimPerTimeSlice = output_dim_per_time_slice
+       self.totalOutputDim = self.timeSeriesDimPerTimeSlice * num_output_time_slices
+       self.numOutputTimeSlices = num_output_time_slices
+       self.window = num_input_time_slices
+       self.hidRNN = hid_rnn
+       self.numConv = num_convolutions
+       self.hidSkip = hid_skip
+       self.Ck = num_cnn_time_slices  # the "height" of the CNN filter/kernel; the "width" being inputDimPerTimeSlice
        self.convSeqLength = self.window - self.Ck + 1  # the length of the output sequence produced by the CNN for each kernel matrix
        self.skip = skip
-       self.hw = hwWindow
+       self.hw = hw_window
        self.pDropout = dropout
-       self.isClassification = isClassification
+       self.mode = mode

        # configure CNN-RNN path
        if self.numConv > 0:
@@ -106,16 +128,31 @@ def __init__(self, numInputTimeSlices, inputDimPerTimeSlice, numOutputTimeSlices
        if self.hw > 0:
            # direct mapping from all inputs to all outputs
            self.highway = nn.Linear(self.hw * self.inputDimPerTimeSlice, self.totalOutputDim)
-           if hwCombine == 'plus':
+           if hw_combine == 'plus':
                self.highwayCombine = self._plus
-           elif hwCombine == 'product':
+           elif hw_combine == 'product':
                self.highwayCombine = self._product
-           elif hwCombine == 'bilinear':
+           elif hw_combine == 'bilinear':
                self.highwayCombine = nn.Bilinear(self.totalOutputDim, self.totalOutputDim, self.totalOutputDim)
            else:
-               raise ValueError("Unknown highway combination function '%s'" % hwCombine)
+               raise ValueError("Unknown highway combination function '%s'" % hw_combine)

-       self.output = ActivationFunction.torch_function_from_any(outputActivation)
+       self.output = ActivationFunction.torch_function_from_any(output_activation)
+
+   def __setstate__(self, state):
+       if "isClassification" in state:
+           state["mode"] = self.Mode.CLASSIFICATION if state["isClassification"] else self.Mode.REGRESSION
+       setstate(LSTNetwork, self, state, removed_properties=["isClassification"])
+
+   @staticmethod
+   def compute_encoder_dim(hid_rnn: int, skip: int, hid_skip: int) -> int:
+       return hid_rnn + skip * hid_skip
+
+   def get_encoder_dim(self):
+       """
+       :return: the vector dimension that is output for the case where mode=ENCODER
+       """
+       return self.compute_encoder_dim(self.hidRNN, self.skip, self.hidSkip)

    def forward(self, x):
        batch_size = x.size(0)
@@ -160,6 +197,9 @@ def forward(self, x):
            s = dropout(s)
            r = torch.cat((r, s), 1)  # (batch_size, hidR + skip * hidS)

+       if self.mode == self.Mode.ENCODER:
+           return r
+
        res = self.linear1(r)  # (batch_size, totalOutputDim)

        # auto-regressive highway model
@@ -176,7 +216,7 @@ def forward(self, x):
        res = self.output(res)

        res = res.view(batch_size, self.numOutputTimeSlices, self.timeSeriesDimPerTimeSlice)
-       if self.isClassification:
+       if self.mode == self.Mode.CLASSIFICATION:
            res = res.permute(0, 2, 1)
        return res

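A minimal usage sketch (not part of the commit) for the refactored `LSTNetwork` above, exercising all three modes; the hyperparameter values are arbitrary, and the import path is inferred from the file path shown in this diff:

```python
import torch

from sensai.torch.torch_models.lstnet.lstnet_modules import LSTNetwork

batch_size, num_slices, dim_per_slice, num_classes = 8, 20, 4, 3
common = dict(num_input_time_slices=num_slices, input_dim_per_time_slice=dim_per_slice,
    num_cnn_time_slices=6, num_convolutions=32, hid_rnn=50, skip=2, hid_skip=5)
x = torch.randn(batch_size, num_slices, dim_per_slice)

# REGRESSION (default): (batch_size, num_output_time_slices, output_dim_per_time_slice)
reg = LSTNetwork(num_output_time_slices=2, output_dim_per_time_slice=1,
    mode=LSTNetwork.Mode.REGRESSION, **common)
assert reg(x).shape == (batch_size, 2, 1)

# CLASSIFICATION: dimensions are permuted to (batch_size, num_classes, num_output_time_slices),
# matching the multi-dimensional case of CrossEntropyLoss
clf = LSTNetwork(output_dim_per_time_slice=num_classes,
    mode=LSTNetwork.Mode.CLASSIFICATION, **common)
assert clf(x).shape == (batch_size, num_classes, 1)

# ENCODER: returns the latent vector ahead of the dense layer, of dimension
# hid_rnn + skip * hid_skip == LSTNetwork.compute_encoder_dim(50, 2, 5) == 60
enc = LSTNetwork(mode=LSTNetwork.Mode.ENCODER, **common)
assert enc(x).shape == (batch_size, enc.get_encoder_dim())
```

Unpickling of sensAI v0.x instances remains possible: the `__setstate__` shown above maps a stored `isClassification` flag to the corresponding `Mode` value via `sensai.util.pickle.setstate`.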