From 2a65660d6cf9613482004562bba43ea02c317097 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:41:35 +0100 Subject: [PATCH 01/24] adapt embedding layer to new input format of tuple information --- .../arch_utils/layer_utils/embedding_layer.py | 57 ++++++++++++------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/mambular/arch_utils/layer_utils/embedding_layer.py b/mambular/arch_utils/layer_utils/embedding_layer.py index 0fb93fd..0184ca3 100644 --- a/mambular/arch_utils/layer_utils/embedding_layer.py +++ b/mambular/arch_utils/layer_utils/embedding_layer.py @@ -6,7 +6,7 @@ class EmbeddingLayer(nn.Module): - def __init__(self, num_feature_info, cat_feature_info, config): + def __init__(self, num_feature_info, cat_feature_info, emb_feature_info, config): """Embedding layer that handles numerical and categorical embeddings. Parameters @@ -28,6 +28,7 @@ def __init__(self, num_feature_info, cat_feature_info, config): self.layer_norm_after_embedding = getattr( config, "layer_norm_after_embedding", False ) + self.embedding_projection = getattr(config, "embedding_projection", True) self.use_cls = getattr(config, "use_cls", False) self.cls_position = getattr(config, "cls_position", 0) self.embedding_dropout = ( @@ -100,6 +101,21 @@ def __init__(self, num_feature_info, cat_feature_info, config): ] ) + if self.embedding_projection: + self.emb_embeddings = nn.ModuleList( + [ + nn.Sequential( + nn.Linear( + feature_info["dimension"], + self.d_model, + bias=self.embedding_bias, + ), + self.embedding_activation, + ) + for feature_name, feature_info in emb_feature_info.items() + ] + ) + # Class token if required if self.use_cls: self.cls_token = nn.Parameter(torch.zeros(1, 1, self.d_model)) @@ -108,15 +124,12 @@ def __init__(self, num_feature_info, cat_feature_info, config): if self.layer_norm_after_embedding: self.embedding_norm = nn.LayerNorm(self.d_model) - def forward(self, num_features=None, cat_features=None): + def forward(self, num_features, cat_features, emb_features): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor, optional - Tensor containing the numerical features. - cat_features : Tensor, optional - Tensor containing the categorical features. + data: tuple of lists of tensors Returns ------- @@ -128,6 +141,7 @@ def forward(self, num_features=None, cat_features=None): ValueError If no features are provided to the model. 
""" + num_embeddings, cat_embeddings, emb_embeddings = None, None, None # Class token initialization if self.use_cls: @@ -147,8 +161,6 @@ def forward(self, num_features=None, cat_features=None): cat_embeddings = torch.squeeze(cat_embeddings, dim=2) if self.layer_norm_after_embedding: cat_embeddings = self.embedding_norm(cat_embeddings) - else: - cat_embeddings = None # Process numerical embeddings based on embedding_type if self.embedding_type == "plr": @@ -161,8 +173,6 @@ def forward(self, num_features=None, cat_features=None): num_embeddings = self.num_embeddings(num_features) if self.layer_norm_after_embedding: num_embeddings = self.embedding_norm(num_embeddings) - else: - num_embeddings = None else: # For linear and ndt embeddings, handle each feature individually if self.num_embeddings and num_features is not None: @@ -170,16 +180,23 @@ def forward(self, num_features=None, cat_features=None): num_embeddings = torch.stack(num_embeddings, dim=1) if self.layer_norm_after_embedding: num_embeddings = self.embedding_norm(num_embeddings) - else: - num_embeddings = None - - # Combine categorical and numerical embeddings - if cat_embeddings is not None and num_embeddings is not None: - x = torch.cat([cat_embeddings, num_embeddings], dim=1) - elif cat_embeddings is not None: - x = cat_embeddings - elif num_embeddings is not None: - x = num_embeddings + + if self.embedding_projection: + emb_embeddings = [ + emb(emb_features[i]) for i, emb in enumerate(self.emb_embeddings) + ] + emb_embeddings = torch.stack(emb_embeddings, dim=1) + else: + emb_embeddings = torch.stack(emb_features, dim=1) + if self.layer_norm_after_embedding: + emb_embeddings = self.embedding_norm(emb_embeddings) + + embeddings = [ + e for e in [cat_embeddings, num_embeddings, emb_embeddings] if e is not None + ] + + if embeddings: + x = torch.cat(embeddings, dim=1) if len(embeddings) > 1 else embeddings[0] else: raise ValueError("No features provided to the model.") From 4d5f94a9ce5cf568029bbce8607ad74ad5a55269 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:41:53 +0100 Subject: [PATCH 02/24] adapt basemodel encoding function to tuple input --- mambular/base_models/basemodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mambular/base_models/basemodel.py b/mambular/base_models/basemodel.py index fd21852..49b56cd 100644 --- a/mambular/base_models/basemodel.py +++ b/mambular/base_models/basemodel.py @@ -223,7 +223,7 @@ def pool_sequence(self, out): else: raise ValueError(f"Invalid pooling method: {self.hparams.pooling_method}") - def encode(self, num_features, cat_features): + def encode(self, data): if not hasattr(self, "embedding_layer"): raise ValueError("The model does not have an embedding layer") @@ -237,7 +237,7 @@ def encode(self, num_features, cat_features): raise ValueError("The model does not generate contextualized embeddings") # Get the actual layer and call it - x = self.embedding_layer(num_features=num_features, cat_features=cat_features) + x = self.embedding_layer(*data) if getattr(self.hparams, "shuffle_embeddings", False): x = x[:, self.perm, :] From adc6d191c04a9f025aa36fc7fb4a3060a53bd7dd Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:42:08 +0100 Subject: [PATCH 03/24] batch now returns tuple and *data is passed to forward method --- mambular/base_models/lightning_wrapper.py | 45 +++++++++++++---------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/mambular/base_models/lightning_wrapper.py b/mambular/base_models/lightning_wrapper.py 
index 1d8530e..f1c836c 100644 --- a/mambular/base_models/lightning_wrapper.py +++ b/mambular/base_models/lightning_wrapper.py @@ -30,8 +30,7 @@ def __init__( self, model_class: type[nn.Module], config, - cat_feature_info, - num_feature_info, + feature_information, num_classes=1, lss=False, family=None, @@ -91,13 +90,12 @@ def __init__( self.base_model = model_class( config=config, - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + feature_information=feature_information, num_classes=output_dim, **kwargs, ) - def forward(self, num_features, cat_features): + def forward(self, num_features, cat_features, embeddings): """Forward pass through the model. Parameters @@ -113,7 +111,7 @@ def forward(self, num_features, cat_features): Model output. """ - return self.base_model.forward(num_features, cat_features) + return self.base_model.forward(num_features, cat_features, embeddings) def compute_loss(self, predictions, y_true): """Compute the loss for the given predictions and true labels. @@ -145,7 +143,10 @@ def compute_loss(self, predictions, y_true): ) if getattr(self.base_model, "returns_ensemble", False): # Ensemble case - if self.loss_fct.__class__.__name__ == "CrossEntropyLoss" and predictions.dim() == 3: + if ( + self.loss_fct.__class__.__name__ == "CrossEntropyLoss" + and predictions.dim() == 3 + ): # Classification case with ensemble: predictions (N, E, k), y_true (N,) N, E, k = predictions.shape loss = 0.0 @@ -186,18 +187,20 @@ def training_step(self, batch, batch_idx): # type: ignore Tensor Training loss. """ - num_features, cat_features, labels = batch + data, labels = batch # Check if the model has a `penalty_forward` method if hasattr(self.base_model, "penalty_forward"): - preds, penalty = self.base_model.penalty_forward(num_features=num_features, cat_features=cat_features) + preds, penalty = self.base_model.penalty_forward(*data) loss = self.compute_loss(preds, labels) + penalty else: - preds = self(num_features=num_features, cat_features=cat_features) + preds = self(*data) loss = self.compute_loss(preds, labels) # Log the training loss - self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) + self.log( + "train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True + ) # Log custom training metrics for metric_name, metric_fn in self.train_metrics.items(): @@ -229,8 +232,8 @@ def validation_step(self, batch, batch_idx): # type: ignore Validation loss. """ - num_features, cat_features, labels = batch - preds = self(num_features=num_features, cat_features=cat_features) + data, labels = batch + preds = self(*data) val_loss = self.compute_loss(preds, labels) self.log( @@ -271,8 +274,8 @@ def test_step(self, batch, batch_idx): # type: ignore Tensor Test loss. """ - num_features, cat_features, labels = batch - preds = self(num_features=num_features, cat_features=cat_features) + data, labels = batch + preds = self(*data) test_loss = self.compute_loss(preds, labels) self.log( @@ -302,8 +305,7 @@ def predict_step(self, batch, batch_idx): Predictions. 
""" - num_features, cat_features = batch - preds = self(num_features=num_features, cat_features=cat_features) + preds = self(*batch) return preds @@ -346,8 +348,13 @@ def on_validation_epoch_end(self): # Apply pruning logic if needed if self.current_epoch >= self.pruning_epoch: - if self.early_pruning_threshold is not None and val_loss_value > self.early_pruning_threshold: - print(f"Pruned at epoch {self.current_epoch}, val_loss {val_loss_value}") + if ( + self.early_pruning_threshold is not None + and val_loss_value > self.early_pruning_threshold + ): + print( + f"Pruned at epoch {self.current_epoch}, val_loss {val_loss_value}" + ) self.trainer.should_stop = True # Stop training early def epoch_val_loss_at(self, epoch): From a02b9dd7b11e590107a0cba8e031c04969e9a409 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:42:18 +0100 Subject: [PATCH 04/24] first two basemodels adapted to new logic --- mambular/base_models/mlp.py | 33 ++++++++++++++++--------------- mambular/base_models/tabularnn.py | 15 ++++++-------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/mambular/base_models/mlp.py b/mambular/base_models/mlp.py index 0c9251f..1a38871 100644 --- a/mambular/base_models/mlp.py +++ b/mambular/base_models/mlp.py @@ -5,6 +5,7 @@ from ..configs.mlp_config import DefaultMLPConfig from ..utils.get_feature_dimensions import get_feature_dimensions from .basemodel import BaseModel +import numpy as np class MLP(BaseModel): @@ -57,31 +58,29 @@ class MLP(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (cat_feature_info, num_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultMLPConfig = DefaultMLPConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info # Initialize layers self.layers = nn.ModuleList() - input_dim = get_feature_dimensions(num_feature_info, cat_feature_info) + input_dim = get_feature_dimensions(*feature_information) if self.hparams.use_embeddings: self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) - input_dim = len(num_feature_info) * self.hparams.d_model + len(cat_feature_info) * self.hparams.d_model + input_dim = np.sum( + [len(info) * self.hparams.d_model for info in feature_information] + ) # Input layer self.layers.append(nn.Linear(input_dim, self.hparams.layer_sizes[0])) @@ -97,7 +96,9 @@ def __init__( # Hidden layers for i in range(1, len(self.hparams.layer_sizes)): - self.layers.append(nn.Linear(self.hparams.layer_sizes[i - 1], self.hparams.layer_sizes[i])) + self.layers.append( + nn.Linear(self.hparams.layer_sizes[i - 1], self.hparams.layer_sizes[i]) + ) if self.hparams.batch_norm: self.layers.append(nn.BatchNorm1d(self.hparams.layer_sizes[i])) if self.hparams.layer_norm: @@ -112,26 +113,26 @@ def __init__( # Output layer self.layers.append(nn.Linear(self.hparams.layer_sizes[-1], num_classes)) - def forward(self, num_features, cat_features) -> torch.Tensor: + def forward(self, *data) -> torch.Tensor: """Forward pass of the MLP model. Parameters ---------- - x : torch.Tensor - Input tensor. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. 
Returns ------- torch.Tensor Output tensor. """ + if self.hparams.use_embeddings: - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) B, S, D = x.shape x = x.reshape(B, S * D) else: - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) for i in range(len(self.layers) - 1): if isinstance(self.layers[i], nn.Linear): diff --git a/mambular/base_models/tabularnn.py b/mambular/base_models/tabularnn.py index d4824e9..5699bf7 100644 --- a/mambular/base_models/tabularnn.py +++ b/mambular/base_models/tabularnn.py @@ -12,10 +12,10 @@ class TabulaRNN(BaseModel): + def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (cat_feature_info, num_feature_info, embedding_feature_info) num_classes=1, config: DefaultTabulaRNNConfig = DefaultTabulaRNNConfig(), # noqa: B008 **kwargs, @@ -24,14 +24,11 @@ def __init__( self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info self.rnn = ConvRNN(config) self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) @@ -50,10 +47,10 @@ def __init__( self.norm_f = get_normalization_layer(temp_config) # pooling - n_inputs = len(num_feature_info) + len(cat_feature_info) + n_inputs = [len(info) for info in feature_information] self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters @@ -69,7 +66,7 @@ def forward(self, num_features, cat_features): The output predictions of the model. """ - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) # RNN forward pass out, _ = self.rnn(x) z = self.linear(torch.mean(x, dim=1)) From 10d1c00487474c023ed7e308dfc4d02e4282b68f Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:42:41 +0100 Subject: [PATCH 05/24] major changes in handling embeddings as array/list inputs in addition to tabular data --- mambular/data_utils/datamodule.py | 123 +++++++++++++++++++++++++----- 1 file changed, 103 insertions(+), 20 deletions(-) diff --git a/mambular/data_utils/datamodule.py b/mambular/data_utils/datamodule.py index b6bfb32..459b8c1 100644 --- a/mambular/data_utils/datamodule.py +++ b/mambular/data_utils/datamodule.py @@ -78,6 +78,8 @@ def __init__( # Initialize placeholders for data self.X_train = None self.y_train = None + self.embeddings_train = None + self.embeddings_val = None self.test_preprocessor_fitted = False self.dataloader_kwargs = dataloader_kwargs @@ -87,6 +89,8 @@ def preprocess_data( y_train, X_val=None, y_val=None, + embeddings_train=None, + embeddings_val=None, val_size=0.2, random_state=101, ): @@ -98,10 +102,14 @@ def preprocess_data( Training feature set. y_train : array-like, shape (n_samples_train,) Training target values. + embeddings_train : array-like or list of array-like, optional + Training embeddings if available. X_val : DataFrame or array-like, shape (n_samples_val, n_features), optional Validation feature set. If None, a validation set will be created from `X_train`. y_val : array-like, shape (n_samples_val,), optional Validation target values. If None, a validation set will be created from `y_train`. 
+ embeddings_val : array-like or list of array-like, optional + Validation embeddings if available. val_size : float, optional Proportion of data to include in the validation split if `X_val` and `y_val` are None. random_state : int, optional @@ -113,41 +121,85 @@ def preprocess_data( """ if X_val is None or y_val is None: - self.X_train, self.X_val, self.y_train, self.y_val = train_test_split( - X_train, y_train, test_size=val_size, random_state=random_state - ) + split_data = [X_train, y_train] + + if embeddings_train is not None: + if not isinstance(embeddings_train, list): + embeddings_train = [embeddings_train] + if embeddings_val is not None and not isinstance(embeddings_val, list): + embeddings_val = [embeddings_val] + + split_data += embeddings_train + split_result = train_test_split( + *split_data, test_size=val_size, random_state=random_state + ) + + self.X_train, self.X_val, self.y_train, self.y_val = split_result[:4] + self.embeddings_train = split_result[4::2] + self.embeddings_val = split_result[5::2] + else: + self.X_train, self.X_val, self.y_train, self.y_val = train_test_split( + *split_data, test_size=val_size, random_state=random_state + ) + self.embeddings_train = None + self.embeddings_val = None else: self.X_train = X_train self.y_train = y_train self.X_val = X_val self.y_val = y_val + if embeddings_train is not None and embeddings_val is not None: + if not isinstance(embeddings_train, list): + embeddings_train = [embeddings_train] + if not isinstance(embeddings_val, list): + embeddings_val = [embeddings_val] + self.embeddings_train = embeddings_train + self.embeddings_val = embeddings_val + else: + self.embeddings_train = None + self.embeddings_val = None + # Fit the preprocessor on the combined training and validation data combined_X = pd.concat([self.X_train, self.X_val], axis=0).reset_index( drop=True ) combined_y = np.concatenate((self.y_train, self.y_val), axis=0) - # Fit the preprocessor - self.preprocessor.fit(combined_X, combined_y) + if self.embeddings_train is not None and self.embeddings_val is not None: + combined_embeddings = [ + np.concatenate((emb_train, emb_val), axis=0) + for emb_train, emb_val in zip( + self.embeddings_train, self.embeddings_val + ) + ] + else: + combined_embeddings = None + + self.preprocessor.fit(combined_X, combined_y, combined_embeddings) # Update feature info based on the actual processed data - ( - self.num_feature_info, - self.cat_feature_info, - ) = self.preprocessor.get_feature_info() + (self.num_feature_info, self.cat_feature_info, self.embedding_feature_info) = ( + self.preprocessor.get_feature_info() + ) def setup(self, stage: str): """Transform the data and create DataLoaders.""" if stage == "fit": - train_preprocessed_data = self.preprocessor.transform(self.X_train) - val_preprocessed_data = self.preprocessor.transform(self.X_val) + train_preprocessed_data = self.preprocessor.transform( + self.X_train, self.embeddings_train + ) + val_preprocessed_data = self.preprocessor.transform( + self.X_val, self.embeddings_val + ) # Initialize lists for tensors train_cat_tensors = [] train_num_tensors = [] + train_emb_tensors = [] val_cat_tensors = [] val_num_tensors = [] + val_emb_tensors = [] # Populate tensors for categorical features, if present in processed data for key in self.cat_feature_info: # type: ignore @@ -201,6 +253,21 @@ def setup(self, stage: str): ) ) + if self.embedding_feature_info is not None: + for key in self.embedding_feature_info: + if key in train_preprocessed_data: + train_emb_tensors.append( + 
torch.tensor( + train_preprocessed_data[key], dtype=torch.float32 + ) + ) + if key in val_preprocessed_data: + val_emb_tensors.append( + torch.tensor( + val_preprocessed_data[key], dtype=torch.float32 + ) + ) + train_labels = torch.tensor( self.y_train, dtype=self.labels_dtype ).unsqueeze(dim=1) @@ -208,21 +275,26 @@ def setup(self, stage: str): dim=1 ) - # Create datasets self.train_dataset = MambularDataset( train_cat_tensors, train_num_tensors, + train_emb_tensors, train_labels, regression=self.regression, ) self.val_dataset = MambularDataset( - val_cat_tensors, val_num_tensors, val_labels, regression=self.regression + val_cat_tensors, + val_num_tensors, + val_emb_tensors, + val_labels, + regression=self.regression, ) - def preprocess_new_data(self, X): + def preprocess_new_data(self, X, embeddings): cat_tensors = [] num_tensors = [] - preprocessed_data = self.preprocessor.transform(X) + emb_tensors = [] + preprocessed_data = self.preprocessor.transform(X, embeddings) # Populate tensors for categorical features, if present in processed data for key in self.cat_feature_info: # type: ignore @@ -254,15 +326,26 @@ def preprocess_new_data(self, X): torch.tensor(preprocessed_data[num_key], dtype=torch.float32) ) + if self.embedding_feature_info is not None: + for key in self.embedding_feature_info: + if key in preprocessed_data: + emb_tensors.append( + torch.tensor(preprocessed_data[key], dtype=torch.float32) + ) + return MambularDataset( - cat_tensors, num_tensors, labels=None, regression=self.regression + cat_tensors, + num_tensors, + emb_tensors, + labels=None, + regression=self.regression, ) - def assign_predict_dataset(self, X): - self.predict_dataset = self.preprocess_new_data(X) + def assign_predict_dataset(self, X, embeddings=None): + self.predict_dataset = self.preprocess_new_data(X, embeddings) - def assign_test_dataset(self, X): - self.test_dataset = self.preprocess_new_data(X) + def assign_test_dataset(self, X, embeddings=None): + self.test_dataset = self.preprocess_new_data(X, embeddings) def train_dataloader(self): """Returns the training dataloader. From cbe8dd36bb63554d1eba9e9cd7d8416abdddfeba Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:43:00 +0100 Subject: [PATCH 06/24] dataset returns tuple of data (cat, num, emb), label --- mambular/data_utils/dataset.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/mambular/data_utils/dataset.py b/mambular/data_utils/dataset.py index 6bb0485..db6c63a 100644 --- a/mambular/data_utils/dataset.py +++ b/mambular/data_utils/dataset.py @@ -11,13 +11,22 @@ class MambularDataset(Dataset): ---------- cat_features_list (list of Tensors): A list of tensors representing the categorical features. num_features_list (list of Tensors): A list of tensors representing the numerical features. + embeddings_list (list of Tensors, optional): A list of tensors representing the embeddings. labels (Tensor, optional): A tensor of labels. If None, the dataset is used for prediction. regression (bool, optional): A flag indicating if the dataset is for a regression task. Defaults to True. 
""" - def __init__(self, cat_features_list, num_features_list, labels=None, regression=True): + def __init__( + self, + cat_features_list, + num_features_list, + embeddings_list=None, + labels=None, + regression=True, + ): self.cat_features_list = cat_features_list # Categorical features tensors self.num_features_list = num_features_list # Numerical features tensors + self.embeddings_list = embeddings_list # Embeddings tensors (optional) self.regression = regression if labels is not None: @@ -46,15 +55,25 @@ def __getitem__(self, idx): Returns ------- - tuple: A tuple containing two lists of tensors (one for categorical features and one for numerical features) - and a single label (if available). + tuple: A tuple containing lists of tensors for numerical features, categorical features, embeddings + (if available), and a label (if available). """ - cat_features = [feature_tensor[idx] for feature_tensor in self.cat_features_list] + cat_features = [ + feature_tensor[idx] for feature_tensor in self.cat_features_list + ] num_features = [ torch.as_tensor(feature_tensor[idx]).clone().detach().to(torch.float32) for feature_tensor in self.num_features_list ] + if self.embeddings_list is not None: + embeddings = [ + torch.as_tensor(embed_tensor[idx]).clone().detach().to(torch.float32) + for embed_tensor in self.embeddings_list + ] + else: + embeddings = None + if self.labels is not None: label = self.labels[idx] if self.regression: @@ -63,6 +82,7 @@ def __getitem__(self, idx): label = label.clone().detach().to(torch.float32) else: label = label.clone().detach().to(torch.long) - return num_features, cat_features, label + + return (num_features, cat_features, embeddings), label else: - return num_features, cat_features # No label in prediction mode + return (num_features, cat_features, embeddings) From b84aa50d1a93c0d709805556126bd07cf4c192ee Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:43:15 +0100 Subject: [PATCH 07/24] adjust two first basemodel configs to handle projection for embeddings --- mambular/configs/mlp_config.py | 1 + mambular/configs/tabularnn_config.py | 1 + 2 files changed, 2 insertions(+) diff --git a/mambular/configs/mlp_config.py b/mambular/configs/mlp_config.py index 0c43cc1..08711be 100644 --- a/mambular/configs/mlp_config.py +++ b/mambular/configs/mlp_config.py @@ -83,3 +83,4 @@ class DefaultMLPConfig: plr_lite: bool = False n_frequencies: int = 48 frequencies_init_scale: float = 0.01 + embedding_projection: bool = True diff --git a/mambular/configs/tabularnn_config.py b/mambular/configs/tabularnn_config.py index 037c96d..f945fbe 100644 --- a/mambular/configs/tabularnn_config.py +++ b/mambular/configs/tabularnn_config.py @@ -97,6 +97,7 @@ class DefaultTabulaRNNConfig: frequencies_init_scale: float = 0.01 embedding_activation: Callable = nn.ReLU() # noqa: RUF009 layer_norm_after_embedding: bool = False + embedding_projection: bool = True # Head params head_layer_sizes: list = field(default_factory=list) From 8cc3e8368d0b30bfb0aa5c426dce66e6bab4b1ad Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:43:29 +0100 Subject: [PATCH 08/24] adapt first only regressor and classifier to handle embeddings --- mambular/models/sklearn_base_classifier.py | 161 ++++++++++++++++----- mambular/models/sklearn_base_regressor.py | 151 ++++++++++++++----- 2 files changed, 235 insertions(+), 77 deletions(-) diff --git a/mambular/models/sklearn_base_classifier.py b/mambular/models/sklearn_base_classifier.py index 6317e62..1149e14 100644 --- 
a/mambular/models/sklearn_base_classifier.py +++ b/mambular/models/sklearn_base_classifier.py @@ -8,7 +8,7 @@ import torch from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, ModelSummary from sklearn.base import BaseEstimator -from sklearn.metrics import accuracy_score, log_loss, mean_squared_error +from sklearn.metrics import accuracy_score, log_loss from skopt import gp_minimize from torch.utils.data import DataLoader from tqdm import tqdm @@ -16,7 +16,11 @@ from ..base_models.lightning_wrapper import TaskModel from ..data_utils.datamodule import MambularDataModule from ..preprocessing import Preprocessor -from ..utils.config_mapper import activation_mapper, get_search_space, round_to_nearest_16 +from ..utils.config_mapper import ( + activation_mapper, + get_search_space, + round_to_nearest_16, +) class SklearnBaseClassifier(BaseEstimator): @@ -39,11 +43,15 @@ def __init__(self, model, config, **kwargs): ] self.config_kwargs = { - k: v for k, v in kwargs.items() if k not in self.preprocessor_arg_names and not k.startswith("optimizer") + k: v + for k, v in kwargs.items() + if k not in self.preprocessor_arg_names and not k.startswith("optimizer") } self.config = config(**self.config_kwargs) - preprocessor_kwargs = {k: v for k, v in kwargs.items() if k in self.preprocessor_arg_names} + preprocessor_kwargs = { + k: v for k, v in kwargs.items() if k in self.preprocessor_arg_names + } self.preprocessor = Preprocessor(**preprocessor_kwargs) self.task_model = None @@ -63,7 +71,8 @@ def __init__(self, model, config, **kwargs): self.optimizer_kwargs = { k: v for k, v in kwargs.items() - if k not in ["lr", "weight_decay", "patience", "lr_patience", "optimizer_type"] + if k + not in ["lr", "weight_decay", "patience", "lr_patience", "optimizer_type"] and k.startswith("optimizer_") } @@ -84,7 +93,10 @@ def get_params(self, deep=True): params.update(self.config_kwargs) if deep: - preprocessor_params = {"prepro__" + key: value for key, value in self.preprocessor.get_params().items()} + preprocessor_params = { + "prepro__" + key: value + for key, value in self.preprocessor.get_params().items() + } params.update(preprocessor_params) return params @@ -102,8 +114,14 @@ def set_params(self, **parameters): self : object Estimator instance. 
""" - config_params = {k: v for k, v in parameters.items() if not k.startswith("prepro__")} - preprocessor_params = {k.split("__")[1]: v for k, v in parameters.items() if k.startswith("prepro__")} + config_params = { + k: v for k, v in parameters.items() if not k.startswith("prepro__") + } + preprocessor_params = { + k.split("__")[1]: v + for k, v in parameters.items() + if k.startswith("prepro__") + } if config_params: self.config_kwargs.update(config_params) @@ -125,6 +143,8 @@ def build_model( val_size: float = 0.2, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, random_state: int = 101, batch_size: int = 128, shuffle: bool = True, @@ -201,7 +221,16 @@ def build_model( **dataloader_kwargs, ) - self.data_module.preprocess_data(X, y, X_val, y_val, val_size=val_size, random_state=random_state) + self.data_module.preprocess_data( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings_train=embeddings, + embeddings_val=embeddings_val, + val_size=val_size, + random_state=random_state, + ) num_classes = len(np.unique(np.array(y))) @@ -209,12 +238,19 @@ def build_model( model_class=self.base_model, # type: ignore num_classes=num_classes, config=self.config, - cat_feature_info=self.data_module.cat_feature_info, - num_feature_info=self.data_module.num_feature_info, - lr_patience=(lr_patience if lr_patience is not None else self.config.lr_patience), + feature_information=( + self.data_module.num_feature_info, + self.data_module.cat_feature_info, + self.data_module.embedding_feature_info, + ), + lr_patience=( + lr_patience if lr_patience is not None else self.config.lr_patience + ), lr=lr if lr is not None else self.config.lr, lr_factor=lr_factor if lr_factor is not None else self.config.lr_factor, - weight_decay=(weight_decay if weight_decay is not None else self.config.weight_decay), + weight_decay=( + weight_decay if weight_decay is not None else self.config.weight_decay + ), train_metrics=train_metrics, val_metrics=val_metrics, optimizer_type=self.optimizer_type, @@ -245,7 +281,9 @@ def get_number_of_params(self, requires_grad=True): If the model has not been built prior to calling this method. """ if not self.built: - raise ValueError("The model must be built before the number of parameters can be estimated") + raise ValueError( + "The model must be built before the number of parameters can be estimated" + ) else: if requires_grad: return sum(p.numel() for p in self.task_model.parameters() if p.requires_grad) # type: ignore @@ -259,6 +297,8 @@ def fit( val_size: float = 0.2, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, max_epochs: int = 100, random_state: int = 101, batch_size: int = 128, @@ -340,6 +380,8 @@ def fit( val_size=val_size, X_val=X_val, y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, random_state=random_state, batch_size=batch_size, shuffle=shuffle, @@ -390,7 +432,7 @@ def fit( return self - def predict(self, X, device=None): + def predict(self, X, embeddings=None, device=None): """Predicts target labels for the given input samples. 
Parameters @@ -408,7 +450,7 @@ def predict(self, X, device=None): raise ValueError("The model or data module has not been fitted yet.") # Preprocess the data using the data module - self.data_module.assign_predict_dataset(X) + self.data_module.assign_predict_dataset(X, embeddings) # Set model to evaluation mode self.task_model.eval() @@ -438,7 +480,7 @@ def predict(self, X, device=None): # Convert predictions to NumPy array and return return predictions.cpu().numpy() - def predict_proba(self, X, device=None): + def predict_proba(self, X, embeddings=None, device=None): """Predicts class probabilities for the given input samples. Parameters @@ -482,7 +524,7 @@ def predict_proba(self, X, device=None): # Convert probabilities to NumPy array and return return probabilities.cpu().numpy() - def evaluate(self, X, y_true, metrics=None): + def evaluate(self, X, y_true, embeddings=None, metrics=None): """Evaluate the model on the given data using specified metrics. Parameters @@ -491,6 +533,8 @@ def evaluate(self, X, y_true, metrics=None): The input samples to predict. y_true : array-like of shape (n_samples,) The true class labels against which to evaluate the predictions. + embneddings : array-like or list of shape(n_samples, dimension) + List or array with embeddings for unstructured data inputs metrics : dict A dictionary where keys are metric names and values are tuples containing the metric function and a boolean indicating whether the metric requires probability scores (True) or class labels (False). @@ -518,11 +562,11 @@ def evaluate(self, X, y_true, metrics=None): # Generate class probabilities if any metric requires them if any(use_proba for _, use_proba in metrics.values()): - probabilities = self.predict_proba(X) + probabilities = self.predict_proba(X, embeddings) # Generate class labels if any metric requires them if any(not use_proba for _, use_proba in metrics.values()): - predictions = self.predict(X) + predictions = self.predict(X, embeddings) # Compute each metric for metric_name, (metric_func, use_proba) in metrics.items(): @@ -533,7 +577,7 @@ def evaluate(self, X, y_true, metrics=None): return scores - def score(self, X, y, metric=(log_loss, True)): + def score(self, X, y, embeddings=None, metric=(log_loss, True)): """Calculate the score of the model using the specified metric. Parameters @@ -557,13 +601,13 @@ def score(self, X, y, metric=(log_loss, True)): X = pd.DataFrame(X) if use_proba: - probabilities = self.predict_proba(X) + probabilities = self.predict_proba(X, embeddings) return metric_func(y, probabilities) else: - predictions = self.predict(X) + predictions = self.predict(X, embeddings) return metric_func(y, predictions) - def encode(self, X, batch_size=64): + def encode(self, X, embeddings=None, batch_size=64): """ Encodes input data using the trained model's embedding layer. 
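The classifier changes above make precomputed embeddings an optional, explicitly named argument of the sklearn-style API (`fit`, `predict`, `predict_proba`, `evaluate`, `score`, `encode`). A minimal usage sketch with toy data; `MambularClassifier` is assumed here as one of the concrete estimators built on `SklearnBaseClassifier`, and the random array stands in for, e.g., precomputed text embeddings:

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from mambular.models import MambularClassifier  # assumed concrete subclass of SklearnBaseClassifier

# toy tabular data plus one precomputed 384-dim embedding per row
X = pd.DataFrame({"age": np.random.rand(256), "city": np.random.choice(["a", "b", "c"], 256)})
y = np.random.randint(0, 2, size=256)
text_emb = np.random.rand(256, 384).astype(np.float32)

clf = MambularClassifier()
# a single array or a list of arrays is accepted; without an explicit validation
# set it is split alongside X and y
clf.fit(X, y, embeddings=text_emb, max_epochs=5)
proba = clf.predict_proba(X, embeddings=text_emb)
scores = clf.evaluate(X, y, embeddings=text_emb, metrics={"Accuracy": (accuracy_score, False)})

When `X_val` and `y_val` are supplied explicitly, a matching `embeddings_val` must be passed as well; otherwise the embeddings are silently dropped for that fit.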
@@ -587,14 +631,16 @@ def encode(self, X, batch_size=64): # Ensure model and data module are initialized if self.task_model is None or self.data_module is None: raise ValueError("The model or data module has not been fitted yet.") - encoded_dataset = self.data_module.preprocess_new_data(X) + encoded_dataset = self.data_module.preprocess_new_data(X, embeddings) data_loader = DataLoader(encoded_dataset, batch_size=batch_size, shuffle=False) # Process data in batches encoded_outputs = [] - for num_features, cat_features in tqdm(data_loader): - embeddings = self.task_model.base_model.encode(num_features, cat_features) # Call your encode function + for batch in tqdm(data_loader): + embeddings = self.task_model.base_model.encode( + batch + ) # Call your encode function encoded_outputs.append(embeddings) # Concatenate all encoded outputs @@ -608,6 +654,8 @@ def optimize_hparams( y, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, time=100, max_epochs=200, prune_by_epoch=True, @@ -658,13 +706,25 @@ def optimize_hparams( ) # Initial model fitting to get the baseline validation loss - self.fit(X, y, X_val=X_val, y_val=y_val, max_epochs=max_epochs) + self.fit( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, + max_epochs=max_epochs, + ) best_val_loss = float("inf") if X_val is not None and y_val is not None: - val_loss = self.evaluate(X_val, y_val, metrics={"Accuracy": (accuracy_score, False)})["Accuracy"] + val_loss = self.evaluate( + X_val, y_val, metrics={"Accuracy": (accuracy_score, False)} + )["Accuracy"] else: - val_loss = self.trainer.validate(self.task_model, self.data_module)[0]["val_loss"] + val_loss = self.trainer.validate(self.task_model, self.data_module)[0][ + "val_loss" + ] best_val_loss = val_loss best_epoch_val_loss = self.task_model.epoch_val_loss_at( # type: ignore @@ -690,7 +750,9 @@ def _objective(hyperparams): if param_value in activation_mapper: setattr(self.config, key, activation_mapper[param_value]) else: - raise ValueError(f"Unknown activation function: {param_value}") + raise ValueError( + f"Unknown activation function: {param_value}" + ) else: setattr(self.config, key, param_value) @@ -699,11 +761,15 @@ def _objective(hyperparams): self.config.head_layer_sizes = head_layer_sizes[:head_layer_size_length] # Build the model with updated hyperparameters - self.build_model(X, y, X_val=X_val, y_val=y_val, lr=self.config.lr, **optimize_kwargs) + self.build_model( + X, y, X_val=X_val, y_val=y_val, lr=self.config.lr, **optimize_kwargs + ) # Dynamically set the early pruning threshold if prune_by_epoch: - early_pruning_threshold = best_epoch_val_loss * 1.5 # Prune based on specific epoch loss + early_pruning_threshold = ( + best_epoch_val_loss * 1.5 + ) # Prune based on specific epoch loss else: # Prune based on the best overall validation loss early_pruning_threshold = best_val_loss * 1.5 @@ -715,15 +781,26 @@ def _objective(hyperparams): # Fit the model (limit epochs for faster optimization) try: # Wrap the risky operation (model fitting) in a try-except block - self.fit(X, y, X_val=X_val, y_val=y_val, max_epochs=max_epochs, rebuild=False) + self.fit( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, + max_epochs=max_epochs, + rebuild=False, + ) # Evaluate validation loss if X_val is not None and y_val is not None: - val_loss = self.evaluate(X_val, y_val, metrics={"Mean Squared Error": mean_squared_error})[ # type: ignore + val_loss = self.evaluate(X_val, y_val, 
metrics={"Accuracy": (accuracy_score, False)})[ # type: ignore "Mean Squared Error" ] else: - val_loss = self.trainer.validate(self.task_model, self.data_module)[0]["val_loss"] + val_loss = self.trainer.validate(self.task_model, self.data_module)[ + 0 + ]["val_loss"] # Pruning based on validation loss at specific epoch epoch_val_loss = self.task_model.epoch_val_loss_at( # type: ignore @@ -740,15 +817,21 @@ def _objective(hyperparams): except Exception as e: # Penalize the hyperparameter configuration with a large value - print(f"Error encountered during fit with hyperparameters {hyperparams}: {e}") - return best_val_loss * 100 # Large value to discourage this configuration + print( + f"Error encountered during fit with hyperparameters {hyperparams}: {e}" + ) + return ( + best_val_loss * 100 + ) # Large value to discourage this configuration # Perform Bayesian optimization using scikit-optimize result = gp_minimize(_objective, param_space, n_calls=time, random_state=42) # Update the model with the best-found hyperparameters best_hparams = result.x # type: ignore - head_layer_sizes = [] if "head_layer_sizes" in self.config.__dataclass_fields__ else None + head_layer_sizes = ( + [] if "head_layer_sizes" in self.config.__dataclass_fields__ else None + ) layer_sizes = [] if "layer_sizes" in self.config.__dataclass_fields__ else None # Iterate over the best hyperparameters found by optimization diff --git a/mambular/models/sklearn_base_regressor.py b/mambular/models/sklearn_base_regressor.py index 04f9ac3..94e9bac 100644 --- a/mambular/models/sklearn_base_regressor.py +++ b/mambular/models/sklearn_base_regressor.py @@ -41,11 +41,15 @@ def __init__(self, model, config, **kwargs): ] self.config_kwargs = { - k: v for k, v in kwargs.items() if k not in self.preprocessor_arg_names and not k.startswith("optimizer") + k: v + for k, v in kwargs.items() + if k not in self.preprocessor_arg_names and not k.startswith("optimizer") } self.config = config(**self.config_kwargs) - preprocessor_kwargs = {k: v for k, v in kwargs.items() if k in self.preprocessor_arg_names} + preprocessor_kwargs = { + k: v for k, v in kwargs.items() if k in self.preprocessor_arg_names + } self.preprocessor = Preprocessor(**preprocessor_kwargs) self.base_model = model @@ -65,7 +69,8 @@ def __init__(self, model, config, **kwargs): self.optimizer_kwargs = { k: v for k, v in kwargs.items() - if k not in ["lr", "weight_decay", "patience", "lr_patience", "optimizer_type"] + if k + not in ["lr", "weight_decay", "patience", "lr_patience", "optimizer_type"] and k.startswith("optimizer_") } @@ -86,7 +91,10 @@ def get_params(self, deep=True): params.update(self.config_kwargs) if deep: - preprocessor_params = {"prepro__" + key: value for key, value in self.preprocessor.get_params().items()} + preprocessor_params = { + "prepro__" + key: value + for key, value in self.preprocessor.get_params().items() + } params.update(preprocessor_params) return params @@ -104,8 +112,14 @@ def set_params(self, **parameters): self : object Estimator instance. 
""" - config_params = {k: v for k, v in parameters.items() if not k.startswith("prepro__")} - preprocessor_params = {k.split("__")[1]: v for k, v in parameters.items() if k.startswith("prepro__")} + config_params = { + k: v for k, v in parameters.items() if not k.startswith("prepro__") + } + preprocessor_params = { + k.split("__")[1]: v + for k, v in parameters.items() + if k.startswith("prepro__") + } if config_params: self.config_kwargs.update(config_params) @@ -127,6 +141,8 @@ def build_model( val_size: float = 0.2, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, random_state: int = 101, batch_size: int = 128, shuffle: bool = True, @@ -203,17 +219,33 @@ def build_model( **dataloader_kwargs, ) - self.data_module.preprocess_data(X, y, X_val, y_val, val_size=val_size, random_state=random_state) + self.data_module.preprocess_data( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings_train=embeddings, + embeddings_val=embeddings_val, + val_size=val_size, + random_state=random_state, + ) self.task_model = TaskModel( model_class=self.base_model, # type: ignore config=self.config, - cat_feature_info=self.data_module.cat_feature_info, - num_feature_info=self.data_module.num_feature_info, + feature_information=( + self.data_module.num_feature_info, + self.data_module.cat_feature_info, + self.data_module.embedding_feature_info, + ), lr=lr if lr is not None else self.config.lr, - lr_patience=(lr_patience if lr_patience is not None else self.config.lr_patience), + lr_patience=( + lr_patience if lr_patience is not None else self.config.lr_patience + ), lr_factor=lr_factor if lr_factor is not None else self.config.lr_factor, - weight_decay=(weight_decay if weight_decay is not None else self.config.weight_decay), + weight_decay=( + weight_decay if weight_decay is not None else self.config.weight_decay + ), train_metrics=train_metrics, val_metrics=val_metrics, optimizer_type=self.optimizer_type, @@ -244,7 +276,9 @@ def get_number_of_params(self, requires_grad=True): If the model has not been built prior to calling this method. """ if not self.built: - raise ValueError("The model must be built before the number of parameters can be estimated") + raise ValueError( + "The model must be built before the number of parameters can be estimated" + ) else: if requires_grad: return sum(p.numel() for p in self.task_model.parameters() if p.requires_grad) # type: ignore @@ -258,6 +292,8 @@ def fit( val_size: float = 0.2, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, max_epochs: int = 100, random_state: int = 101, batch_size: int = 128, @@ -339,6 +375,8 @@ def fit( val_size=val_size, X_val=X_val, y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, random_state=random_state, batch_size=batch_size, shuffle=shuffle, @@ -389,7 +427,7 @@ def fit( return self - def predict(self, X, device=None): + def predict(self, X, embeddings=None, device=None): """Predicts target values for the given input samples. 
Parameters @@ -408,7 +446,7 @@ def predict(self, X, device=None): raise ValueError("The model or data module has not been fitted yet.") # Preprocess the data using the data module - self.data_module.assign_predict_dataset(X) + self.data_module.assign_predict_dataset(X, embeddings) # Set model to evaluation mode self.task_model.eval() @@ -426,7 +464,7 @@ def predict(self, X, device=None): # Convert predictions to NumPy array and return return predictions.cpu().numpy() - def evaluate(self, X, y_true, metrics=None): + def evaluate(self, X, y_true, embeddings=None, metrics=None): """Evaluate the model on the given data using specified metrics. Parameters @@ -452,7 +490,7 @@ def evaluate(self, X, y_true, metrics=None): metrics = {"Mean Squared Error": mean_squared_error} # Generate predictions using the trained model - predictions = self.predict(X) + predictions = self.predict(X, embeddings=embeddings) # Initialize dictionary to store results scores = {} @@ -463,7 +501,7 @@ def evaluate(self, X, y_true, metrics=None): return scores - def score(self, X, y, metric=mean_squared_error): + def score(self, X, y, embeddings=None, metric=mean_squared_error): """Calculate the score of the model using the specified metric. Parameters @@ -480,10 +518,10 @@ def score(self, X, y, metric=mean_squared_error): score : float The score calculated using the specified metric. """ - predictions = self.predict(X) + predictions = self.predict(X, embeddings) return metric(y, predictions) - def encode(self, X, batch_size=64): + def encode(self, X, embeddings=None, batch_size=64): """ Encodes input data using the trained model's embedding layer. @@ -507,14 +545,16 @@ def encode(self, X, batch_size=64): # Ensure model and data module are initialized if self.task_model is None or self.data_module is None: raise ValueError("The model or data module has not been fitted yet.") - encoded_dataset = self.data_module.preprocess_new_data(X) + encoded_dataset = self.data_module.preprocess_new_data(X, embeddings) data_loader = DataLoader(encoded_dataset, batch_size=batch_size, shuffle=False) # Process data in batches encoded_outputs = [] - for num_features, cat_features in tqdm(data_loader): - embeddings = self.task_model.base_model.encode(num_features, cat_features) # Call your encode function + for batch in tqdm(data_loader): + embeddings = self.task_model.base_model.encode( + batch + ) # Call your encode function encoded_outputs.append(embeddings) # Concatenate all encoded outputs @@ -528,6 +568,8 @@ def optimize_hparams( y, X_val=None, y_val=None, + embeddings=None, + embeddings_val=None, time=100, max_epochs=200, prune_by_epoch=True, @@ -578,15 +620,25 @@ def optimize_hparams( ) # Initial model fitting to get the baseline validation loss - self.fit(X, y, X_val=X_val, y_val=y_val, max_epochs=max_epochs) + self.fit( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, + max_epochs=max_epochs, + ) best_val_loss = float("inf") if X_val is not None and y_val is not None: - val_loss = self.evaluate(X_val, y_val, metrics={"Mean Squared Error": mean_squared_error})[ - "Mean Squared Error" - ] + val_loss = self.evaluate( + X_val, y_val, metrics={"Mean Squared Error": mean_squared_error} + )["Mean Squared Error"] else: - val_loss = self.trainer.validate(self.task_model, self.data_module)[0]["val_loss"] + val_loss = self.trainer.validate(self.task_model, self.data_module)[0][ + "val_loss" + ] best_val_loss = val_loss best_epoch_val_loss = self.task_model.epoch_val_loss_at( # type: ignore @@ 
-612,7 +664,9 @@ def _objective(hyperparams): if param_value in activation_mapper: setattr(self.config, key, activation_mapper[param_value]) else: - raise ValueError(f"Unknown activation function: {param_value}") + raise ValueError( + f"Unknown activation function: {param_value}" + ) else: setattr(self.config, key, param_value) @@ -621,11 +675,22 @@ def _objective(hyperparams): self.config.head_layer_sizes = head_layer_sizes[:head_layer_size_length] # Build the model with updated hyperparameters - self.build_model(X, y, X_val=X_val, y_val=y_val, lr=self.config.lr, **optimize_kwargs) + self.build_model( + X, + y, + X_val=X_val, + y_val=y_val, + embeddings=embeddings, + embeddings_val=embeddings_val, + lr=self.config.lr, + **optimize_kwargs, + ) # Dynamically set the early pruning threshold if prune_by_epoch: - early_pruning_threshold = best_epoch_val_loss * 1.5 # Prune based on specific epoch loss + early_pruning_threshold = ( + best_epoch_val_loss * 1.5 + ) # Prune based on specific epoch loss else: # Prune based on the best overall validation loss early_pruning_threshold = best_val_loss * 1.5 @@ -636,15 +701,19 @@ def _objective(hyperparams): try: # Wrap the risky operation (model fitting) in a try-except block - self.fit(X, y, X_val=X_val, y_val=y_val, max_epochs=max_epochs, rebuild=False) + self.fit( + X, y, X_val=X_val, y_val=y_val, max_epochs=max_epochs, rebuild=False + ) # Evaluate validation loss if X_val is not None and y_val is not None: - val_loss = self.evaluate(X_val, y_val, metrics={"Mean Squared Error": mean_squared_error})[ - "Mean Squared Error" - ] + val_loss = self.evaluate( + X_val, y_val, metrics={"Mean Squared Error": mean_squared_error} + )["Mean Squared Error"] else: - val_loss = self.trainer.validate(self.task_model, self.data_module)[0]["val_loss"] + val_loss = self.trainer.validate(self.task_model, self.data_module)[ + 0 + ]["val_loss"] # Pruning based on validation loss at specific epoch epoch_val_loss = self.task_model.epoch_val_loss_at( # type: ignore @@ -661,15 +730,21 @@ def _objective(hyperparams): except Exception as e: # Penalize the hyperparameter configuration with a large value - print(f"Error encountered during fit with hyperparameters {hyperparams}: {e}") - return best_val_loss * 100 # Large value to discourage this configuration + print( + f"Error encountered during fit with hyperparameters {hyperparams}: {e}" + ) + return ( + best_val_loss * 100 + ) # Large value to discourage this configuration # Perform Bayesian optimization using scikit-optimize result = gp_minimize(_objective, param_space, n_calls=time, random_state=42) # Update the model with the best-found hyperparameters best_hparams = result.x # type: ignore - head_layer_sizes = [] if "head_layer_sizes" in self.config.__dataclass_fields__ else None + head_layer_sizes = ( + [] if "head_layer_sizes" in self.config.__dataclass_fields__ else None + ) layer_sizes = [] if "layer_sizes" in self.config.__dataclass_fields__ else None # Iterate over the best hyperparameters found by optimization From 6c0bc5c52aba0b8de598ca1cab9cc6ca3ebca1d0 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:43:47 +0100 Subject: [PATCH 09/24] preprocessor does not preprocess embeddings, but takes them as input to include them in feature information --- mambular/preprocessing/preprocessor.py | 178 +++++++++++++++++++------ 1 file changed, 136 insertions(+), 42 deletions(-) diff --git a/mambular/preprocessing/preprocessor.py b/mambular/preprocessing/preprocessor.py index a691649..0fa7340 100644 --- 
a/mambular/preprocessing/preprocessor.py +++ b/mambular/preprocessing/preprocessor.py @@ -111,10 +111,14 @@ def __init__( ): self.n_bins = n_bins self.numerical_preprocessing = ( - numerical_preprocessing.lower() if numerical_preprocessing is not None else "none" + numerical_preprocessing.lower() + if numerical_preprocessing is not None + else "none" ) self.categorical_preprocessing = ( - categorical_preprocessing.lower() if categorical_preprocessing is not None else "none" + categorical_preprocessing.lower() + if categorical_preprocessing is not None + else "none" ) if self.numerical_preprocessing not in [ "ple", @@ -237,20 +241,40 @@ def _detect_column_types(self, X): numerical_features.append(col) else: if isinstance(self.cat_cutoff, float): - cutoff_condition = (num_unique_values / total_samples) < self.cat_cutoff + cutoff_condition = ( + num_unique_values / total_samples + ) < self.cat_cutoff elif isinstance(self.cat_cutoff, int): cutoff_condition = num_unique_values < self.cat_cutoff else: - raise ValueError("cat_cutoff should be either a float or an integer.") + raise ValueError( + "cat_cutoff should be either a float or an integer." + ) - if X[col].dtype.kind not in "iufc" or (X[col].dtype.kind == "i" and cutoff_condition): + if X[col].dtype.kind not in "iufc" or ( + X[col].dtype.kind == "i" and cutoff_condition + ): categorical_features.append(col) else: numerical_features.append(col) return numerical_features, categorical_features - def fit(self, X, y=None): + def _fit_embeddings(self, embeddings): + if embeddings is not None: + self.embeddings = True + self.embedding_dimensions = {} + if isinstance(embeddings, np.ndarray): + self.embedding_dimensions["embeddings_1"] = embeddings.shape[1] + elif isinstance(embeddings, list) and all( + isinstance(e, np.ndarray) for e in embeddings + ): + for idx, e in enumerate(embeddings): + self.embedding_dimensions[f"embedding_{idx+1}"] = e.shape[1] + else: + self.embeddings = False + + def fit(self, X, y=None, embeddings=None): """Fits the preprocessor to the data by identifying feature types and configuring the appropriate transformations for each feature. It sets up a column transformer with a pipeline of transformations for numerical and categorical features based on the specified preprocessing strategy. 
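`_fit_embeddings` above performs no transformation of the embedding arrays; it only flags their presence and records each source's width so that `get_feature_info` can report it later. A standalone sketch of that bookkeeping (an independent re-implementation, not the library class); note that the single-array branch in the committed code stores its key as "embeddings_1" while `transform` later looks up "embedding_1", so passing a list of arrays is the consistent path and the sketch keys everything as `embedding_<i>`:

import numpy as np

def record_embedding_dimensions(embeddings):
    # Mirrors Preprocessor._fit_embeddings: map each embedding source to its
    # per-sample dimensionality under keys "embedding_1", "embedding_2", ...
    if embeddings is None:
        return None
    if isinstance(embeddings, np.ndarray):
        embeddings = [embeddings]
    return {f"embedding_{i + 1}": e.shape[1] for i, e in enumerate(embeddings)}

# e.g. one 384-dim text source and one 512-dim image source per row
dims = record_embedding_dimensions([np.zeros((100, 384)), np.zeros((100, 512))])
assert dims == {"embedding_1": 384, "embedding_2": 512}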
@@ -269,6 +293,8 @@ def fit(self, X, y=None): if isinstance(X, dict): X = pd.DataFrame(X) + self._fit_embeddings(embeddings) + numerical_features, categorical_features = self._detect_column_types(X) transformers = [] @@ -308,7 +334,11 @@ def fit(self, X, y=None): ( "discretizer", KBinsDiscretizer( - n_bins=(bins if isinstance(bins, int) else len(bins) - 1), + n_bins=( + bins + if isinstance(bins, int) + else len(bins) - 1 + ), encode="ordinal", strategy=self.binning_strategy, # type: ignore subsample=200_000 if len(X) > 200_000 else None, @@ -337,13 +367,17 @@ def fit(self, X, y=None): numeric_transformer_steps.append(("scaler", StandardScaler())) elif self.numerical_preprocessing == "minmax": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) elif self.numerical_preprocessing == "quantile": numeric_transformer_steps.append( ( "quantile", - QuantileTransformer(n_quantiles=self.n_bins, random_state=101), + QuantileTransformer( + n_quantiles=self.n_bins, random_state=101 + ), ) ) @@ -351,7 +385,9 @@ def fit(self, X, y=None): if self.scaling_strategy == "standardization": numeric_transformer_steps.append(("scaler", StandardScaler())) elif self.scaling_strategy == "minmax": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) numeric_transformer_steps.append( ( "polynomial", @@ -366,7 +402,9 @@ def fit(self, X, y=None): if self.scaling_strategy == "standardization": numeric_transformer_steps.append(("scaler", StandardScaler())) elif self.scaling_strategy == "minmax": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) numeric_transformer_steps.append( ( "splines", @@ -385,7 +423,9 @@ def fit(self, X, y=None): if self.scaling_strategy == "standardization": numeric_transformer_steps.append(("scaler", StandardScaler())) elif self.scaling_strategy == "minmax": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) numeric_transformer_steps.append( ( "rbf", @@ -402,7 +442,9 @@ def fit(self, X, y=None): if self.scaling_strategy == "standardization": numeric_transformer_steps.append(("scaler", StandardScaler())) elif self.scaling_strategy == "minmax": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) numeric_transformer_steps.append( ( "sigmoid", @@ -416,8 +458,12 @@ def fit(self, X, y=None): ) elif self.numerical_preprocessing == "ple": - numeric_transformer_steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1)))) - numeric_transformer_steps.append(("ple", PLE(n_bins=self.n_bins, task=self.task))) + numeric_transformer_steps.append( + ("minmax", MinMaxScaler(feature_range=(-1, 1))) + ) + numeric_transformer_steps.append( + ("ple", PLE(n_bins=self.n_bins, task=self.task)) + ) elif self.numerical_preprocessing == "box-cox": numeric_transformer_steps.append( @@ -483,12 +529,18 @@ def fit(self, X, y=None): ] ) else: - raise ValueError(f"Unknown categorical_preprocessing type: {self.categorical_preprocessing}") + raise ValueError( + f"Unknown categorical_preprocessing type: 
{self.categorical_preprocessing}" + ) # Append the transformer for the current categorical feature - transformers.append((f"cat_{feature}", categorical_transformer, [feature])) + transformers.append( + (f"cat_{feature}", categorical_transformer, [feature]) + ) - self.column_transformer = ColumnTransformer(transformers=transformers, remainder="passthrough") + self.column_transformer = ColumnTransformer( + transformers=transformers, remainder="passthrough" + ) self.column_transformer.fit(X, y) self.fitted = True @@ -514,16 +566,20 @@ def _get_decision_tree_bins(self, X, y, numerical_features): bins = [] for feature in numerical_features: tree_model = ( - DecisionTreeClassifier(max_depth=3) if y.dtype.kind in "bi" else DecisionTreeRegressor(max_depth=3) + DecisionTreeClassifier(max_depth=3) + if y.dtype.kind in "bi" + else DecisionTreeRegressor(max_depth=3) ) tree_model.fit(X[[feature]], y) thresholds = tree_model.tree_.threshold[tree_model.tree_.feature != -2] # type: ignore bin_edges = np.sort(np.unique(thresholds)) - bins.append(np.concatenate(([X[feature].min()], bin_edges, [X[feature].max()]))) + bins.append( + np.concatenate(([X[feature].min()], bin_edges, [X[feature].max()])) + ) return bins - def transform(self, X): + def transform(self, X, embeddings=None): """Transforms the input data using the preconfigured column transformer and converts the output into a dictionary format with keys corresponding to transformed feature names and values as arrays of transformed data. @@ -538,8 +594,7 @@ def transform(self, X): Parameters ---------- X (DataFrame): The input data to be transformed. - X (DataFrame): The input data to be transformed. - + embeddings (np.array or list of np.arrays, optional): The embedding data to include in the transformation. Returns ------- @@ -554,6 +609,33 @@ def transform(self, X): # Now let's convert this into a dictionary of arrays, one per column transformed_dict = self._split_transformed_output(X, transformed_X) + if embeddings is not None: + assert self.embeddings is True, "self.embeddings should be True but is not." + + if isinstance(embeddings, np.ndarray): + assert ( + self.embedding_dimensions["embedding_1"] == embeddings.shape[1] + ), ( + f"Expected embedding dimension {self.embedding_dimensions['embeddings']}, " + f"but got {embeddings.shape[1]}" + ) + transformed_dict["embedding_1"] = embeddings.astype(np.float32) + elif isinstance(embeddings, list) and all( + isinstance(e, np.ndarray) for e in embeddings + ): + for idx, e in enumerate(embeddings): + key = f"embedding_{idx+1}" + assert self.embedding_dimensions[key] == e.shape[1], ( + f"Expected embedding dimension {self.embedding_dimensions[key]} for {key}, " + f"but got {e.shape[1]}" + ) + transformed_dict[key] = e.astype(np.float32) + else: + assert ( + self.embeddings is False + ), "self.embeddings should be False when embeddings are None." + self.embeddings = False + return transformed_dict def _split_transformed_output(self, X, transformed_X): @@ -592,7 +674,7 @@ def _split_transformed_output(self, X, transformed_X): start = end return transformed_dict - def fit_transform(self, X, y=None): + def fit_transform(self, X, y=None, embeddings=None): """Fits the preprocessor to the data and then transforms the data using the fitted preprocessing pipelines. This is a convenience method that combines `fit` and `transform`. 
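
For illustration, a rough sketch of the combined call follows; the Preprocessor import path, data, and constructor defaults are assumptions, while the embedding keyword, the embedding_{i} keys, and the float32 cast come from the transform() change above.

    import numpy as np
    import pandas as pd
    from mambular.preprocessing import Preprocessor  # assumed import path

    X = pd.DataFrame({"x1": np.random.rand(50), "x2": np.random.rand(50)})
    y = np.random.rand(50)
    embs = [np.random.rand(50, 16), np.random.rand(50, 8)]

    pp = Preprocessor()  # assuming default constructor arguments
    out = pp.fit_transform(X, y, embeddings=embs)
    print(sorted(k for k in out if k.startswith("embedding_")))  # ['embedding_1', 'embedding_2']
    print(out["embedding_1"].dtype)  # float32, cast inside transform()
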
@@ -607,9 +689,9 @@ def fit_transform(self, X, y=None): dict: A dictionary with the transformed data, where keys are the base feature names and values are the transformed features as arrays. """ - self.fit(X, y) + self.fit(X, y, embeddings) self.fitted = True - return self.transform(X) + return self.transform(X, embeddings) def get_feature_info(self, verbose=True): """Retrieves information about how features are encoded within the model's preprocessor. This method identifies @@ -619,24 +701,34 @@ def get_feature_info(self, verbose=True): This method should only be called after the preprocessor has been fitted, as it relies on the structure and configuration of the `column_transformer` attribute. - Raises ------ RuntimeError: If the `column_transformer` is not yet fitted, indicating that the preprocessor must be fitted before invoking this method. - Returns ------- - tuple of (dict, dict): + tuple of (dict, dict, dict): - The first dictionary maps feature names to their respective number of bins or categories if they are processed using discretization or ordinal encoding. - The second dictionary includes feature names with other encoding details, such as the dimension of features after encoding transformations (e.g., one-hot encoding dimensions). + - The third dictionary includes feature information for embeddings if available. """ numerical_feature_info = {} categorical_feature_info = {} + if self.embeddings: + embedding_feature_info = {} + for key, dim in self.embedding_dimensions.items(): + embedding_feature_info[key] = { + "preprocessing": None, + "dimension": dim, + "categories": None, + } + else: + embedding_feature_info = None + if not self.column_transformer: raise RuntimeError("The preprocessor has not been fitted yet.") @@ -648,12 +740,10 @@ def get_feature_info(self, verbose=True): steps = [step[0] for step in transformer_pipeline.steps] for feature_name in columns: - # Initialize common fields preprocessing_type = " -> ".join(steps) dimension = None categories = None - # Numerical features if "discretizer" in steps or any( step in steps for step in [ @@ -666,23 +756,23 @@ def get_feature_info(self, verbose=True): ): last_step = transformer_pipeline.steps[-1][1] if hasattr(last_step, "transform"): - # Single-column input for dimension check dummy_input = np.zeros((1, 1)) transformed_feature = last_step.transform(dummy_input) dimension = transformed_feature.shape[1] numerical_feature_info[feature_name] = { "preprocessing": preprocessing_type, "dimension": dimension, - "categories": None, # Numerical features don't have categories + "categories": None, } if verbose: - print(f"Numerical Feature: {feature_name}, Info: {numerical_feature_info[feature_name]}") + print( + f"Numerical Feature: {feature_name}, Info: {numerical_feature_info[feature_name]}" + ) - # Categorical features elif "continuous_ordinal" in steps: step = transformer_pipeline.named_steps["continuous_ordinal"] categories = len(step.mapping_[columns.index(feature_name)]) - dimension = 1 # Ordinal encoding always outputs one dimension + dimension = 1 categorical_feature_info[feature_name] = { "preprocessing": preprocessing_type, "dimension": dimension, @@ -697,7 +787,7 @@ def get_feature_info(self, verbose=True): step = transformer_pipeline.named_steps["onehot"] if hasattr(step, "categories_"): categories = sum(len(cat) for cat in step.categories_) - dimension = categories # One-hot encoding expands into multiple dimensions + dimension = categories categorical_feature_info[feature_name] = { "preprocessing": 
preprocessing_type, "dimension": dimension, @@ -708,7 +798,6 @@ def get_feature_info(self, verbose=True): f"Categorical Feature (One-Hot): {feature_name}, Info: {categorical_feature_info[feature_name]}" ) - # Fallback for other transformations else: last_step = transformer_pipeline.steps[-1][1] if hasattr(last_step, "transform"): @@ -719,20 +808,25 @@ def get_feature_info(self, verbose=True): categorical_feature_info[feature_name] = { "preprocessing": preprocessing_type, "dimension": dimension, - "categories": None, # Categories not defined for unknown categorical transformations + "categories": None, } else: numerical_feature_info[feature_name] = { "preprocessing": preprocessing_type, "dimension": dimension, - "categories": None, # Numerical features don't have categories + "categories": None, } if verbose: print( - f"Categorical Feature: {feature_name}, Info: {preprocessing_type}, Dimension: {dimension}" + f"Feature: {feature_name}, Info: {preprocessing_type}, Dimension: {dimension}" ) if verbose: print("-" * 50) - return numerical_feature_info, categorical_feature_info + if verbose and self.embeddings: + print("Embeddings:") + for key, value in embedding_feature_info.items(): + print(f" Feature: {key}, Dimension: {value['dimension']}") + + return numerical_feature_info, categorical_feature_info, embedding_feature_info From 743c214c4a5f47b224535edb92b2d90bca6ff3ef Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Thu, 23 Jan 2025 19:44:06 +0100 Subject: [PATCH 10/24] feature dimensions adapted to new output format of get_feature_info --- mambular/utils/get_feature_dimensions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mambular/utils/get_feature_dimensions.py b/mambular/utils/get_feature_dimensions.py index 7ad000d..b72980b 100644 --- a/mambular/utils/get_feature_dimensions.py +++ b/mambular/utils/get_feature_dimensions.py @@ -1,8 +1,10 @@ -def get_feature_dimensions(num_feature_info, cat_feature_info): +def get_feature_dimensions(num_feature_info, cat_feature_info, embedding_info): input_dim = 0 - for feature_name, feature_info in num_feature_info.items(): + for _, feature_info in num_feature_info.items(): input_dim += feature_info["dimension"] - for feature_name, feature_info in cat_feature_info.items(): + for _, feature_info in cat_feature_info.items(): + input_dim += feature_info["dimension"] + for _, feature_info in embedding_info.items(): input_dim += feature_info["dimension"] return input_dim From 4ec70f81549f9d5e3e0a6db06be53d91ccab07b5 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Fri, 24 Jan 2025 14:45:48 +0100 Subject: [PATCH 11/24] adapting all basemodels to new dataset __getitem__ method --- mambular/base_models/ft_transformer.py | 23 +++++------- mambular/base_models/mambatab.py | 31 ++++++++++------- mambular/base_models/mambattn.py | 28 +++++++-------- mambular/base_models/mambular.py | 21 +++++------ mambular/base_models/mlp.py | 9 +++-- mambular/base_models/ndtf.py | 34 +++++++++--------- mambular/base_models/node.py | 26 +++++++------- mambular/base_models/resnet.py | 39 ++++++++++++--------- mambular/base_models/saint.py | 27 ++++++--------- mambular/base_models/tabm.py | 48 ++++++++++++++++---------- mambular/base_models/tabtransformer.py | 26 ++++++-------- mambular/base_models/tabularnn.py | 5 ++- 12 files changed, 158 insertions(+), 159 deletions(-) diff --git a/mambular/base_models/ft_transformer.py b/mambular/base_models/ft_transformer.py index 56e546d..f0c7fb8 100644 --- a/mambular/base_models/ft_transformer.py +++ 
b/mambular/base_models/ft_transformer.py @@ -6,6 +6,7 @@ from ..arch_utils.transformer_utils import CustomTransformerEncoderLayer from ..configs.fttransformer_config import DefaultFTTransformerConfig from .basemodel import BaseModel +import numpy as np class FTTransformer(BaseModel): @@ -52,22 +53,18 @@ class FTTransformer(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultFTTransformerConfig = DefaultFTTransformerConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) @@ -87,25 +84,23 @@ def __init__( ) # pooling - n_inputs = len(num_feature_info) + len(cat_feature_info) + n_inputs = np.sum([len(info) for info in feature_information]) self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor - Tensor containing the numerical features. - cat_features : Tensor - Tensor containing the categorical features. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- Tensor The output predictions of the model. """ - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) x = self.encoder(x) diff --git a/mambular/base_models/mambatab.py b/mambular/base_models/mambatab.py index fa1e231..b1b111a 100644 --- a/mambular/base_models/mambatab.py +++ b/mambular/base_models/mambatab.py @@ -5,6 +5,7 @@ from ..arch_utils.mamba_utils.mamba_arch import Mamba from ..arch_utils.mamba_utils.mamba_original import MambaOriginal from ..arch_utils.mlp_utils import MLPhead +from ..utils.get_feature_dimensions import get_feature_dimensions from ..configs.mambatab_config import DefaultMambaTabConfig from .basemodel import BaseModel @@ -56,23 +57,16 @@ class MambaTab(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultMambaTabConfig = DefaultMambaTabConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) - input_dim = 0 - for feature_name, input_shape in num_feature_info.items(): - input_dim += 1 - for feature_name, input_shape in cat_feature_info.items(): - input_dim += 1 + input_dim = get_feature_dimensions(*feature_information) - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info self.returns_ensemble = False self.initial_layer = nn.Linear(input_dim, config.d_model) @@ -93,9 +87,20 @@ def __init__( else: self.mamba = MambaOriginal(config) - def forward(self, num_features, cat_features): - x = num_features + cat_features - x = torch.cat(x, dim=1) + def forward(self, *data): + """Forward pass of the Mambatab model + + Parameters + ---------- + data : 
tuple + Input tuple of tensors of num_features, cat_features, embeddings. + + Returns + ------- + torch.Tensor + Output tensor. + """ + x = torch.cat([t for tensors in data for t in tensors], dim=1) x = self.initial_layer(x) if self.axis == 1: diff --git a/mambular/base_models/mambattn.py b/mambular/base_models/mambattn.py index f393154..fd86eee 100644 --- a/mambular/base_models/mambattn.py +++ b/mambular/base_models/mambattn.py @@ -1,5 +1,5 @@ import torch - +import numpy as np from ..arch_utils.get_norm_fn import get_normalization_layer from ..arch_utils.layer_utils.embedding_layer import EmbeddingLayer from ..arch_utils.mamba_utils.mambattn_arch import MambAttn @@ -52,14 +52,15 @@ class MambAttention(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultMambAttentionConfig = DefaultMambAttentionConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) + + self.returns_ensemble = False try: self.pooling_method = self.hparams.pooling_method @@ -76,8 +77,7 @@ def __init__( # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) @@ -101,25 +101,23 @@ def __init__( self.perm = torch.randperm(self.embedding_layer.seq_len) # pooling - n_inputs = len(num_feature_info) + len(cat_feature_info) + n_inputs = np.sum([len(info) for info in feature_information]) self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor - Tensor containing the numerical features. - cat_features : Tensor - Tensor containing the categorical features. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- - Tensor - The output predictions of the model. + torch.Tensor + Output tensor. 
""" - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) if self.shuffle_embeddings: x = x[:, self.perm, :] diff --git a/mambular/base_models/mambular.py b/mambular/base_models/mambular.py index ee73b3d..f24df96 100644 --- a/mambular/base_models/mambular.py +++ b/mambular/base_models/mambular.py @@ -6,6 +6,7 @@ from ..arch_utils.mlp_utils import MLPhead from ..configs.mambular_config import DefaultMambularConfig from .basemodel import BaseModel +import numpy as np class Mambular(BaseModel): @@ -52,21 +53,19 @@ class Mambular(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (cat_feature_info, num_feature_info, embedding_feature_info) num_classes=1, config: DefaultMambularConfig = DefaultMambularConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) @@ -85,25 +84,23 @@ def __init__( self.perm = torch.randperm(self.embedding_layer.seq_len) # pooling - n_inputs = len(num_feature_info) + len(cat_feature_info) + n_inputs = np.sum([len(info) for info in feature_information]) self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor - Tensor containing the numerical features. - cat_features : Tensor - Tensor containing the categorical features. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- Tensor The output predictions of the model. 
""" - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) if self.hparams.shuffle_embeddings: x = x[:, self.perm, :] diff --git a/mambular/base_models/mlp.py b/mambular/base_models/mlp.py index 1a38871..94194d8 100644 --- a/mambular/base_models/mlp.py +++ b/mambular/base_models/mlp.py @@ -1,11 +1,10 @@ import torch import torch.nn as nn - +import numpy as np from ..arch_utils.layer_utils.embedding_layer import EmbeddingLayer from ..configs.mlp_config import DefaultMLPConfig from ..utils.get_feature_dimensions import get_feature_dimensions from .basemodel import BaseModel -import numpy as np class MLP(BaseModel): @@ -58,7 +57,7 @@ class MLP(BaseModel): def __init__( self, - feature_information: tuple, # Expecting (cat_feature_info, num_feature_info, embedding_feature_info) + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultMLPConfig = DefaultMLPConfig(), # noqa: B008 **kwargs, @@ -71,8 +70,6 @@ def __init__( # Initialize layers self.layers = nn.ModuleList() - input_dim = get_feature_dimensions(*feature_information) - if self.hparams.use_embeddings: self.embedding_layer = EmbeddingLayer( *feature_information, @@ -81,6 +78,8 @@ def __init__( input_dim = np.sum( [len(info) * self.hparams.d_model for info in feature_information] ) + else: + input_dim = get_feature_dimensions(*feature_information) # Input layer self.layers.append(nn.Linear(input_dim, self.hparams.layer_sizes[0])) diff --git a/mambular/base_models/ndtf.py b/mambular/base_models/ndtf.py index e279dc0..c750993 100644 --- a/mambular/base_models/ndtf.py +++ b/mambular/base_models/ndtf.py @@ -54,20 +54,17 @@ class NDTF(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultNDTFConfig = DefaultNDTFConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info self.returns_ensemble = False - input_dim = get_feature_dimensions(num_feature_info, cat_feature_info) + input_dim = get_feature_dimensions(*feature_information) self.input_dimensions = [input_dim] @@ -78,10 +75,13 @@ def __init__( [ NeuralDecisionTree( input_dim=self.input_dimensions[idx], - depth=np.random.randint(self.hparams.min_depth, self.hparams.max_depth), + depth=np.random.randint( + self.hparams.min_depth, self.hparams.max_depth + ), output_dim=num_classes, lamda=self.hparams.lamda, - temperature=self.hparams.temperature + np.abs(np.random.normal(0, 0.1)), + temperature=self.hparams.temperature + + np.abs(np.random.normal(0, 0.1)), node_sampling=self.hparams.node_sampling, ) for idx in range(self.hparams.n_ensembles) @@ -103,21 +103,20 @@ def __init__( requires_grad=True, ) - def forward(self, num_features, cat_features) -> torch.Tensor: + def forward(self, *data) -> torch.Tensor: """Forward pass of the NDTF model. Parameters ---------- - x : torch.Tensor - Input tensor. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- torch.Tensor Output tensor. 
""" - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) x = self.conv_layer(x.unsqueeze(2)) x = x.transpose(1, 2).squeeze(-1) @@ -131,21 +130,20 @@ def forward(self, num_features, cat_features) -> torch.Tensor: return preds @ self.tree_weights - def penalty_forward(self, num_features, cat_features) -> torch.Tensor: + def penalty_forward(self, *data) -> torch.Tensor: """Forward pass of the NDTF model. Parameters ---------- - x : torch.Tensor - Input tensor. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- torch.Tensor Output tensor. """ - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) x = self.conv_layer(x.unsqueeze(2)) x = x.transpose(1, 2).squeeze(-1) diff --git a/mambular/base_models/node.py b/mambular/base_models/node.py index 82cbf91..7010460 100644 --- a/mambular/base_models/node.py +++ b/mambular/base_models/node.py @@ -6,6 +6,7 @@ from ..configs.node_config import DefaultNODEConfig from ..utils.get_feature_dimensions import get_feature_dimensions from .basemodel import BaseModel +import numpy as np class NODE(BaseModel): @@ -52,8 +53,7 @@ class NODE(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultNODEConfig = DefaultNODEConfig(), # noqa: B008 **kwargs, @@ -63,16 +63,17 @@ def __init__( self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info - if self.hparams.use_embeddings: - input_dim = len(num_feature_info) * self.hparams.d_model + len(cat_feature_info) * self.hparams.d_model - - self.embedding_layer = EmbeddingLayer(config) # type: ignore + self.embedding_layer = EmbeddingLayer( + *feature_information, + config=config, + ) + input_dim = np.sum( + [len(info) * self.hparams.d_model for info in feature_information] + ) else: - input_dim = get_feature_dimensions(num_feature_info, cat_feature_info) + input_dim = get_feature_dimensions(*feature_information) self.d_out = num_classes self.block = DenseBlock( @@ -90,7 +91,7 @@ def __init__( output_dim=num_classes, ) - def forward(self, num_features, cat_features): + def forward(self, *data): """Forward pass through the NODE model. Parameters @@ -106,12 +107,11 @@ def forward(self, num_features, cat_features): Model output of shape [batch_size, num_classes]. 
""" if self.hparams.use_embeddings: - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) B, S, D = x.shape x = x.reshape(B, S * D) else: - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) x = self.block(x).squeeze(-1) x = self.tabular_head(x) diff --git a/mambular/base_models/resnet.py b/mambular/base_models/resnet.py index a2e487e..2a383bc 100644 --- a/mambular/base_models/resnet.py +++ b/mambular/base_models/resnet.py @@ -1,6 +1,6 @@ import torch import torch.nn as nn - +import numpy as np from ..arch_utils.layer_utils.embedding_layer import EmbeddingLayer from ..arch_utils.resnet_utils import ResidualBlock from ..configs.resnet_config import DefaultResNetConfig @@ -56,30 +56,26 @@ class ResNet(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultResNetConfig = DefaultResNetConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info if self.hparams.use_embeddings: - input_dim = len(num_feature_info) * self.hparams.d_model + len(cat_feature_info) * self.hparams.d_model - # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) - + input_dim = np.sum( + [len(info) * self.hparams.d_model for info in feature_information] + ) else: - input_dim = get_feature_dimensions(num_feature_info, cat_feature_info) + input_dim = get_feature_dimensions(*feature_information) self.initial_layer = nn.Linear(input_dim, self.hparams.layer_sizes[0]) @@ -102,14 +98,25 @@ def __init__( self.output_layer = nn.Linear(self.hparams.layer_sizes[-1], num_classes) - def forward(self, num_features, cat_features): + def forward(self, *data): + """Forward pass of the ResNet model. + + Parameters + ---------- + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. + + Returns + ------- + torch.Tensor + Output tensor. 
+ """ if self.hparams.use_embeddings: - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) B, S, D = x.shape x = x.reshape(B, S * D) else: - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) x = self.initial_layer(x) for block in self.blocks: diff --git a/mambular/base_models/saint.py b/mambular/base_models/saint.py index 38847fa..e2c6738 100644 --- a/mambular/base_models/saint.py +++ b/mambular/base_models/saint.py @@ -50,25 +50,22 @@ class SAINT(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultSAINTConfig = DefaultSAINTConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info - n_inputs = len(num_feature_info) + len(cat_feature_info) + + n_inputs = np.sum([len(info) for info in feature_information]) if getattr(config, "use_cls", True): n_inputs += 1 # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) @@ -89,22 +86,20 @@ def __init__( self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor - Tensor containing the numerical features. - cat_features : Tensor - Tensor containing the categorical features. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- - Tensor - The output predictions of the model. + torch.Tensor + Output tensor. 
""" - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) x = self.encoder(x) diff --git a/mambular/base_models/tabm.py b/mambular/base_models/tabm.py index 7683b4b..ef6e605 100644 --- a/mambular/base_models/tabm.py +++ b/mambular/base_models/tabm.py @@ -1,6 +1,6 @@ import torch import torch.nn as nn - +import numpy as np from ..arch_utils.get_norm_fn import get_normalization_layer from ..arch_utils.layer_utils.batch_ensemble_layer import LinearBatchEnsembleLayer from ..arch_utils.layer_utils.embedding_layer import EmbeddingLayer @@ -11,10 +11,10 @@ class TabM(BaseModel): + def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes: int = 1, config: DefaultTabMConfig = DefaultTabMConfig(), # noqa: B008 **kwargs, @@ -23,7 +23,7 @@ def __init__( super().__init__(config=config, **kwargs) # Save hparams including config attributes - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) if not self.hparams.average_ensembles: self.returns_ensemble = True # Directly set ensemble flag else: @@ -35,18 +35,19 @@ def __init__( # Conditionally initialize EmbeddingLayer based on self.hparams if self.hparams.use_embeddings: self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *feature_information, config=config, ) if self.hparams.average_embeddings: input_dim = self.hparams.d_model else: - input_dim = (len(num_feature_info) + len(cat_feature_info)) * config.d_model + input_dim = np.sum( + [len(info) * self.hparams.d_model for info in feature_information] + ) else: - input_dim = get_feature_dimensions(num_feature_info, cat_feature_info) + input_dim = get_feature_dimensions(*feature_information) # Input layer with batch ensembling self.layers.append( @@ -71,7 +72,11 @@ def __init__( if self.hparams.use_glu: self.layers.append(nn.GLU()) else: - self.layers.append(self.hparams.activation if hasattr(self.hparams, "activation") else nn.SELU()) + self.layers.append( + self.hparams.activation + if hasattr(self.hparams, "activation") + else nn.SELU() + ) if self.hparams.dropout > 0.0: self.layers.append(nn.Dropout(self.hparams.dropout)) @@ -105,7 +110,11 @@ def __init__( if self.hparams.use_glu: self.layers.append(nn.GLU()) else: - self.layers.append(self.hparams.activation if hasattr(self.hparams, "activation") else nn.SELU()) + self.layers.append( + self.hparams.activation + if hasattr(self.hparams, "activation") + else nn.SELU() + ) if self.hparams.dropout > 0.0: self.layers.append(nn.Dropout(self.hparams.dropout)) @@ -118,15 +127,13 @@ def __init__( num_classes, ) - def forward(self, num_features, cat_features) -> torch.Tensor: + def forward(self, *data) -> torch.Tensor: """Forward pass of the TabM model with batch ensembling. Parameters ---------- - num_features : torch.Tensor - Numerical features tensor. - cat_features : torch.Tensor - Categorical features tensor. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. 
Returns ------- @@ -135,7 +142,7 @@ def forward(self, num_features, cat_features) -> torch.Tensor: """ # Handle embeddings if used if self.hparams.use_embeddings: - x = self.embedding_layer(num_features, cat_features) + x = self.embedding_layer(*data) # Option 1: Average over feature dimension (N) if self.hparams.average_embeddings: x = x.mean(dim=1) # Shape: (B, D) @@ -145,15 +152,18 @@ def forward(self, num_features, cat_features) -> torch.Tensor: x = x.reshape(B, N * D) # Shape: (B, N * D) else: - x = num_features + cat_features - x = torch.cat(x, dim=1) + x = torch.cat([t for tensors in data for t in tensors], dim=1) # Process through layers with optional skip connections for i in range(len(self.layers) - 1): if isinstance(self.layers[i], LinearBatchEnsembleLayer): out = self.layers[i](x) # `out` shape is expected to be (batch_size, ensemble_size, out_features) - if hasattr(self, "skip_connections") and self.skip_connections and x.shape == out.shape: + if ( + hasattr(self, "skip_connections") + and self.skip_connections + and x.shape == out.shape + ): x = x + out else: x = out diff --git a/mambular/base_models/tabtransformer.py b/mambular/base_models/tabtransformer.py index df8104a..aee1f7a 100644 --- a/mambular/base_models/tabtransformer.py +++ b/mambular/base_models/tabtransformer.py @@ -1,6 +1,6 @@ import torch import torch.nn as nn - +import numpy as np from ..arch_utils.get_norm_fn import get_normalization_layer from ..arch_utils.layer_utils.embedding_layer import EmbeddingLayer from ..arch_utils.mlp_utils import MLPhead @@ -61,14 +61,14 @@ class TabTransformer(BaseModel): def __init__( self, - cat_feature_info, - num_feature_info, + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultTabTransformerConfig = DefaultTabTransformerConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) + num_feature_info, cat_feature_info, emb_feature_info = feature_information if cat_feature_info == {}: raise ValueError( "You are trying to fit a TabTransformer with no categorical features. \ ) self.returns_ensemble = False - self.cat_feature_info = cat_feature_info - self.num_feature_info = num_feature_info # embedding layer self.embedding_layer = EmbeddingLayer( - num_feature_info=num_feature_info, - cat_feature_info=cat_feature_info, + *({}, cat_feature_info, emb_feature_info), config=config, ) @@ -107,25 +104,24 @@ def __init__( ) # pooling - n_inputs = len(num_feature_info) + len(cat_feature_info) + n_inputs = np.sum([len(info) for info in feature_information]) self.initialize_pooling_layers(config=config, n_inputs=n_inputs) - def forward(self, num_features, cat_features): + def forward(self, *data): """Defines the forward pass of the model. Parameters ---------- - num_features : Tensor - Tensor containing the numerical features. - cat_features : Tensor - Tensor containing the categorical features. + data : tuple + Input tuple of tensors of num_features, cat_features, embeddings. Returns ------- Tensor The output predictions of the model.
""" - cat_embeddings = self.embedding_layer(None, cat_features) + num_features, cat_features, emb_features = data + cat_embeddings = self.embedding_layer(*(None, cat_features, emb_features)) num_features = torch.cat(num_features, dim=1) num_embeddings = self.norm_f(num_features) # type: ignore diff --git a/mambular/base_models/tabularnn.py b/mambular/base_models/tabularnn.py index 5699bf7..6ac5c3a 100644 --- a/mambular/base_models/tabularnn.py +++ b/mambular/base_models/tabularnn.py @@ -1,5 +1,4 @@ from dataclasses import replace - import torch import torch.nn as nn @@ -15,13 +14,13 @@ class TabulaRNN(BaseModel): def __init__( self, - feature_information: tuple, # Expecting (cat_feature_info, num_feature_info, embedding_feature_info) + feature_information: tuple, # Expecting (num_feature_info, cat_feature_info, embedding_feature_info) num_classes=1, config: DefaultTabulaRNNConfig = DefaultTabulaRNNConfig(), # noqa: B008 **kwargs, ): super().__init__(config=config, **kwargs) - self.save_hyperparameters(ignore=["cat_feature_info", "num_feature_info"]) + self.save_hyperparameters(ignore=["feature_information"]) self.returns_ensemble = False From a2c7845ee2fb82b42d729493520d249443b7eea5 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Fri, 24 Jan 2025 15:44:15 +0100 Subject: [PATCH 12/24] adapt lightning layer and preprocessor to account for no passed embeddings --- .../arch_utils/layer_utils/embedding_layer.py | 48 ++++++++++--------- mambular/preprocessing/preprocessor.py | 2 +- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/mambular/arch_utils/layer_utils/embedding_layer.py b/mambular/arch_utils/layer_utils/embedding_layer.py index 0184ca3..6098adb 100644 --- a/mambular/arch_utils/layer_utils/embedding_layer.py +++ b/mambular/arch_utils/layer_utils/embedding_layer.py @@ -101,20 +101,21 @@ def __init__(self, num_feature_info, cat_feature_info, emb_feature_info, config) ] ) - if self.embedding_projection: - self.emb_embeddings = nn.ModuleList( - [ - nn.Sequential( - nn.Linear( - feature_info["dimension"], - self.d_model, - bias=self.embedding_bias, - ), - self.embedding_activation, - ) - for feature_name, feature_info in emb_feature_info.items() - ] - ) + if len(emb_feature_info) >= 1: + if self.embedding_projection: + self.emb_embeddings = nn.ModuleList( + [ + nn.Sequential( + nn.Linear( + feature_info["dimension"], + self.d_model, + bias=self.embedding_bias, + ), + self.embedding_activation, + ) + for feature_name, feature_info in emb_feature_info.items() + ] + ) # Class token if required if self.use_cls: @@ -181,15 +182,16 @@ def forward(self, num_features, cat_features, emb_features): if self.layer_norm_after_embedding: num_embeddings = self.embedding_norm(num_embeddings) - if self.embedding_projection: - emb_embeddings = [ - emb(emb_features[i]) for i, emb in enumerate(self.emb_embeddings) - ] - emb_embeddings = torch.stack(emb_embeddings, dim=1) - else: - emb_embeddings = torch.stack(emb_features, dim=1) - if self.layer_norm_after_embedding: - emb_embeddings = self.embedding_norm(emb_embeddings) + if emb_features != []: + if self.embedding_projection: + emb_embeddings = [ + emb(emb_features[i]) for i, emb in enumerate(self.emb_embeddings) + ] + emb_embeddings = torch.stack(emb_embeddings, dim=1) + else: + emb_embeddings = torch.stack(emb_features, dim=1) + if self.layer_norm_after_embedding: + emb_embeddings = self.embedding_norm(emb_embeddings) embeddings = [ e for e in [cat_embeddings, num_embeddings, emb_embeddings] if e is not None diff --git 
a/mambular/preprocessing/preprocessor.py b/mambular/preprocessing/preprocessor.py index 0fa7340..99b4123 100644 --- a/mambular/preprocessing/preprocessor.py +++ b/mambular/preprocessing/preprocessor.py @@ -727,7 +727,7 @@ def get_feature_info(self, verbose=True): "categories": None, } else: - embedding_feature_info = None + embedding_feature_info = {} if not self.column_transformer: raise RuntimeError("The preprocessor has not been fitted yet.") From b8bc5e977bdcf27ba8f717f62cfbafe3532f7d08 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:11:46 +0100 Subject: [PATCH 13/24] restructure configs to create parent config-class --- mambular/configs/base_config.py | 83 +++++++++++++++++++++++ mambular/configs/fttransformer_config.py | 42 +----------- mambular/configs/mambatab_config.py | 38 +---------- mambular/configs/mambattention_config.py | 44 +----------- mambular/configs/mambular_config.py | 45 ++---------- mambular/configs/mlp_config.py | 57 +--------------- mambular/configs/ndtf_config.py | 15 +--- mambular/configs/node_config.py | 48 +------------ mambular/configs/resnet_config.py | 50 +------------- mambular/configs/saint_config.py | 51 ++------------ mambular/configs/tabm_config.py | 60 +--------------- mambular/configs/tabtransformer_config.py | 42 ++---------- mambular/configs/tabularnn_config.py | 46 ++----------- 13 files changed, 120 insertions(+), 501 deletions(-) create mode 100644 mambular/configs/base_config.py diff --git a/mambular/configs/base_config.py b/mambular/configs/base_config.py new file mode 100644 index 0000000..0e5a639 --- /dev/null +++ b/mambular/configs/base_config.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass, field +from collections.abc import Callable +import torch.nn as nn + + +@dataclass +class BaseConfig: + """ + Base configuration class with shared hyperparameters for models. + + This configuration class provides common hyperparameters for optimization, + embeddings, and categorical encoding, which can be inherited by specific + model configurations. + + Parameters + ---------- + lr : float, default=1e-04 + Learning rate for the optimizer. + lr_patience : int, default=10 + Number of epochs with no improvement before reducing the learning rate. + weight_decay : float, default=1e-06 + L2 regularization parameter for weight decay in the optimizer. + lr_factor : float, default=0.1 + Factor by which the learning rate is reduced when patience is exceeded. + activation : Callable, default=nn.ReLU() + Activation function to use in the model's layers. + cat_encoding : str, default="int" + Method for encoding categorical features ('int', 'one-hot', or 'linear'). + + Embedding Parameters + -------------------- + use_embeddings : bool, default=False + Whether to use embeddings for categorical or numerical features. + embedding_activation : Callable, default=nn.Identity() + Activation function applied to embeddings. + embedding_type : str, default="linear" + Type of embedding to use ('linear', 'plr', etc.). + embedding_bias : bool, default=False + Whether to use bias in embedding layers. + layer_norm_after_embedding : bool, default=False + Whether to apply layer normalization after embedding layers. + d_model : int, default=32 + Dimensionality of embeddings or model representations. + plr_lite : bool, default=False + Whether to use a lightweight version of Piecewise Linear Regression (PLR). + n_frequencies : int, default=48 + Number of frequency components for embeddings. 
+ frequencies_init_scale : float, default=0.01 + Initial scale for frequency components in embeddings. + embedding_projection : bool, default=True + Whether to apply a projection layer after embeddings. + + Notes + ----- + - This base class is meant to be inherited by other configurations. + - Provides default values that can be overridden in derived configurations. + + """ + + # Training Parameters + lr: float = 1e-04 + lr_patience: int = 10 + weight_decay: float = 1e-06 + lr_factor: float = 0.1 + + # Embedding Parameters + use_embeddings: bool = False + embedding_activation: Callable = nn.Identity() # noqa: RUF009 + embedding_type: str = "linear" + embedding_bias: bool = False + layer_norm_after_embedding: bool = False + d_model: int = 32 + plr_lite: bool = False + n_frequencies: int = 48 + frequencies_init_scale: float = 0.01 + embedding_projection: bool = True + + # Architecture Parameters + batch_norm: bool = False + layer_norm: bool = False + layer_norm_eps: float = 1e-05 + activation: Callable = nn.ReLU() # noqa: RUF009 + cat_encoding: str = "int" diff --git a/mambular/configs/fttransformer_config.py b/mambular/configs/fttransformer_config.py index d6aa11d..37bdcf4 100644 --- a/mambular/configs/fttransformer_config.py +++ b/mambular/configs/fttransformer_config.py @@ -1,25 +1,16 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn - from ..arch_utils.transformer_utils import ReGLU +from .base_config import BaseConfig @dataclass -class DefaultFTTransformerConfig: +class DefaultFTTransformerConfig(BaseConfig): """Configuration class for the FT Transformer model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. d_model : int, default=128 Dimensionality of the transformer model. n_layers : int, default=4 @@ -44,20 +35,6 @@ class DefaultFTTransformerConfig: Whether to apply normalization before other operations in each transformer block. bias : bool, default=True Whether to use bias in linear layers. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', 'plr', etc.). - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. - embedding_bias : bool, default=False - Whether to use bias in embedding layers. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. head_layer_sizes : list, default=() Sizes of the fully connected layers in the model's head. head_dropout : float, default=0.5 @@ -76,12 +53,6 @@ class DefaultFTTransformerConfig: Method for encoding categorical features ('int', 'one-hot', or 'linear'). 
""" - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters d_model: int = 128 n_layers: int = 4 @@ -96,15 +67,6 @@ class DefaultFTTransformerConfig: norm_first: bool = False bias: bool = True - # Embedding Parameters - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - embedding_bias: bool = False - layer_norm_after_embedding: bool = False - # Head Parameters head_layer_sizes: list = field(default_factory=list) head_dropout: float = 0.5 diff --git a/mambular/configs/mambatab_config.py b/mambular/configs/mambatab_config.py index c00d4ba..ccfe459 100644 --- a/mambular/configs/mambatab_config.py +++ b/mambular/configs/mambatab_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultMambaTabConfig: +class DefaultMambaTabConfig(BaseConfig): """Configuration class for the Default MambaTab model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. d_model : int, default=64 Dimensionality of the model. n_layers : int, default=1 @@ -50,18 +42,6 @@ class DefaultMambaTabConfig: Activation function for the model. axis : int, default=1 Axis along which operations are applied, if applicable. - num_embedding_activation : callable, default=nn.ReLU() - Activation function for numerical embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. head_layer_sizes : list, default=() Sizes of the fully connected layers in the model's head. head_dropout : float, default=0.0 @@ -82,12 +62,6 @@ class DefaultMambaTabConfig: Whether to process data bidirectionally. 
""" - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters d_model: int = 64 n_layers: int = 1 @@ -106,14 +80,6 @@ class DefaultMambaTabConfig: activation: Callable = nn.ReLU() # noqa: RUF009 axis: int = 1 - # Embedding Parameters - num_embedding_activation: Callable = nn.ReLU() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - # Head Parameters head_layer_sizes: list = field(default_factory=list) head_dropout: float = 0.0 diff --git a/mambular/configs/mambattention_config.py b/mambular/configs/mambattention_config.py index b1f029a..49e596e 100644 --- a/mambular/configs/mambattention_config.py +++ b/mambular/configs/mambattention_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultMambAttentionConfig: +class DefaultMambAttentionConfig(BaseConfig): """Configuration class for the Default Mambular Attention model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. d_model : int, default=64 Dimensionality of the model. n_layers : int, default=4 @@ -58,22 +50,6 @@ class DefaultMambAttentionConfig: Type of normalization used in the model. activation : callable, default=nn.SiLU() Activation function for the model. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. - num_embedding_activation : callable, default=nn.ReLU() - Activation function for numerical embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. head_layer_sizes : list, default=() Sizes of the fully connected layers in the model's head. head_dropout : float, default=0.5 @@ -106,12 +82,6 @@ class DefaultMambAttentionConfig: Number of attention layers in the model. 
""" - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters d_model: int = 64 n_layers: int = 4 @@ -133,16 +103,6 @@ class DefaultMambAttentionConfig: dt_init_floor: float = 1e-04 norm: str = "LayerNorm" activation: Callable = nn.SiLU() # noqa: RUF009 - layer_norm_eps: float = 1e-05 - - # Embedding Parameters - num_embedding_activation: Callable = nn.ReLU() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - layer_norm_after_embedding: bool = False # Head Parameters head_layer_sizes: list = field(default_factory=list) diff --git a/mambular/configs/mambular_config.py b/mambular/configs/mambular_config.py index fcebca0..8bc2f90 100644 --- a/mambular/configs/mambular_config.py +++ b/mambular/configs/mambular_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultMambularConfig: +class DefaultMambularConfig(BaseConfig): """Configuration class for the Default Mambular model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. d_model : int, default=64 Dimensionality of the model. n_layers : int, default=4 @@ -28,6 +20,8 @@ class DefaultMambularConfig: Whether to use bias in the linear layers. dropout : float, default=0.0 Dropout rate for regularization. + d_conv : int, default=4 + Size of convolution over columns. dt_rank : str, default="auto" Rank of the decision tree used in the model. d_state : int, default=128 @@ -46,22 +40,6 @@ class DefaultMambularConfig: Type of normalization used ('RMSNorm', etc.). activation : callable, default=nn.SiLU() Activation function for the model. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. shuffle_embeddings : bool, default=False Whether to shuffle embeddings before being passed to Mamba layers. head_layer_sizes : list, default=() @@ -88,15 +66,10 @@ class DefaultMambularConfig: Version of the Mamba model to use ('mamba-torch', 'mamba1', 'mamba2'). 
""" - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters d_model: int = 64 n_layers: int = 4 + d_conv: int = 4 expand_factor: int = 2 bias: bool = False dropout: float = 0.0 @@ -109,16 +82,8 @@ class DefaultMambularConfig: dt_init_floor: float = 1e-04 norm: str = "RMSNorm" activation: Callable = nn.SiLU() # noqa: RUF009 - layer_norm_eps: float = 1e-05 # Embedding Parameters - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - layer_norm_after_embedding: bool = False shuffle_embeddings: bool = False # Head Parameters diff --git a/mambular/configs/mlp_config.py b/mambular/configs/mlp_config.py index 08711be..1dda45f 100644 --- a/mambular/configs/mlp_config.py +++ b/mambular/configs/mlp_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultMLPConfig: +class DefaultMLPConfig(BaseConfig): """Configuration class for the default Multi-Layer Perceptron (MLP) model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. layer_sizes : list, default=(256, 128, 32) Sizes of the layers in the MLP. activation : callable, default=nn.ReLU() @@ -30,38 +22,8 @@ class DefaultMLPConfig: Whether to use Gated Linear Units (GLU) in the MLP. skip_connections : bool, default=False Whether to use skip connections in the MLP. - batch_norm : bool, default=False - Whether to use batch normalization in the MLP layers. - layer_norm : bool, default=False - Whether to use layer normalization in the MLP layers. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. - use_embeddings : bool, default=False - Whether to use embedding layers for all features. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', 'plr', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding. - d_model : int, default=32 - Dimensionality of the embeddings. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. 
""" - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters layer_sizes: list = field(default_factory=lambda: [256, 128, 32]) activation: Callable = nn.ReLU() # noqa: RUF009 @@ -69,18 +31,3 @@ class DefaultMLPConfig: dropout: float = 0.2 use_glu: bool = False skip_connections: bool = False - batch_norm: bool = False - layer_norm: bool = False - layer_norm_eps: float = 1e-05 - - # Embedding Parameters - use_embeddings: bool = False - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - layer_norm_after_embedding: bool = False - d_model: int = 32 - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - embedding_projection: bool = True diff --git a/mambular/configs/ndtf_config.py b/mambular/configs/ndtf_config.py index 89fad29..1fa1eec 100644 --- a/mambular/configs/ndtf_config.py +++ b/mambular/configs/ndtf_config.py @@ -1,20 +1,13 @@ from dataclasses import dataclass +from .base_config import BaseConfig @dataclass -class DefaultNDTFConfig: +class DefaultNDTFConfig(BaseConfig): """Configuration class for the default Neural Decision Tree Forest (NDTF) model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) applied to the model's weights during optimization. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced when a plateau is reached. min_depth : int, default=2 Minimum depth of trees in the forest. Controls the simplest model structure. max_depth : int, default=10 @@ -33,10 +26,6 @@ class DefaultNDTFConfig: Factor with which the penalty is multiplied """ - lr: float = 1e-4 - lr_patience: int = 5 - weight_decay: float = 1e-7 - lr_factor: float = 0.1 min_depth: int = 4 max_depth: int = 16 temperature: float = 0.1 diff --git a/mambular/configs/node_config.py b/mambular/configs/node_config.py index 82a4bda..2c93d30 100644 --- a/mambular/configs/node_config.py +++ b/mambular/configs/node_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultNODEConfig: +class DefaultNODEConfig(BaseConfig): """Configuration class for the Neural Oblivious Decision Ensemble (NODE) model. Parameters ---------- - lr : float, default=1e-03 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs without improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization penalty) applied by the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate is reduced when there is no improvement. num_layers : int, default=4 Number of dense layers in the model. layer_dim : int, default=128 @@ -28,24 +20,6 @@ class DefaultNODEConfig: Depth of each decision tree in the ensemble. norm : str, default=None Type of normalization to use in the model. - use_embeddings : bool, default=False - Whether to use embedding layers for categorical features. - embedding_activation : callable, default=nn.Identity() - Activation function to apply to embeddings. 
- embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. - d_model : int, default=32 - Dimensionality of the embedding space. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. head_layer_sizes : list, default=() Sizes of the layers in the model's head. head_dropout : float, default=0.5 @@ -58,31 +32,13 @@ class DefaultNODEConfig: Whether to use batch normalization in the head layers. """ - # Optimizer Parameters - lr: float = 1e-03 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters num_layers: int = 4 layer_dim: int = 128 tree_dim: int = 1 depth: int = 6 - norm: str | None = None - # Embedding Parameters - use_embeddings: bool = False - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - layer_norm_after_embedding: bool = False - d_model: int = 32 - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - # Head Parameters head_layer_sizes: list = field(default_factory=list) head_dropout: float = 0.5 diff --git a/mambular/configs/resnet_config.py b/mambular/configs/resnet_config.py index e904957..7a458d5 100644 --- a/mambular/configs/resnet_config.py +++ b/mambular/configs/resnet_config.py @@ -1,23 +1,15 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultResNetConfig: +class DefaultResNetConfig(BaseConfig): """Configuration class for the default ResNet model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization penalty) applied by the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate is reduced when there is no improvement. layer_sizes : list, default=(256, 128, 32) Sizes of the layers in the ResNet. activation : callable, default=nn.SELU() @@ -32,36 +24,13 @@ class DefaultResNetConfig: Whether to use Gated Linear Units (GLU) in the ResNet. skip_connections : bool, default=True Whether to use skip connections in the ResNet. - batch_norm : bool, default=True - Whether to use batch normalization in the ResNet layers. - layer_norm : bool, default=False - Whether to use layer normalization in the ResNet layers. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. num_blocks : int, default=3 Number of residual blocks in the ResNet. - use_embeddings : bool, default=True - Whether to use embedding layers for all features. - embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). 
average_embeddings : bool, default=True Whether to average embeddings during the forward pass. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. - d_model : int, default=64 - Dimensionality of the embeddings. """ - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 + # model params layer_sizes: list = field(default_factory=lambda: [256, 128, 32]) activation: Callable = nn.SELU() # noqa: RUF009 skip_layers: bool = False @@ -69,20 +38,7 @@ class DefaultResNetConfig: norm: bool = False use_glu: bool = False skip_connections: bool = True - batch_norm: bool = True - layer_norm: bool = False - layer_norm_eps: float = 1e-05 num_blocks: int = 3 # embedding params - use_embeddings: bool = True - embedding_type: str = "linear" - embedding_bias = False - plr_lite: bool = False average_embeddings: bool = True - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - layer_norm_after_embedding: bool = False - d_model: int = 64 - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 diff --git a/mambular/configs/saint_config.py b/mambular/configs/saint_config.py index 6c166cb..3e90369 100644 --- a/mambular/configs/saint_config.py +++ b/mambular/configs/saint_config.py @@ -1,29 +1,21 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultSAINTConfig: +class DefaultSAINTConfig(BaseConfig): """Configuration class for the SAINT model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 regularization) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. - d_model : int, default=128 - Dimensionality of the transformer model. n_layers : int, default=4 Number of transformer layers. n_heads : int, default=8 Number of attention heads in the transformer. + d_model : int, default=128 + Dimensionality of embeddings or model representations. attn_dropout : float, default=0.2 Dropout rate for the attention mechanism. ff_dropout : float, default=0.1 @@ -36,26 +28,10 @@ class DefaultSAINTConfig: Activation function for the transformer feed-forward layers. transformer_dim_feedforward : int, default=256 Dimensionality of the feed-forward layers in the transformer. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization to improve numerical stability. norm_first : bool, default=False Whether to apply normalization before other operations in each transformer block. bias : bool, default=True Whether to use bias in linear layers. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', 'plr', etc.). - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. 
- embedding_bias : bool, default=False - Whether to use bias in embedding layers. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. head_layer_sizes : list, default=() Sizes of the fully connected layers in the model's head. head_dropout : float, default=0.5 @@ -74,32 +50,17 @@ class DefaultSAINTConfig: Method for encoding categorical features ('int', 'one-hot', or 'linear'). """ - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters - d_model: int = 32 + n_layers: int = 1 n_heads: int = 2 attn_dropout: float = 0.2 ff_dropout: float = 0.1 norm: str = "LayerNorm" activation: Callable = nn.GELU() # noqa: RUF009 - layer_norm_eps: float = 1e-05 norm_first: bool = False bias: bool = True - - # Embedding Parameters - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - embedding_bias: bool = False - layer_norm_after_embedding: bool = False + d_model: int = 128 # Head Parameters head_layer_sizes: list = field(default_factory=list) diff --git a/mambular/configs/tabm_config.py b/mambular/configs/tabm_config.py index ee52dc8..4c4a931 100644 --- a/mambular/configs/tabm_config.py +++ b/mambular/configs/tabm_config.py @@ -1,24 +1,16 @@ from collections.abc import Callable from dataclasses import dataclass, field from typing import Literal - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultTabMConfig: +class DefaultTabMConfig(BaseConfig): """Configuration class for the TabM model with batch ensembling and predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which the learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate is reduced when there is no improvement. layer_sizes : list, default=(512, 512, 128) Sizes of the layers in the model. activation : callable, default=nn.ReLU() @@ -29,32 +21,6 @@ class DefaultTabMConfig: Normalization method to be used, if any. use_glu : bool, default=False Whether to use Gated Linear Units (GLU) in the model. - batch_norm : bool, default=False - Whether to use batch normalization in the model layers. - layer_norm : bool, default=False - Whether to use layer normalization in the model layers. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. - use_embeddings : bool, default=True - Whether to use embedding layers for all features. - embedding_type : str, default="plr" - Type of embedding to use ('plr', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. - average_embeddings : bool, default=False - Whether to average embeddings during the forward pass. - embedding_activation : callable, default=nn.ReLU() - Activation function for embeddings. 
- layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. - d_model : int, default=64 - Dimensionality of the embeddings. ensemble_size : int, default=32 Number of ensemble members for batch ensembling. ensemble_scaling_in : bool, default=True @@ -71,34 +37,12 @@ class DefaultTabMConfig: Model type to use ('mini' for reduced version, 'full' for complete model). """ - # lr params - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-05 - lr_factor: float = 0.1 - # arch params layer_sizes: list = field(default_factory=lambda: [256, 256, 128]) activation: Callable = nn.ReLU() # noqa: RUF009 dropout: float = 0.5 norm: str | None = None use_glu: bool = False - batch_norm: bool = False - layer_norm: bool = False - layer_norm_eps: float = 1e-05 - - # embedding params - use_embeddings: bool = True - embedding_type: str = "linear" - embedding_bias = False - plr_lite: bool = False - average_embeddings: bool = False - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - layer_norm_after_embedding: bool = False - d_model: int = 32 - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 # Batch ensembling specific configurations ensemble_size: int = 32 diff --git a/mambular/configs/tabtransformer_config.py b/mambular/configs/tabtransformer_config.py index 3cdea5c..84f16c9 100644 --- a/mambular/configs/tabtransformer_config.py +++ b/mambular/configs/tabtransformer_config.py @@ -1,31 +1,22 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn - from ..arch_utils.transformer_utils import ReGLU +from .base_config import BaseConfig @dataclass -class DefaultTabTransformerConfig: +class DefaultTabTransformerConfig(BaseConfig): """Configuration class for the default Tab Transformer model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. - d_model : int, default=128 - Dimensionality of the model. n_layers : int, default=4 Number of layers in the transformer. n_heads : int, default=8 Number of attention heads in the transformer. + d_model : int, default=128 + Dimensionality of embeddings or model representations. attn_dropout : float, default=0.2 Dropout rate for the attention mechanism. ff_dropout : float, default=0.1 @@ -38,20 +29,10 @@ class DefaultTabTransformerConfig: Activation function for the transformer layers. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. - layer_norm_eps : float, default=1e-05 - Epsilon value for layer normalization. norm_first : bool, default=True Whether to apply normalization before other operations in each transformer block. bias : bool, default=True Whether to use bias in the linear layers. - embedding_activation : callable, default=nn.Identity() - Activation function for embeddings. - embedding_type : str, default="linear" - Type of embedding to use ('linear', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding. 
head_layer_sizes : list, default=() Sizes of the layers in the model's head. head_dropout : float, default=0.5 @@ -68,14 +49,7 @@ class DefaultTabTransformerConfig: Encoding method for categorical features ('int', 'one-hot', etc.). """ - # Optimizer Parameters - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture Parameters - d_model: int = 128 n_layers: int = 4 n_heads: int = 8 attn_dropout: float = 0.2 @@ -84,15 +58,9 @@ class DefaultTabTransformerConfig: activation: Callable = nn.SELU() # noqa: RUF009 transformer_activation: Callable = ReGLU() # noqa: RUF009 transformer_dim_feedforward: int = 512 - layer_norm_eps: float = 1e-05 norm_first: bool = True bias: bool = True - - # Embedding Parameters - embedding_activation: Callable = nn.Identity() # noqa: RUF009 - embedding_type: str = "linear" - embedding_bias: bool = False - layer_norm_after_embedding: bool = False + d_model: int = 128 # Head Parameters head_layer_sizes: list = field(default_factory=list) diff --git a/mambular/configs/tabularnn_config.py b/mambular/configs/tabularnn_config.py index f945fbe..84a9a99 100644 --- a/mambular/configs/tabularnn_config.py +++ b/mambular/configs/tabularnn_config.py @@ -1,51 +1,29 @@ from collections.abc import Callable from dataclasses import dataclass, field - import torch.nn as nn +from .base_config import BaseConfig @dataclass -class DefaultTabulaRNNConfig: +class DefaultTabulaRNNConfig(BaseConfig): """Configuration class for the TabulaRNN model with predefined hyperparameters. Parameters ---------- - lr : float, default=1e-04 - Learning rate for the optimizer. - lr_patience : int, default=10 - Number of epochs with no improvement after which learning rate will be reduced. - weight_decay : float, default=1e-06 - Weight decay (L2 penalty) for the optimizer. - lr_factor : float, default=0.1 - Factor by which the learning rate will be reduced. model_type : str, default="RNN" Type of model, one of "RNN", "LSTM", "GRU", "mLSTM", "sLSTM". - d_model : int, default=128 - Dimensionality of the model. n_layers : int, default=4 Number of layers in the RNN. rnn_dropout : float, default=0.2 Dropout rate for the RNN layers. + d_model : int, default=128 + Dimensionality of embeddings or model representations. norm : str, default="RMSNorm" Normalization method to be used. activation : callable, default=nn.SELU() Activation function for the RNN layers. residuals : bool, default=False Whether to include residual connections in the RNN. - embedding_type : str, default="linear" - Type of embedding for features ('linear', 'plr', etc.). - embedding_bias : bool, default=False - Whether to use bias in the embedding layers. - plr_lite : bool, default=False - Whether to use a lightweight version of Piecewise Linear Regression (PLR). - n_frequencies : int, default=48 - Number of frequencies for PLR embeddings. - frequencies_init_scale : float, default=0.01 - Initial scale for frequency parameters in embeddings. - embedding_activation : callable, default=nn.ReLU() - Activation function for embeddings. - layer_norm_after_embedding : bool, default=False - Whether to apply layer normalization after embedding layers. head_layer_sizes : list, default=() Sizes of the layers in the head of the model. head_dropout : float, default=0.5 @@ -74,12 +52,6 @@ class DefaultTabulaRNNConfig: Whether to use bias in the convolutional layers. 
""" - # Optimizer params - lr: float = 1e-04 - lr_patience: int = 10 - weight_decay: float = 1e-06 - lr_factor: float = 0.1 - # Architecture params model_type: str = "RNN" d_model: int = 128 @@ -89,16 +61,6 @@ class DefaultTabulaRNNConfig: activation: Callable = nn.SELU() # noqa: RUF009 residuals: bool = False - # Embedding params - embedding_type: str = "linear" - embedding_bias: bool = False - plr_lite: bool = False - n_frequencies: int = 48 - frequencies_init_scale: float = 0.01 - embedding_activation: Callable = nn.ReLU() # noqa: RUF009 - layer_norm_after_embedding: bool = False - embedding_projection: bool = True - # Head params head_layer_sizes: list = field(default_factory=list) head_dropout: float = 0.5 From a4c5992a453598a3fbbd8fd16c1e81c84bf82ae7 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:12:05 +0100 Subject: [PATCH 14/24] fix minor bugs related to imports and dim identification --- mambular/base_models/mambatab.py | 2 +- mambular/base_models/saint.py | 1 + mambular/base_models/tabtransformer.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mambular/base_models/mambatab.py b/mambular/base_models/mambatab.py index b1b111a..4314bab 100644 --- a/mambular/base_models/mambatab.py +++ b/mambular/base_models/mambatab.py @@ -72,7 +72,7 @@ def __init__( self.initial_layer = nn.Linear(input_dim, config.d_model) self.norm_f = LayerNorm(config.d_model) - self.embedding_activation = self.hparams.num_embedding_activation + self.embedding_activation = self.hparams.embedding_activation self.axis = config.axis diff --git a/mambular/base_models/saint.py b/mambular/base_models/saint.py index e2c6738..e6cfe19 100644 --- a/mambular/base_models/saint.py +++ b/mambular/base_models/saint.py @@ -4,6 +4,7 @@ from ..arch_utils.transformer_utils import RowColTransformer from ..configs.saint_config import DefaultSAINTConfig from .basemodel import BaseModel +import numpy as np class SAINT(BaseModel): diff --git a/mambular/base_models/tabtransformer.py b/mambular/base_models/tabtransformer.py index aee1f7a..0287203 100644 --- a/mambular/base_models/tabtransformer.py +++ b/mambular/base_models/tabtransformer.py @@ -93,8 +93,8 @@ def __init__( ) mlp_input_dim = 0 - for feature_name, input_shape in num_feature_info.items(): - mlp_input_dim += input_shape + for feature_name, info in num_feature_info.items(): + mlp_input_dim += info["dimension"] mlp_input_dim += self.hparams.d_model self.tabular_head = MLPhead( From 6fc04ebe4d8235a515e998e3707eb6c5a21bd017 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:12:24 +0100 Subject: [PATCH 15/24] fix bug related to column names in datamodule - turn int to string --- mambular/data_utils/datamodule.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mambular/data_utils/datamodule.py b/mambular/data_utils/datamodule.py index 459b8c1..40e80f7 100644 --- a/mambular/data_utils/datamodule.py +++ b/mambular/data_utils/datamodule.py @@ -212,8 +212,8 @@ def setup(self, stage: str): else torch.long ) - cat_key = ( - "cat_" + key + cat_key = "cat_" + str( + key ) # Assuming categorical keys are prefixed with 'cat_' if cat_key in train_preprocessed_data: train_cat_tensors.append( @@ -224,7 +224,7 @@ def setup(self, stage: str): torch.tensor(val_preprocessed_data[cat_key], dtype=dtype) ) - binned_key = "num_" + key # for binned features + binned_key = "num_" + str(key) # for binned features if binned_key in train_preprocessed_data: train_cat_tensors.append( 
torch.tensor(train_preprocessed_data[binned_key], dtype=dtype) @@ -237,8 +237,8 @@ def setup(self, stage: str): # Populate tensors for numerical features, if present in processed data for key in self.num_feature_info: # type: ignore - num_key = ( - "num_" + key + num_key = "num_" + str( + key ) # Assuming numerical keys are prefixed with 'num_' if num_key in train_preprocessed_data: train_num_tensors.append( @@ -306,13 +306,15 @@ def preprocess_new_data(self, X, embeddings): ) else torch.long ) - cat_key = "cat_" + key # Assuming categorical keys are prefixed with 'cat_' + cat_key = "cat_" + str( + key + ) # Assuming categorical keys are prefixed with 'cat_' if cat_key in preprocessed_data: cat_tensors.append( torch.tensor(preprocessed_data[cat_key], dtype=dtype) ) - binned_key = "num_" + key # for binned features + binned_key = "num_" + str(key) # for binned features if binned_key in preprocessed_data: cat_tensors.append( torch.tensor(preprocessed_data[binned_key], dtype=dtype) @@ -320,7 +322,9 @@ def preprocess_new_data(self, X, embeddings): # Populate tensors for numerical features, if present in processed data for key in self.num_feature_info: # type: ignore - num_key = "num_" + key # Assuming numerical keys are prefixed with 'num_' + num_key = "num_" + str( + key + ) # Assuming numerical keys are prefixed with 'num_' if num_key in preprocessed_data: num_tensors.append( torch.tensor(preprocessed_data[num_key], dtype=torch.float32) From e60dd80ef168febf8aa46a8d7f3ecb87c600991b Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:12:44 +0100 Subject: [PATCH 16/24] make box-cox strictly positive --- mambular/preprocessing/preprocessor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mambular/preprocessing/preprocessor.py b/mambular/preprocessing/preprocessor.py index 99b4123..cbb9f2f 100644 --- a/mambular/preprocessing/preprocessor.py +++ b/mambular/preprocessing/preprocessor.py @@ -466,6 +466,9 @@ def fit(self, X, y=None, embeddings=None): ) elif self.numerical_preprocessing == "box-cox": + numeric_transformer_steps.append( + ("check_positive", MinMaxScaler(feature_range=(1e-3, 1))) + ) numeric_transformer_steps.append( ( "box-cox", @@ -752,11 +755,12 @@ def get_feature_info(self, verbose=True): "quantile", "polynomial", "splines", + "box-cox", ] ): last_step = transformer_pipeline.steps[-1][1] if hasattr(last_step, "transform"): - dummy_input = np.zeros((1, 1)) + dummy_input = np.zeros((1, 1)) + 1e-05 transformed_feature = last_step.transform(dummy_input) dimension = transformed_feature.shape[1] numerical_feature_info[feature_name] = { From febf1651851040dc426ace45fd52d5c4aa679ccc Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:12:54 +0100 Subject: [PATCH 17/24] include unit tests --- tests/test_base.py | 155 ++++++++++++++++++ tests/test_classifier.py | 115 ------------- tests/test_configs.py | 115 +++++++++++++ tests/test_distributions.py | 311 ------------------------------------ tests/test_lss.py | 104 ------------ tests/test_preprocessor.py | 185 ++++++++++++--------- tests/test_regressor.py | 103 ------------ 7 files changed, 377 insertions(+), 711 deletions(-) create mode 100644 tests/test_base.py delete mode 100644 tests/test_classifier.py create mode 100644 tests/test_configs.py delete mode 100644 tests/test_distributions.py delete mode 100644 tests/test_lss.py delete mode 100644 tests/test_regressor.py diff --git a/tests/test_base.py b/tests/test_base.py new file mode 100644 index 0000000..20d9797 --- /dev/null +++ 
b/tests/test_base.py @@ -0,0 +1,155 @@ +import pytest +import inspect +import torch +import os +import importlib +from mambular.base_models.basemodel import BaseModel + +# Paths for models and configs +MODEL_MODULE_PATH = "mambular.base_models" +CONFIG_MODULE_PATH = "mambular.configs" + +# Discover all models +model_classes = [] +for filename in os.listdir(os.path.dirname(__file__) + "/../mambular/base_models"): + if filename.endswith(".py") and filename not in [ + "__init__.py", + "basemodel.py", + "lightning_wrapper.py", + "bayesian_tabm.py", + ]: + module_name = f"{MODEL_MODULE_PATH}.{filename[:-3]}" + module = importlib.import_module(module_name) + + for name, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, BaseModel) and obj is not BaseModel: + model_classes.append(obj) + + +def get_model_config(model_class): + """Dynamically load the correct config class for each model.""" + model_name = model_class.__name__ # e.g., "Mambular" + config_class_name = f"Default{model_name}Config" # e.g., "DefaultMambularConfig" + + try: + config_module = importlib.import_module( + f"{CONFIG_MODULE_PATH}.{model_name.lower()}_config" + ) + config_class = getattr(config_module, config_class_name) + return config_class() # Instantiate config + except (ModuleNotFoundError, AttributeError) as e: + pytest.fail( + f"Could not find or instantiate config {config_class_name} for {model_name}: {e}" + ) + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_inherits_base_model(model_class): + """Test that each model correctly inherits from BaseModel.""" + assert issubclass( + model_class, BaseModel + ), f"{model_class.__name__} should inherit from BaseModel." + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_has_forward_method(model_class): + """Test that each model has a forward method with *data.""" + assert hasattr( + model_class, "forward" + ), f"{model_class.__name__} is missing a forward method." + + sig = inspect.signature(model_class.forward) + assert any( + p.kind == inspect.Parameter.VAR_POSITIONAL for p in sig.parameters.values() + ), f"{model_class.__name__}.forward should have *data argument." + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_takes_config(model_class): + """Test that each model accepts a config argument.""" + sig = inspect.signature(model_class.__init__) + assert ( + "config" in sig.parameters + ), f"{model_class.__name__} should accept a 'config' parameter." + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_has_num_classes(model_class): + """Test that each model accepts a num_classes argument.""" + sig = inspect.signature(model_class.__init__) + assert ( + "num_classes" in sig.parameters + ), f"{model_class.__name__} should accept a 'num_classes' parameter." + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_calls_super_init(model_class): + """Test that each model calls super().__init__(config=config, **kwargs).""" + source = inspect.getsource(model_class.__init__) + assert ( + "super().__init__(config=config" in source + ), f"{model_class.__name__} should call super().__init__(config=config, **kwargs)." 
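+
+# NOTE: the feature_information fixtures in the tests below mimic the
+# (num_feature_info, cat_feature_info, emb_feature_info) tuple returned by
+# Preprocessor.get_feature_info(); the dictionaries are illustrative mocks,
+# not real preprocessor output, and the models' forward(*data) unpacks the
+# same three-way split.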
+ + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_initialization(model_class): + """Test that each model can be initialized with its correct config.""" + config = get_model_config(model_class) + feature_info = ( + { + "A": { + "preprocessing": "imputer -> check_positive -> box-cox", + "dimension": 1, + "categories": None, + } + }, + { + "sibsp": { + "preprocessing": "imputer -> continuous_ordinal", + "dimension": 1, + "categories": 8, + } + }, + {}, + ) # Mock feature info + + try: + model = model_class( + feature_information=feature_info, num_classes=3, config=config + ) + except Exception as e: + pytest.fail(f"Failed to initialize {model_class.__name__}: {e}") + + +@pytest.mark.parametrize("model_class", model_classes) +def test_model_defines_key_attributes(model_class): + """Test that each model defines expected attributes like returns_ensemble""" + config = get_model_config(model_class) + feature_info = ( + { + "A": { + "preprocessing": "imputer -> check_positive -> box-cox", + "dimension": 1, + "categories": None, + } + }, + { + "sibsp": { + "preprocessing": "imputer -> continuous_ordinal", + "dimension": 1, + "categories": 8, + } + }, + {}, + ) # Mock feature info + + try: + model = model_class( + feature_information=feature_info, num_classes=3, config=config + ) + except TypeError as e: + pytest.fail(f"Failed to initialize {model_class.__name__}: {e}") + + expected_attrs = ["returns_ensemble"] + for attr in expected_attrs: + assert hasattr(model, attr), f"{model_class.__name__} should define '{attr}'." diff --git a/tests/test_classifier.py b/tests/test_classifier.py deleted file mode 100644 index 7243233..0000000 --- a/tests/test_classifier.py +++ /dev/null @@ -1,115 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -import numpy as np -import pandas as pd -import torch -from sklearn.metrics import accuracy_score, log_loss - -from mambular.models import MambularClassifier # Ensure correct import path - - -class TestMambularClassifier(unittest.TestCase): - def setUp(self): - # Patching external dependencies - self.patcher_pl_trainer = patch("lightning.Trainer") - self.mock_pl_trainer = self.patcher_pl_trainer.start() - - self.patcher_base_model = patch("mambular.base_models.classifier.BaseMambularClassifier") - self.mock_base_model = self.patcher_base_model.start() - - self.classifier = MambularClassifier(d_model=128, dropout=0.1) - - # Sample data - self.X = pd.DataFrame(np.random.randn(100, 10)) - self.y = np.random.choice(["A", "B", "C"], size=100) - - self.classifier.cat_feature_info = {} - self.classifier.num_feature_info = {} - - def tearDown(self): - self.patcher_pl_trainer.stop() - self.patcher_base_model.stop() - - def test_initialization(self): - # This assumes MambularConfig is properly imported and used in the MambularRegressor class - from mambular.utils.configs import DefaultMambularConfig - - self.assertIsInstance(self.classifier.config, DefaultMambularConfig) - self.assertEqual(self.classifier.config.d_model, 128) - self.assertEqual(self.classifier.config.dropout, 0.1) - - def test_split_data(self): - """Test the data splitting functionality.""" - X_train, X_val, y_train, y_val = self.classifier.split_data(self.X, self.y, val_size=0.2, random_state=42) - self.assertEqual(len(X_train), 80) - self.assertEqual(len(X_val), 20) - self.assertEqual(len(y_train), 80) - self.assertEqual(len(y_val), 20) - - def test_fit(self): - """Test the training setup and call.""" - # Mock the necessary parts to simulate training - 
self.classifier.preprocess_data = MagicMock() - self.classifier.model = self.mock_base_model - - self.classifier.fit(self.X, self.y) - - # Ensure that the fit method of the trainer is called - self.mock_pl_trainer.return_value.fit.assert_called_once() - - def test_predict(self): - # Create a mock tensor as the model output - # Assuming three classes A, B, C as per self.y - mock_logits = torch.rand(100, 3) - - # Mock the model and its method calls - self.classifier.model = MagicMock() - self.classifier.model.eval.return_value = None - self.classifier.model.return_value = mock_logits - - # Mock preprocess_test_data to return dummy tensor data - self.classifier.preprocess_test_data = MagicMock(return_value=([], [])) - - predictions = self.classifier.predict(self.X) - - # Assert that predictions return as expected - expected_predictions = torch.argmax(mock_logits, dim=1).numpy() - np.testing.assert_array_equal(predictions, expected_predictions) - - def test_evaluate(self): - # Mock predict and predict_proba to simulate classifier output - mock_predictions = np.random.choice([0, 1, 2], size=100) - raw_probabilities = np.random.rand(100, 3) - # Normalize these probabilities so that each row sums to 1 - mock_probabilities = raw_probabilities / raw_probabilities.sum(axis=1, keepdims=True) - self.classifier.predict = MagicMock(return_value=mock_predictions) - self.classifier.predict_proba = MagicMock(return_value=mock_probabilities) - - # Define metrics to test - metrics = { - "Accuracy": (accuracy_score, False), - # Log Loss requires probability scores - "Log Loss": (log_loss, True), - } - - # Call evaluate with the defined metrics - result = self.classifier.evaluate(self.X, self.y, metrics=metrics) - - # Assert that predict and predict_proba were called correctly - self.classifier.predict.assert_called_once() - self.classifier.predict_proba.assert_called_once() - - # Check the results of evaluate - expected_accuracy = accuracy_score(self.y, mock_predictions) - expected_log_loss = log_loss(self.y, mock_probabilities) - self.assertEqual(result["Accuracy"], expected_accuracy) - self.assertAlmostEqual(result["Log Loss"], expected_log_loss) - - # Assert calls with appropriate arguments - self.classifier.predict.assert_called_once_with(self.X) - self.classifier.predict_proba.assert_called_once_with(self.X) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_configs.py b/tests/test_configs.py new file mode 100644 index 0000000..5299534 --- /dev/null +++ b/tests/test_configs.py @@ -0,0 +1,115 @@ +import pytest +import inspect +import importlib +import os +import dataclasses +import typing +from mambular.configs.base_config import BaseConfig # Ensure correct path + +CONFIG_MODULE_PATH = "mambular.configs" +config_classes = [] + +# Discover all config classes in mambular/configs/ +for filename in os.listdir(os.path.dirname(__file__) + "/../mambular/configs"): + if ( + filename.endswith(".py") + and filename != "base_config.py" + and not filename.startswith("__") + ): + module_name = f"{CONFIG_MODULE_PATH}.{filename[:-3]}" + module = importlib.import_module(module_name) + + for name, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, BaseConfig) and obj is not BaseConfig: + config_classes.append(obj) + + +@pytest.mark.parametrize("config_class", config_classes) +def test_config_inherits_baseconfig(config_class): + """Test that each config class correctly inherits from BaseConfig.""" + assert issubclass( + config_class, BaseConfig + ), f"{config_class.__name__} 
should inherit from BaseConfig." + + +@pytest.mark.parametrize("config_class", config_classes) +def test_config_instantiation(config_class): + """Test that each config class can be instantiated without errors.""" + try: + config = config_class() + except Exception as e: + pytest.fail(f"Failed to instantiate {config_class.__name__}: {e}") + + +@pytest.mark.parametrize("config_class", config_classes) +def test_config_has_expected_attributes(config_class): + """Test that each config has all required attributes from BaseConfig.""" + base_attrs = {field.name for field in dataclasses.fields(BaseConfig)} + config_attrs = {field.name for field in dataclasses.fields(config_class)} + + missing_attrs = base_attrs - config_attrs + assert ( + not missing_attrs + ), f"{config_class.__name__} is missing attributes: {missing_attrs}" + + +@pytest.mark.parametrize("config_class", config_classes) +def test_config_default_values(config_class): + """Ensure that each config class has default values assigned correctly.""" + config = config_class() + + for field in dataclasses.fields(config_class): + attr = field.name + expected_type = field.type + + assert hasattr( + config, attr + ), f"{config_class.__name__} is missing attribute '{attr}'." + + value = getattr(config, attr) + + # Handle generic types properly + origin = typing.get_origin(expected_type) + + if origin is typing.Literal: + # If the field is a Literal, ensure the value is one of the allowed options + allowed_values = typing.get_args(expected_type) + assert ( + value in allowed_values + ), f"{config_class.__name__}.{attr} has incorrect value: expected one of {allowed_values}, got {value}" + elif origin is typing.Union: + # For Union types (e.g., Optional[str]), check if value matches any type in the union + allowed_types = typing.get_args(expected_type) + assert any( + isinstance(value, t) for t in allowed_types + ), f"{config_class.__name__}.{attr} has incorrect type: expected one of {allowed_types}, got {type(value)}" + elif origin is not None: + # If it's another generic type (e.g., list[str]), check against the base type + assert ( + isinstance(value, origin) or value is None + ), f"{config_class.__name__}.{attr} has incorrect type: expected {expected_type}, got {type(value)}" + else: + # Standard type check + assert ( + isinstance(value, expected_type) or value is None + ), f"{config_class.__name__}.{attr} has incorrect type: expected {expected_type}, got {type(value)}" + + +@pytest.mark.parametrize("config_class", config_classes) +def test_config_allows_updates(config_class): + """Ensure that config values can be updated and remain type-consistent.""" + config = config_class() + + update_values = { + "lr": 0.01, + "d_model": 128, + "embedding_type": "plr", + "activation": lambda x: x, # Function update + } + + for attr, new_value in update_values.items(): + if hasattr(config, attr): + setattr(config, attr, new_value) + assert ( + getattr(config, attr) == new_value + ), f"{config_class.__name__}.{attr} did not update correctly." 
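
The config tests above (and the config refactor earlier in this series) rely on a shared BaseConfig in mambular/configs/base_config.py that the patches import but never show. As a rough sketch only, with field names and defaults inferred from the optimizer and embedding attributes removed from the per-model configs (the real class may differ):

```python
from collections.abc import Callable
from dataclasses import dataclass

import torch.nn as nn


@dataclass
class BaseConfig:
    """Assumed shape of the shared base config (illustrative, not the real source)."""

    # optimizer parameters formerly duplicated in every model config
    lr: float = 1e-04
    lr_patience: int = 10
    weight_decay: float = 1e-06
    lr_factor: float = 0.1

    # shared embedding parameters
    d_model: int = 64
    embedding_type: str = "linear"
    embedding_bias: bool = False
    embedding_activation: Callable = nn.Identity()  # noqa: RUF009
    embedding_projection: bool = True
    plr_lite: bool = False
    n_frequencies: int = 48
    frequencies_init_scale: float = 0.01
    layer_norm_after_embedding: bool = False
    layer_norm_eps: float = 1e-05
```

With a base class along these lines, the concrete configs only keep model-specific fields and overrides (e.g. DefaultSAINTConfig setting d_model = 128), which is what test_config_has_expected_attributes and test_config_default_values above verify.
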
diff --git a/tests/test_distributions.py b/tests/test_distributions.py deleted file mode 100644 index 1a8f2ca..0000000 --- a/tests/test_distributions.py +++ /dev/null @@ -1,311 +0,0 @@ -import unittest - -import torch - -from mambular.utils.distributions import ( - BetaDistribution, - CategoricalDistribution, - DirichletDistribution, - GammaDistribution, - InverseGammaDistribution, - NegativeBinomialDistribution, - NormalDistribution, - PoissonDistribution, - StudentTDistribution, -) - - -class TestNormalDistribution(unittest.TestCase): - def setUp(self): - """Initialize the NormalDistribution object with default transforms.""" - self.normal = NormalDistribution() - - def test_initialization(self): - """Test the initialization and default parameter settings.""" - self.assertEqual(self.normal._name, "Normal") - self.assertEqual(self.normal.param_names, ["mean", "variance"]) - self.assertIsInstance(self.normal.mean_transform, type(lambda x: x)) - self.assertIsInstance(self.normal.variance_transform, type(torch.nn.functional.softplus)) - - def test_predefined_transforms(self): - """Test if predefined transformations are correctly applied.""" - x = torch.tensor([-1.0, 0.0, 1.0]) - self.assertTrue(torch.allclose(self.normal.mean_transform(x), x)) # 'none' should change nothing - self.assertTrue( - torch.all(torch.ge(self.normal.variance_transform(x), 0)) - ) # 'positive' should make all values non-negative - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - predictions = torch.tensor([[0.0, 1.0]]) # mean = 0, variance = 1 - y_true = torch.tensor([0.0]) - self.normal = NormalDistribution() - loss = self.normal.compute_loss(predictions, y_true) - test_dist = torch.distributions.Normal( - loc=predictions[:, 0], scale=torch.nn.functional.softplus(predictions[:, 1]) - ) - expected_loss = -test_dist.log_prob(torch.tensor(0.0)).mean() - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - def test_evaluate_nll(self): - """Test the evaluate NLL function.""" - y_true = [0.0] - y_pred = [[0.0, 1.0]] # mean=0, variance=1 - result = self.normal.evaluate_nll(y_true, y_pred) - self.assertIn("NLL", result) - self.assertIn("mse", result) - self.assertIn("mae", result) - self.assertIn("rmse", result) - - -class TestPoissonDistribution(unittest.TestCase): - def setUp(self): - """Initialize the PoissonDistribution object with default transform.""" - self.poisson = PoissonDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.poisson._name, "Poisson") - self.assertEqual(self.poisson.param_names, ["rate"]) - self.assertIsInstance(self.poisson.rate_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - predictions = torch.tensor([[1.0]]) # rate = 1 - y_true = torch.tensor([1.0]) - loss = self.poisson.compute_loss(predictions, y_true) - expected_loss = ( - -torch.distributions.Poisson(torch.nn.functional.softplus(predictions[:, 0])) - .log_prob(torch.tensor(1.0)) - .mean() - ) - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestBetaDistribution(unittest.TestCase): - def setUp(self): - """Initialize the BetaDistribution object with default transforms.""" - self.beta = BetaDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.beta._name, "Beta") - 
self.assertEqual(self.beta.param_names, ["alpha", "beta"]) - self.assertIsInstance(self.beta.alpha_transform, type(torch.nn.functional.softplus)) - self.assertIsInstance(self.beta.beta_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - predictions = torch.tensor([[1.0, 1.0]]) # alpha = 1, beta = 1 (uniform distribution) - y_true = torch.tensor([0.5]) - loss = self.beta.compute_loss(predictions, y_true) - expected_loss = ( - -torch.distributions.Beta( - torch.nn.functional.softplus(predictions[:, 0]), - torch.nn.functional.softplus(predictions[:, 1]), - ) - .log_prob(torch.tensor(0.5)) - .mean() - ) - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestInverseGammaDistribution(unittest.TestCase): - def setUp(self): - """Initialize the InverseGammaDistribution object with default transforms.""" - self.inverse_gamma = InverseGammaDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.inverse_gamma._name, "InverseGamma") - self.assertEqual(self.inverse_gamma.param_names, ["shape", "scale"]) - self.assertIsInstance(self.inverse_gamma.shape_transform, type(torch.nn.functional.softplus)) - self.assertIsInstance(self.inverse_gamma.scale_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - # These values for shape and scale parameters are chosen to be feasible and testable. - predictions = torch.tensor([[3.0, 2.0]]) # shape = 3, scale = 2 - y_true = torch.tensor([0.5]) - - loss = self.inverse_gamma.compute_loss(predictions, y_true) - # Manually calculate the expected loss using torch's distribution functions - shape = torch.nn.functional.softplus(predictions[:, 0]) - scale = torch.nn.functional.softplus(predictions[:, 1]) - inverse_gamma_dist = torch.distributions.InverseGamma(shape, scale) - expected_loss = -inverse_gamma_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestDirichletDistribution(unittest.TestCase): - def setUp(self): - """Initialize the DirichletDistribution object with default transforms.""" - self.dirichlet = DirichletDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.dirichlet._name, "Dirichlet") - # Concentration param_name is a simplification as mentioned in your class docstring - self.assertEqual(self.dirichlet.param_names, ["concentration"]) - self.assertIsInstance(self.dirichlet.concentration_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - # These values are chosen to be feasible and testable. 
- # Example: Concentrations for a 3-dimensional Dirichlet distribution - predictions = torch.tensor( - [[1.0, 1.0, 1.0]] - ) # Equal concentration, should resemble uniform distribution over simplex - y_true = torch.tensor([[0.33, 0.33, 0.34]]) # Example point in the probability simplex - - loss = self.dirichlet.compute_loss(predictions, y_true) - # Manually calculate the expected loss using torch's distribution functions - concentration = torch.nn.functional.softplus(predictions) - dirichlet_dist = torch.distributions.Dirichlet(concentration) - expected_loss = -dirichlet_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestGammaDistribution(unittest.TestCase): - def setUp(self): - """Initialize the GammaDistribution object with default transforms.""" - self.gamma = GammaDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.gamma._name, "Gamma") - self.assertEqual(self.gamma.param_names, ["shape", "rate"]) - self.assertIsInstance(self.gamma.shape_transform, type(torch.nn.functional.softplus)) - self.assertIsInstance(self.gamma.rate_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - # Set some test parameters and observations - predictions = torch.tensor([[2.0, 3.0]]) # shape = 2, rate = 3 - y_true = torch.tensor([0.5]) # Test value - - loss = self.gamma.compute_loss(predictions, y_true) - # Manually calculate the expected loss using torch's distribution functions - shape = torch.nn.functional.softplus(predictions[:, 0]) - rate = torch.nn.functional.softplus(predictions[:, 1]) - gamma_dist = torch.distributions.Gamma(shape, rate) - expected_loss = -gamma_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestStudentTDistribution(unittest.TestCase): - def setUp(self): - """Initialize the StudentTDistribution object with default transforms.""" - self.student_t = StudentTDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.student_t._name, "StudentT") - self.assertEqual(self.student_t.param_names, ["df", "loc", "scale"]) - self.assertIsInstance(self.student_t.df_transform, type(torch.nn.functional.softplus)) - self.assertIsInstance( - self.student_t.loc_transform, - type(lambda x: x), # Assuming 'none' transformation - ) - self.assertIsInstance(self.student_t.scale_transform, type(torch.nn.functional.softplus)) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - # Set some test parameters and observations - predictions = torch.tensor([[10.0, 0.0, 1.0]]) # df=10, loc=0, scale=1 - y_true = torch.tensor([0.5]) # Test value - - loss = self.student_t.compute_loss(predictions, y_true) - # Manually calculate the expected loss using torch's distribution functions - df = torch.nn.functional.softplus(predictions[:, 0]) - loc = predictions[:, 1] # 'none' transformation - scale = torch.nn.functional.softplus(predictions[:, 2]) - student_t_dist = torch.distributions.StudentT(df, loc, scale) - expected_loss = -student_t_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - def test_evaluate_nll(self): - """Test the evaluate NLL function and additional metrics.""" - y_true = [0.5] - y_pred = [[10.0, 0.0, 1.0]] # df=10, loc=0, scale=1 - 
result = self.student_t.evaluate_nll(y_true, y_pred) - - self.assertIn("NLL", result) - self.assertIn("mse", result) - self.assertIn("mae", result) - self.assertIn("rmse", result) - - # Check that MSE, MAE, RMSE calculations are reasonable - self.assertGreaterEqual(result["mse"], 0) - self.assertGreaterEqual(result["mae"], 0) - self.assertGreaterEqual(result["rmse"], 0) - - -class TestNegativeBinomialDistribution(unittest.TestCase): - def setUp(self): - """Initialize the NegativeBinomialDistribution object with default transforms.""" - self.negative_binomial = NegativeBinomialDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.negative_binomial._name, "NegativeBinomial") - self.assertEqual(self.negative_binomial.param_names, ["mean", "dispersion"]) - self.assertIsInstance(self.negative_binomial.mean_transform, type(torch.nn.functional.softplus)) - self.assertIsInstance( - self.negative_binomial.dispersion_transform, - type(torch.nn.functional.softplus), - ) - - def test_compute_loss_known_values(self): - """Test the loss computation against known values.""" - # Set some test parameters and observations - predictions = torch.tensor([[10.0, 0.1]]) # mean=10, dispersion=0.1 - y_true = torch.tensor([5.0]) # Test value - - loss = self.negative_binomial.compute_loss(predictions, y_true) - # Manually calculate the expected loss using torch's distribution functions - mean = torch.nn.functional.softplus(predictions[:, 0]) - dispersion = torch.nn.functional.softplus(predictions[:, 1]) - r = 1 / dispersion - p = r / (r + mean) - negative_binomial_dist = torch.distributions.NegativeBinomial(total_count=r, probs=p) - expected_loss = -negative_binomial_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -class TestCategoricalDistribution(unittest.TestCase): - def setUp(self): - """Initialize the CategoricalDistribution object with a probability transformation.""" - self.categorical = CategoricalDistribution() - - def test_initialization(self): - """Test the initialization and parameter settings.""" - self.assertEqual(self.categorical._name, "Categorical") - self.assertEqual(self.categorical.param_names, ["probs"]) - # The transformation function will need to ensure the probabilities are valid (non-negative and sum to 1) - # Typically, this might involve applying softmax to ensure the constraints are met. - # Here, we assume `prob_transform` is something akin to softmax for the sake of test setup. 
- self.assertIsInstance(self.categorical.probs_transform, type(torch.nn.functional.softmax)) - - def test_compute_loss_known_values(self): - # Example with three categories - logits = torch.tensor([[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]]) # Logits for three categories - y_true = torch.tensor([2, 1]) - - loss = self.categorical.compute_loss(logits, y_true) - # Apply softmax to logits to convert them into probabilities - probs = torch.nn.functional.softmax(logits, dim=1) - cat_dist = torch.distributions.Categorical(probs=probs) - expected_loss = -cat_dist.log_prob(y_true).mean() - - self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5) - - -# Running the tests -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_lss.py b/tests/test_lss.py deleted file mode 100644 index 01192db..0000000 --- a/tests/test_lss.py +++ /dev/null @@ -1,104 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -import numpy as np -import pandas as pd -from properscoring import ( - crps_gaussian, -) - -# Assuming this is the source of the CRPS function -from sklearn.metrics import mean_poisson_deviance, mean_squared_error - -from mambular.models import MambularLSS # Update the import path - - -class TestMambularLSS(unittest.TestCase): - def setUp(self): - # Patch PyTorch Lightning's Trainer and any other external dependencies - self.patcher_trainer = patch("lightning.Trainer") - self.mock_trainer = self.patcher_trainer.start() - - self.patcher_base_model = patch("mambular.base_models.distributional.BaseMambularLSS") - self.mock_base_model = self.patcher_base_model.start() - - # Initialize MambularLSS with example parameters - self.model = MambularLSS(d_model=128, dropout=0.1, n_layers=4) - - # Sample data - self.X = pd.DataFrame(np.random.randn(100, 10)) - self.y = np.random.rand(100) - - self.model.cat_feature_info = {} - self.model.num_feature_info = {} - - self.X_test = pd.DataFrame(np.random.randn(100, 10)) - self.y_test = np.random.rand(100) ** 2 - - def tearDown(self): - self.patcher_trainer.stop() - self.patcher_base_model.stop() - - def test_initialization(self): - from mambular.utils.configs import DefaultMambularConfig - - self.assertIsInstance(self.model.config, DefaultMambularConfig) - self.assertEqual(self.model.config.d_model, 128) - self.assertEqual(self.model.config.dropout, 0.1) - self.assertEqual(self.model.config.n_layers, 4) - - def test_split_data(self): - X_train, X_val, y_train, y_val = self.model.split_data(self.X, self.y, val_size=0.2, random_state=42) - self.assertEqual(len(X_train), 80) - self.assertEqual(len(X_val), 20) - self.assertEqual(len(y_train), 80) - self.assertEqual(len(y_val), 20) - - def test_fit(self): - # Mock preprocessing and model setup to focus on testing training logic - self.model.preprocess_data = MagicMock() - self.model.model = self.mock_base_model - - self.model.fit(self.X, self.y, family="normal") - - # Ensure the fit method of the trainer is called - self.mock_trainer.return_value.fit.assert_called_once() - - def test_normal_metrics(self): - # Mock predictions for the normal distribution: [mean, variance] - mock_predictions = np.column_stack((np.random.normal(size=100), np.abs(np.random.normal(size=100)))) - self.model.predict = MagicMock(return_value=mock_predictions) - - # Define custom metrics or use a function that fetches appropriate metrics - self.model.get_default_metrics = MagicMock( - return_value={ - "MSE": lambda y, pred: mean_squared_error(y, pred[:, 0]), - "CRPS": lambda y, pred: np.mean( - [crps_gaussian(y[i], 
mu=pred[i, 0], sig=np.sqrt(pred[i, 1])) for i in range(len(y))] - ), - } - ) - - results = self.model.evaluate(self.X_test, self.y_test, distribution_family="normal") - - # Validate the MSE - expected_mse = mean_squared_error(self.y_test, mock_predictions[:, 0]) - self.assertAlmostEqual(results["MSE"], expected_mse, places=4) - self.assertIn("CRPS", results) # Check for existence but not the exact value in this test - - def test_poisson_metrics(self): - # Mock predictions for Poisson - mock_predictions = np.random.poisson(lam=3, size=100) + 1e-3 - self.model.predict = MagicMock(return_value=mock_predictions) - - self.model.get_default_metrics = MagicMock(return_value={"Poisson Deviance": mean_poisson_deviance}) - - results = self.model.evaluate(self.X_test, self.y_test, distribution_family="poisson") - self.assertIn("Poisson Deviance", results) - # Optionally calculate expected deviance and check - expected_deviance = mean_poisson_deviance(self.y_test, mock_predictions) - self.assertAlmostEqual(results["Poisson Deviance"], expected_deviance) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 68c3265..5c04630 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -1,82 +1,111 @@ -import unittest - +import pytest import numpy as np import pandas as pd from sklearn.exceptions import NotFittedError +from mambular.preprocessing import Preprocessor + + +@pytest.fixture +def sample_data(): + return pd.DataFrame( + { + "numerical": np.random.randn(100), + "categorical": np.random.choice(["A", "B", "C"], size=100), + "integer": np.random.randint(0, 5, size=100), + } + ) + + +@pytest.fixture +def sample_target(): + return np.random.randn(100) + + +@pytest.fixture( + params=[ + "ple", + "binning", + "one-hot", + "standardization", + "minmax", + "quantile", + "polynomial", + "robust", + "splines", + "yeo-johnson", + "box-cox", + "rbf", + "sigmoid", + "none", + ] +) +def preprocessor(request): + return Preprocessor( + numerical_preprocessing=request.param, categorical_preprocessing="one-hot" + ) + + +def test_preprocessor_initialization(preprocessor): + assert preprocessor.numerical_preprocessing in [ + "ple", + "binning", + "one-hot", + "standardization", + "minmax", + "quantile", + "polynomial", + "robust", + "splines", + "yeo-johnson", + "box-cox", + "rbf", + "sigmoid", + "none", + ] + assert preprocessor.categorical_preprocessing == "one-hot" + assert not preprocessor.fitted + + +def test_preprocessor_fit(preprocessor, sample_data, sample_target): + preprocessor.fit(sample_data, sample_target) + assert preprocessor.fitted + assert preprocessor.column_transformer is not None + + +def test_preprocessor_transform(preprocessor, sample_data, sample_target): + preprocessor.fit(sample_data, sample_target) + transformed = preprocessor.transform(sample_data) + assert isinstance(transformed, dict) + assert len(transformed) > 0 + + +def test_preprocessor_fit_transform(preprocessor, sample_data, sample_target): + transformed = preprocessor.fit_transform(sample_data, sample_target) + assert isinstance(transformed, dict) + assert len(transformed) > 0 + + +def test_preprocessor_get_params(preprocessor): + params = preprocessor.get_params() + assert "n_bins" in params + assert "numerical_preprocessing" in params + + +def test_preprocessor_set_params(preprocessor): + preprocessor.set_params(n_bins=128) + assert preprocessor.n_bins == 128 + + +def test_transform_before_fit_raises_error(preprocessor, sample_data): + with 
pytest.raises(NotFittedError): + preprocessor.transform(sample_data) + -from mambular.utils.preprocessor import Preprocessor - - -class TestPreprocessor(unittest.TestCase): - def setUp(self): - # Sample data for testing - self.data = pd.DataFrame( - { - "numerical": np.random.randn(500), - "categorical": np.random.choice(["A", "B", "C"], size=500), - "mixed": np.random.choice([1, "A", "B"], size=500), - } - ) - self.target = np.random.randn(500) - - def test_initialization(self): - """Test initialization of the Preprocessor with default parameters.""" - pp = Preprocessor(n_bins=20, numerical_preprocessing="binning") - self.assertEqual(pp.n_bins, 20) - self.assertEqual(pp.numerical_preprocessing, "binning") - self.assertFalse(pp.use_decision_tree_bins) - - def test_fit(self): - """Test the fitting process of the preprocessor.""" - pp = Preprocessor(numerical_preprocessing="binning", n_bins=20) - pp.fit(self.data, self.target) - self.assertIsNotNone(pp.column_transformer) - - def test_transform_not_fitted(self): - """Test that transform raises an error if called before fitting.""" - pp = Preprocessor() - with self.assertRaises(NotFittedError): - pp.transform(self.data) - - def test_fit_transform(self): - """Test fitting and transforming the data.""" - pp = Preprocessor(numerical_preprocessing="standardization") - transformed_data = pp.fit_transform(self.data, self.target) - self.assertIsInstance(transformed_data, dict) - self.assertTrue("num_numerical" in transformed_data) - self.assertTrue("cat_categorical" in transformed_data) - - def test_ple(self): - """Test fitting and transforming the data.""" - pp = Preprocessor(numerical_preprocessing="ple", n_bins=20) - transformed_data = pp.fit_transform(self.data, self.target) - self.assertIsInstance(transformed_data, dict) - self.assertTrue("num_numerical" in transformed_data) - self.assertTrue("cat_categorical" in transformed_data) - - def test_transform_with_missing_values(self): - """Ensure the preprocessor can handle missing values.""" - data_with_missing = self.data.copy() - data_with_missing.loc[0, "numerical"] = np.nan - data_with_missing.loc[1, "categorical"] = np.nan - pp = Preprocessor(numerical_preprocessing="normalization") - transformed_data = pp.fit_transform(data_with_missing, self.target) - self.assertNotIn(np.nan, transformed_data["num_numerical"]) - self.assertNotIn(np.nan, transformed_data["cat_categorical"]) - - def test_decision_tree_bins(self): - """Test the usage of decision tree for binning.""" - pp = Preprocessor(use_decision_tree_bins=True, numerical_preprocessing="binning", n_bins=5) - pp.fit(self.data, self.target) - # Checking if the preprocessor setup decision tree bins properly - self.assertTrue( - all( - isinstance(x, np.ndarray) - for x in pp._get_decision_tree_bins(self.data[["numerical"]], self.target, ["numerical"]) - ) - ) - - -# Running the tests -if __name__ == "__main__": - unittest.main() +def test_get_feature_info(preprocessor, sample_data, sample_target): + preprocessor.fit(sample_data, sample_target) + numerical_info, categorical_info, embedding_info = preprocessor.get_feature_info( + verbose=False + ) + assert isinstance(numerical_info, dict) + assert isinstance(categorical_info, dict) + assert isinstance(embedding_info, dict) diff --git a/tests/test_regressor.py b/tests/test_regressor.py deleted file mode 100644 index 8626039..0000000 --- a/tests/test_regressor.py +++ /dev/null @@ -1,103 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -import numpy as np -import pandas as pd 
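Aside (editorial, not part of the patch series): tests/test_regressor.py is deleted here without a pytest counterpart. A minimal pytest-style replacement in the spirit of the reworked tests/test_preprocessor.py might look like the sketch below; it assumes MambularRegressor still exposes its settings via model.config (d_model, dropout), as the deleted unittest version asserts, and the parametrized values are illustrative only.

import pytest

from mambular.models import MambularRegressor


@pytest.mark.parametrize("d_model,dropout", [(64, 0.1), (128, 0.2)])
def test_regressor_config_roundtrip(d_model, dropout):
    # Assumed behaviour: constructor kwargs land on model.config,
    # mirroring what the removed unittest version checked.
    model = MambularRegressor(d_model=d_model, dropout=dropout)
    assert model.config.d_model == d_model
    assert model.config.dropout == dropout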
-from sklearn.metrics import mean_squared_error, r2_score - -from mambular.models import MambularRegressor # Ensure correct import path - - -class TestMambularRegressor(unittest.TestCase): - def setUp(self): - # Patching external dependencies - self.patcher_pl_trainer = patch("lightning.Trainer") - self.mock_pl_trainer = self.patcher_pl_trainer.start() - - self.patcher_base_model = patch("mambular.base_models.regressor.BaseMambularRegressor") - self.mock_base_model = self.patcher_base_model.start() - - self.regressor = MambularRegressor(d_model=128, dropout=0.1) - - # Sample data - self.X = pd.DataFrame(np.random.randn(100, 10)) - self.y = np.random.rand(100) - - self.regressor.cat_feature_info = {} - self.regressor.num_feature_info = {} - - def tearDown(self): - self.patcher_pl_trainer.stop() - self.patcher_base_model.stop() - - def test_initialization(self): - # This assumes MambularConfig is properly imported and used in the MambularRegressor class - from mambular.utils.configs import DefaultMambularConfig - - self.assertIsInstance(self.regressor.config, DefaultMambularConfig) - self.assertEqual(self.regressor.config.d_model, 128) - self.assertEqual(self.regressor.config.dropout, 0.1) - - def test_split_data(self): - """Test the data splitting functionality.""" - X_train, X_val, y_train, y_val = self.regressor.split_data(self.X, self.y, val_size=0.2, random_state=42) - self.assertEqual(len(X_train), 80) - self.assertEqual(len(X_val), 20) - self.assertEqual(len(y_train), 80) - self.assertEqual(len(y_val), 20) - - def test_fit(self): - """Test the training setup and call.""" - # Mock the necessary parts to simulate training - self.regressor.preprocess_data = MagicMock() - self.regressor.model = self.mock_base_model - - self.regressor.fit(self.X, self.y) - - # Ensure that the fit method of the trainer is called - self.mock_pl_trainer.return_value.fit.assert_called_once() - - def test_predict(self): - # Create mock return objects that mimic tensor behavior - mock_prediction = MagicMock() - mock_prediction.cpu.return_value = MagicMock() - mock_prediction.cpu.return_value.numpy.return_value = np.array([0.5] * 100) - - # Mock the model and its method calls - self.regressor.model = MagicMock() - self.regressor.model.eval.return_value = None - self.regressor.model.return_value = mock_prediction - - # Mock preprocess_test_data to return dummy tensor data - self.regressor.preprocess_test_data = MagicMock(return_value=([], [])) - - predictions = self.regressor.predict(self.X) - - # Assert that predictions return as expected - np.testing.assert_array_equal(predictions, np.array([0.5] * 100)) - - def test_evaluate(self): - # Mock the predict method to simulate regressor output - mock_predictions = np.random.rand(100) - self.regressor.predict = MagicMock(return_value=mock_predictions) - - # Define metrics to test - metrics = {"Mean Squared Error": mean_squared_error, "R2 Score": r2_score} - - # Call evaluate with the defined metrics - result = self.regressor.evaluate(self.X, self.y, metrics=metrics) - - # Compute expected metrics directly - expected_mse = mean_squared_error(self.y, mock_predictions) - expected_r2 = r2_score(self.y, mock_predictions) - - # Check the results of evaluate - self.assertAlmostEqual(result["Mean Squared Error"], expected_mse) - self.assertAlmostEqual(result["R2 Score"], expected_r2) - - # Ensure predict was called correctly - self.regressor.predict.assert_called_once_with(self.X) - - -if __name__ == "__main__": - unittest.main() From 161f6de0c1eb21a7f5575471f376d777a353443e 
Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:14:42 +0100 Subject: [PATCH 18/24] remove dependence on rotary embeddings --- mambular/arch_utils/layer_utils/attention_utils.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mambular/arch_utils/layer_utils/attention_utils.py b/mambular/arch_utils/layer_utils/attention_utils.py index bdfed31..1b50d72 100644 --- a/mambular/arch_utils/layer_utils/attention_utils.py +++ b/mambular/arch_utils/layer_utils/attention_utils.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch.nn.functional as F from einops import rearrange -from rotary_embedding_torch import RotaryEmbedding class GEGLU(nn.Module): @@ -25,7 +24,7 @@ def FeedForward(dim, mult=4, dropout=0.0): class Attention(nn.Module): - def __init__(self, dim, heads=8, dim_head=64, dropout=0.0, rotary=False): + def __init__(self, dim, heads=8, dim_head=64, dropout=0.0): super().__init__() inner_dim = dim_head * heads self.heads = heads @@ -34,18 +33,13 @@ def __init__(self, dim, heads=8, dim_head=64, dropout=0.0, rotary=False): self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) self.to_out = nn.Linear(inner_dim, dim, bias=False) self.dropout = nn.Dropout(dropout) - self.rotary = rotary dim = np.int64(dim / 2) - self.rotary_embedding = RotaryEmbedding(dim=dim) def forward(self, x): h = self.heads x = self.norm(x) q, k, v = self.to_qkv(x).chunk(3, dim=-1) q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v)) # type: ignore - if self.rotary: - q = self.rotary_embedding.rotate_queries_or_keys(q) - k = self.rotary_embedding.rotate_queries_or_keys(k) q = q * self.scale sim = torch.einsum("b h i d, b h j d -> b h i j", q, k) @@ -61,7 +55,7 @@ def forward(self, x): class Transformer(nn.Module): - def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout, rotary=False): + def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout): super().__init__() self.layers = nn.ModuleList([]) @@ -74,7 +68,6 @@ def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout, rotary heads=heads, dim_head=dim_head, dropout=attn_dropout, - rotary=rotary, ), FeedForward(dim, dropout=ff_dropout), ] From bd998d3580160dff5b6c681755df27b317ec0992 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:18:08 +0100 Subject: [PATCH 19/24] include params related to [BUG] Missing Configuration Attributes in DefaultMambularConfig #209 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mambular/configs/mambular_config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mambular/configs/mambular_config.py b/mambular/configs/mambular_config.py index 8bc2f90..a60b54e 100644 --- a/mambular/configs/mambular_config.py +++ b/mambular/configs/mambular_config.py @@ -64,6 +64,12 @@ class DefaultMambularConfig(BaseConfig): Whether to use PSCAN for the state-space model. mamba_version : str, default="mamba-torch" Version of the Mamba model to use ('mamba-torch', 'mamba1', 'mamba2').
+ conv_bias : bool, default=False + Whether to use a bias in the 1D convolution before each Mamba block. + AD_weight_decay : bool, default=True + Whether to use weight decay also for the A and D matrices in Mamba. + BC_layer_norm : bool, default=False + Whether to use layer norm on the B and C matrices. """ # Architecture Parameters @@ -82,6 +88,9 @@ class DefaultMambularConfig(BaseConfig): dt_init_floor: float = 1e-04 norm: str = "RMSNorm" activation: Callable = nn.SiLU() # noqa: RUF009 + conv_bias: bool = False + AD_weight_decay: bool = True + BC_layer_norm: bool = False # Embedding Parameters shuffle_embeddings: bool = False From 44d3b3a69b722c0a2f3361968d957749d3b220da Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:32:26 +0100 Subject: [PATCH 20/24] test new unit test workflow for pull requests --- .github/workflows/pr-tests.yml | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/pr-tests.yml diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml new file mode 100644 index 0000000..4e22604 --- /dev/null +++ b/.github/workflows/pr-tests.yml @@ -0,0 +1,37 @@ +name: PR Unit Tests + +on: + pull_request: + branches: + - develop + - master # Add any other branches where you want to enforce tests + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.12" # Change this to match your setup + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + poetry install + pip install pytest + + - name: Run Unit Tests + run: pytest tests/ + + - name: Verify Tests Passed + if: ${{ success() }} + run: echo "All tests passed! Pull request is allowed."
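# Aside (editorial comment, not part of the workflow file added by this patch):
# pytest returning a non-zero exit code already fails the "Run Unit Tests" step
# and hence the job, so the two status steps below are informational only.
# Actually blocking a merge on this job additionally requires marking the check
# as required in the repository's branch protection settings.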
+ + - name: Fail PR on Test Failure + if: ${{ failure() }} + run: exit 1 # This ensures the PR cannot be merged if tests fail From 5fc2ed71b31ce9e978cc0bee3491c33042084d3b Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:35:02 +0100 Subject: [PATCH 21/24] change py-version --- .github/workflows/pr-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 4e22604..2e65f57 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.12" # Change this to match your setup + python-version: "3.8" # Change this to match your setup - name: Install Dependencies run: | From e722767b3e005450103cc5f974be6beea81b6df5 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:36:53 +0100 Subject: [PATCH 22/24] adapt test to .py version 3.10 --- .github/workflows/pr-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 2e65f57..fd31668 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.8" # Change this to match your setup + python-version: "3.10" # Change this to match your setup - name: Install Dependencies run: | From 1fcb03023378e362c84d841b8d385f67d986ca6b Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:39:14 +0100 Subject: [PATCH 23/24] install poetry in workflow --- .github/workflows/pr-tests.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index fd31668..ec85672 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -18,6 +18,11 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" # Change this to match your setup + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + export PATH="$HOME/.local/bin:$PATH" - name: Install Dependencies run: | From ac27a1da8a0a2299ee21344facee6b007f3dbfcd Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Wed, 12 Feb 2025 14:43:56 +0100 Subject: [PATCH 24/24] ensure mambular is locally installed --- .github/workflows/pr-tests.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index ec85672..0e878fe 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -22,6 +22,7 @@ jobs: - name: Install Poetry run: | curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> $GITHUB_PATH export PATH="$HOME/.local/bin:$PATH" - name: Install Dependencies @@ -30,7 +31,14 @@ jobs: poetry install pip install pytest + - name: Install Package Locally + run: | + poetry build + pip install dist/*.whl # Install the built package to fix "No module named 'mambular'" + - name: Run Unit Tests + env: + PYTHONPATH: ${{ github.workspace }} # Ensure the package is discoverable run: pytest tests/ - name: Verify Tests Passed