From 283de064072ce74f9f60929ce85c4aca32cbb542 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 21 Jun 2023 11:43:39 +0200 Subject: [PATCH 001/100] first proposal for batching in tranform method --- cebra/solver/base.py | 56 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index c350ba35..91588637 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -285,7 +285,40 @@ def decoding(self, train_loader, valid_loader): return decode_metric @torch.no_grad() - def transform(self, inputs: torch.Tensor) -> torch.Tensor: + def _transform(self, inputs, session_id): + output = self.model(inputs) + return output + + + @torch.no_grad() + def _batched_transform(self, inputs, session_id, batch_size): + num_samples = inputs.shape[0] + num_batches = (num_samples + batch_size - 1) // batch_size + output = [] + + for i in range(num_batches): + start_idx = i * batch_size + end_idx = min((i + 1) * batch_size, num_samples) + batched_data = inputs[start_idx:end_idx] + output_batch = self.model(batched_data) + output.append(output_batch) + + output = torch.cat(output) + return output + + + # OPTION 2 + #num_samples = inputs.shape[0] + #num_batches = (num_samples + batch_size - 1) // batch_size + #output = [self.model(inputs[i * batch_size : min((i + 1) * batch_size, num_samples)]) for i in range(num_batches)] + #output = torch.cat(output) + #return output + + @torch.no_grad() + def transform(self, + inputs: torch.Tensor, + session_id: Optional[int] = None, + batch_size: Optional[int] = None) -> torch.Tensor: """Compute the embedding. This function by default only applies the ``forward`` function @@ -293,17 +326,26 @@ def transform(self, inputs: torch.Tensor) -> torch.Tensor: Args: inputs: The input signal - + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. + Returns: The output embedding. - - TODO: - * Remove eval mode """ - self.model.eval() - return self.model(inputs) + + + if batch_size is not None: + #TODO: padding properly with convolutions!! + output = self._batched_transform(inputs, session_id, batch_size) + + else: + output = self._transform(inputs, session_id) + return output + + @abc.abstractmethod def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch: """Given a batch of input examples, return the model outputs. From 202e379bc5423bc9e1358aa104cadc94e20e5331 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 22 Jun 2023 16:02:30 +0200 Subject: [PATCH 002/100] first running version of padding with batched inference --- cebra/solver/base.py | 74 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 91588637..52bef6b9 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -44,6 +44,7 @@ import cebra.models from cebra.solver.util import Meter from cebra.solver.util import ProgressBar +import numpy as np @dataclasses.dataclass @@ -57,6 +58,11 @@ class Solver(abc.ABC, cebra.io.HasDevice): criterion: The criterion computed from the similarities between positive pairs and negative pairs. The criterion can have trainable parameters on its own. optimizer: A PyTorch optimizer for updating model and criterion parameters. + pad_before_transform: If ``False``, no padding is applied to the input sequence. 
+ and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would + only be ``n-m+1`` steps long. history: Deprecated since 0.0.2. Use :py:attr:`log`. decode_history: Deprecated since 0.0.2. Use a hook during training for validation and decoding. See the arguments of :py:meth:`fit`. @@ -69,6 +75,7 @@ class Solver(abc.ABC, cebra.io.HasDevice): model: torch.nn.Module criterion: torch.nn.Module optimizer: torch.optim.Optimizer + pad_before_transform: bool = True history: List = dataclasses.field(default_factory=list) decode_history: List = dataclasses.field(default_factory=list) log: Dict = dataclasses.field(default_factory=lambda: ({ @@ -95,6 +102,7 @@ def state_dict(self) -> dict: return { "model": self.model.state_dict(), "optimizer": self.optimizer.state_dict(), + "pad_before_transform": self.pad_before_transform, "loss": torch.tensor(self.history), "decode": self.decode_history, "criterion": self.criterion.state_dict(), @@ -130,6 +138,8 @@ def _get(key): self.criterion.load_state_dict(_get("criterion")) if _contains("optimizer"): self.optimizer.load_state_dict(_get("optimizer")) + if _contains("pad_before_transform"): + self.pad_before_transform = _get("pad_before_transform") # TODO(stes): This will be deprecated at some point; the "log" attribute # holds the same information. if _contains("loss"): @@ -286,12 +296,55 @@ def decoding(self, train_loader, valid_loader): @torch.no_grad() def _transform(self, inputs, session_id): + + #model = self.select_model(n_inputs_features=inputs.shape[1], + # session_id=session_id) + #model.to(inputs.device) + #offset = model.get_offset() +# + #model.eval() +# + #if self.pad_before_transform: + # device = inputs.device + # inputs = np.pad(inputs.cpu().numpy(), + # ((offset.left, offset.right - 1), (0, 0)), + # mode="edge") + # inputs = torch.from_numpy(inputs).float().to(device) +# + #if isinstance(model, cebra.models.ConvolutionalModelMixin): + # # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + # inputs = inputs.transpose(1, 0).unsqueeze(0) + # output = model(inputs).squeeze(0).transpose(1, 0) + #else: + # # Standard evaluation, (T, C, dt) + # output = model(inputs) + output = self.model(inputs) return output + + def _get_batched_data_with_padding(self, inputs, offset, start_idx, end_idx, batch_id, num_batches): + + if batch_id == 0: + batched_data = inputs[start_idx:(end_idx+offset.right)] + batched_data = np.pad(batched_data.cpu().numpy(), + ((offset.left, 0), (0, 0)), + mode="edge") + + elif batch_id == num_batches - 1: #Last batch + batched_data = inputs[(start_idx-offset.left):end_idx] + batched_data = np.pad(batched_data.cpu().numpy(), + ((0, offset.right-1), (0, 0)), + mode="edge") + + else: # Middle batches + batched_data = inputs[(start_idx-offset.left):(end_idx+offset.right-1)] + + return torch.from_numpy(batched_data) if isinstance(batched_data, np.ndarray) else batched_data + @torch.no_grad() - def _batched_transform(self, inputs, session_id, batch_size): + def _batched_transform(self, inputs, offset, session_id, batch_size): num_samples = inputs.shape[0] num_batches = (num_samples + batch_size - 1) // batch_size output = [] @@ -300,12 +353,23 @@ def _batched_transform(self, inputs, session_id, batch_size): start_idx = i * batch_size end_idx = min((i + 1) * batch_size, num_samples) batched_data = inputs[start_idx:end_idx] - output_batch = self.model(batched_data) + + 
if self.pad_before_transform: + batched_data = self._get_batched_data_with_padding(inputs, offset, start_idx, end_idx, i, num_batches) + + if isinstance(self.model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + batched_data = batched_data.transpose(1, 0).unsqueeze(0) + output_batch = self.model(batched_data).squeeze(0).transpose(1, 0) + else: + output_batch = self.model(batched_data) + + output.append(output_batch) output = torch.cat(output) + return output - # OPTION 2 #num_samples = inputs.shape[0] @@ -334,11 +398,13 @@ def transform(self, The output embedding. """ + offset = self.model.get_offset() + if batch_size is not None: #TODO: padding properly with convolutions!! - output = self._batched_transform(inputs, session_id, batch_size) + output = self._batched_transform(inputs, offset, session_id, batch_size) else: output = self._transform(inputs, session_id) From 1f1989d699253a487887c551aa67361e4ebcb79b Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 23 Jun 2023 11:53:00 +0200 Subject: [PATCH 003/100] start tests --- tests/test_solver.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test_solver.py b/tests/test_solver.py index 46efd319..633c1df0 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -29,6 +29,7 @@ import cebra.datasets import cebra.models import cebra.solver +import numpy as np device = "cpu" @@ -168,3 +169,36 @@ def test_multi_session(data_name, loader_initfunc, solver_initfunc): assert isinstance(log, dict) solver.fit(loader) + + +def test_batched_transform(data_name, loader_initfunc, solver_initfunc): + """ + test to know if we are getting the batches right without padding + """ + + loader = _get_loader(data_name, loader_initfunc) + model = _make_model(loader.dataset) + criterion = cebra.models.InfoNCE() + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + + solver = solver_initfunc(model=model, + criterion=criterion, + optimizer=optimizer, + pad_before_transform = False) + + solver.fit(loader) + + # batched_transform + batch_size = 1024 + + # should pad_before_transform be an argument of the transform() method? + embedding_batched = solver.transform(batch_size = batch_size) + embedding = solver.transform(batch_size = None) + + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding) + + + # TODO: how can I check that the batches are correct? + # maybe it is good enough if I compare to the embedding + # without batch size. \ No newline at end of file From 866566024df667e1d9419b6cfd3dc6a168780ee1 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 27 Sep 2023 17:57:07 +0200 Subject: [PATCH 004/100] add pad_before_transform to fit function and add support for convolutional models in _transform --- cebra/solver/base.py | 180 +++++++++++++++++++++++++++---------------- 1 file changed, 112 insertions(+), 68 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 52bef6b9..21b40d14 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -58,11 +58,6 @@ class Solver(abc.ABC, cebra.io.HasDevice): criterion: The criterion computed from the similarities between positive pairs and negative pairs. The criterion can have trainable parameters on its own. optimizer: A PyTorch optimizer for updating model and criterion parameters. - pad_before_transform: If ``False``, no padding is applied to the input sequence. 
- and the output sequence will be smaller than the input sequence due to the - receptive field of the model. If the input sequence is ``n`` steps long, - and a model with receptive field ``m`` is used, the output sequence would - only be ``n-m+1`` steps long. history: Deprecated since 0.0.2. Use :py:attr:`log`. decode_history: Deprecated since 0.0.2. Use a hook during training for validation and decoding. See the arguments of :py:meth:`fit`. @@ -75,7 +70,6 @@ class Solver(abc.ABC, cebra.io.HasDevice): model: torch.nn.Module criterion: torch.nn.Module optimizer: torch.optim.Optimizer - pad_before_transform: bool = True history: List = dataclasses.field(default_factory=list) decode_history: List = dataclasses.field(default_factory=list) log: Dict = dataclasses.field(default_factory=lambda: ({ @@ -102,7 +96,6 @@ def state_dict(self) -> dict: return { "model": self.model.state_dict(), "optimizer": self.optimizer.state_dict(), - "pad_before_transform": self.pad_before_transform, "loss": torch.tensor(self.history), "decode": self.decode_history, "criterion": self.criterion.state_dict(), @@ -138,8 +131,6 @@ def _get(key): self.criterion.load_state_dict(_get("criterion")) if _contains("optimizer"): self.optimizer.load_state_dict(_get("optimizer")) - if _contains("pad_before_transform"): - self.pad_before_transform = _get("pad_before_transform") # TODO(stes): This will be deprecated at some point; the "log" attribute # holds the same information. if _contains("loss"): @@ -294,95 +285,137 @@ def decoding(self, train_loader, valid_loader): ) return decode_metric - @torch.no_grad() - def _transform(self, inputs, session_id): + def _select_model(self, inputs: torch.Tensor, session_id: int): + is_multisession = False #TODO: take care of this + self.num_sessions = self.loader.dataset.num_sessions if is_multisession else None + if self.num_sessions is not None: # multisession implementation + if session_id is None: + raise RuntimeError( + "No session_id provided: multisession model requires a session_id to choose the model corresponding to your data shape." + ) + if session_id >= self.num_sessions or session_id < 0: + raise RuntimeError( + f"Invalid session_id {session_id}: session_id for the current multisession model must be between 0 and {self.num_sessions-1}." + ) + if self.n_features_[session_id] != X.shape[1]: + raise ValueError( + f"Invalid input shape: model for session {session_id} requires an input of shape" + f"(n_samples, {self.n_features_[session_id]}), got (n_samples, {X.shape[1]})." + ) - #model = self.select_model(n_inputs_features=inputs.shape[1], - # session_id=session_id) - #model.to(inputs.device) - #offset = model.get_offset() -# - #model.eval() -# - #if self.pad_before_transform: - # device = inputs.device - # inputs = np.pad(inputs.cpu().numpy(), - # ((offset.left, offset.right - 1), (0, 0)), - # mode="edge") - # inputs = torch.from_numpy(inputs).float().to(device) -# - #if isinstance(model, cebra.models.ConvolutionalModelMixin): - # # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - # inputs = inputs.transpose(1, 0).unsqueeze(0) - # output = model(inputs).squeeze(0).transpose(1, 0) - #else: - # # Standard evaluation, (T, C, dt) - # output = model(inputs) + model = self.model[session_id] + #model.to(self.device_) #TODO: do I need to do this? 
- output = self.model(inputs) - return output + else: # single session + if session_id is not None and session_id > 0: + raise RuntimeError( + f"Invalid session_id {session_id}: single session models only takes an optional null session_id." + ) + model = self.model + + offset = model.get_offset() + return model, offset + + def _get_batched_data_with_padding(self, + inputs: torch.Tensor, + offset: cebra.data.Offset, + start_batch_idx: int, + end_batch_idx: int, + batch_id: int, + num_batches: int) -> torch.Tensor: - def _get_batched_data_with_padding(self, inputs, offset, start_idx, end_idx, batch_id, num_batches): + """ + Given the start_batch_idx, end_batch_idx, adds padding. + For the first batch it adds 0 to left, data to right + For the last batch it adds data to left, 0 to right + For the middle batches if adds data both to left and right - if batch_id == 0: - batched_data = inputs[start_idx:(end_idx+offset.right)] + Args: + inputs + offset: + start_batch_idx: + end_batch_idx: + offset: cebra.datatypes.Offset + + """ + print(start_batch_idx, end_batch_idx) + if batch_id == 0: # First batch + batched_data = inputs[start_batch_idx:(end_batch_idx+offset.right-1)] batched_data = np.pad(batched_data.cpu().numpy(), ((offset.left, 0), (0, 0)), mode="edge") - + elif batch_id == num_batches - 1: #Last batch - batched_data = inputs[(start_idx-offset.left):end_idx] + batched_data = inputs[(start_batch_idx-offset.left):end_batch_idx] batched_data = np.pad(batched_data.cpu().numpy(), ((0, offset.right-1), (0, 0)), mode="edge") - - else: # Middle batches - batched_data = inputs[(start_idx-offset.left):(end_idx+offset.right-1)] + else: # Middle batches + batched_data = inputs[(start_batch_idx-offset.left):(end_batch_idx+offset.right-1)] + + print(inputs.shape, batched_data.shape) return torch.from_numpy(batched_data) if isinstance(batched_data, np.ndarray) else batched_data @torch.no_grad() - def _batched_transform(self, inputs, offset, session_id, batch_size): + def _batched_transform(self, model, inputs, offset, batch_size, pad_before_transform) -> torch.Tensor: num_samples = inputs.shape[0] num_batches = (num_samples + batch_size - 1) // batch_size output = [] for i in range(num_batches): - start_idx = i * batch_size - end_idx = min((i + 1) * batch_size, num_samples) - batched_data = inputs[start_idx:end_idx] - - if self.pad_before_transform: - batched_data = self._get_batched_data_with_padding(inputs, offset, start_idx, end_idx, i, num_batches) - - if isinstance(self.model, cebra.models.ConvolutionalModelMixin): + start_batch_idx = i * batch_size + end_batch_idx = min((i + 1) * batch_size, num_samples) + + if pad_before_transform: + batched_data = self._get_batched_data_with_padding( + inputs=inputs, + offset=offset, + start_batch_idx=start_batch_idx, + end_batch_idx=end_batch_idx, + batch_id=i, + num_batches=num_batches) + else: + batched_data = inputs[start_batch_idx:end_batch_idx] + + if isinstance(model, cebra.models.ConvolutionalModelMixin): # Fully convolutional evaluation, switch (T, C) -> (1, C, T) batched_data = batched_data.transpose(1, 0).unsqueeze(0) - output_batch = self.model(batched_data).squeeze(0).transpose(1, 0) + output_batch = model(batched_data).squeeze(0).transpose(1, 0) else: - output_batch = self.model(batched_data) + output_batch = model(batched_data) - output.append(output_batch) - output = torch.cat(output) return output - # OPTION 2 - #num_samples = inputs.shape[0] - #num_batches = (num_samples + batch_size - 1) // batch_size - #output = [self.model(inputs[i * 
batch_size : min((i + 1) * batch_size, num_samples)]) for i in range(num_batches)] - #output = torch.cat(output) - #return output + @torch.no_grad() + def _transform(self, model, inputs, offset, pad_before_transform) -> torch.Tensor: + + if pad_before_transform: + inputs = np.pad(inputs, ((offset.left, offset.right - 1), (0, 0)), mode="edge") + inputs = torch.from_numpy(inputs) + + if isinstance(model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + inputs = inputs.transpose(1, 0).unsqueeze(0) + output = model(inputs).squeeze(0).transpose(1, 0) + else: + output = model(inputs) + + return output @torch.no_grad() def transform(self, inputs: torch.Tensor, + pad_before_transform: bool = True, #TODO: what should be the default? session_id: Optional[int] = None, batch_size: Optional[int] = None) -> torch.Tensor: + + """Compute the embedding. This function by default only applies the ``forward`` function @@ -390,6 +423,11 @@ def transform(self, Args: inputs: The input signal + pad_before_transform: If ``False``, no padding is applied to the input sequence. + and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would + only be ``n-m+1`` steps long. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. @@ -397,21 +435,27 @@ def transform(self, Returns: The output embedding. """ + model, offset = self._select_model(inputs, session_id) + model.eval() - offset = self.model.get_offset() - + if len(offset) < 2 and pad_before_transform: + raise ValueError("Padding does not make sense when the offset of the model is < 2") - if batch_size is not None: - #TODO: padding properly with convolutions!! - output = self._batched_transform(inputs, offset, session_id, batch_size) + output = self._batched_transform(model=model, + inputs=inputs, + offset=offset, + batch_size=batch_size, + pad_before_transform=pad_before_transform,) else: - output = self._transform(inputs, session_id) + output = self._transform(model=model, + inputs=inputs, + offset=offset, + pad_before_transform=pad_before_transform) return output - @abc.abstractmethod def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch: """Given a batch of input examples, return the model outputs. 
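The padding arithmetic introduced in the patch above can be checked in isolation. With an offset of (left, right), i.e. a receptive field of m = left + right, edge-padding the input by (left, right - 1) keeps one output per input sample, while the unpadded output shrinks to n - m + 1 samples as the docstring states. A minimal sketch with toy values (the concrete numbers are illustrative only, not part of the patch):

import numpy as np

n, left, right = 10, 2, 2              # toy sizes; receptive field m = left + right
m = left + right
x = np.arange(2 * n, dtype=float).reshape(n, 2)

unpadded_length = n - m + 1            # output length without padding: 7
padded = np.pad(x, ((left, right - 1), (0, 0)), mode="edge")
padded_length = padded.shape[0] - m + 1

assert unpadded_length == 7
assert padded_length == n              # edge padding restores one output per sample
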
From 8d5b114e085bfa0080cb57623bd4f1c058795670 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 27 Sep 2023 17:58:19 +0200 Subject: [PATCH 005/100] remove print statements --- cebra/solver/base.py | 137 ++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 21b40d14..a243fe2e 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -35,6 +35,7 @@ from typing import Callable, Dict, List, Literal, Optional, Union import literate_dataclasses as dataclasses +import numpy as np import torch import tqdm @@ -44,7 +45,6 @@ import cebra.models from cebra.solver.util import Meter from cebra.solver.util import ProgressBar -import numpy as np @dataclasses.dataclass @@ -286,7 +286,7 @@ def decoding(self, train_loader, valid_loader): return decode_metric def _select_model(self, inputs: torch.Tensor, session_id: int): - is_multisession = False #TODO: take care of this + is_multisession = False #TODO: take care of this self.num_sessions = self.loader.dataset.num_sessions if is_multisession else None if self.num_sessions is not None: # multisession implementation if session_id is None: @@ -305,7 +305,7 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): model = self.model[session_id] #model.to(self.device_) #TODO: do I need to do this? - + else: # single session if session_id is not None and session_id > 0: raise RuntimeError( @@ -315,16 +315,12 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): offset = model.get_offset() return model, offset - - - def _get_batched_data_with_padding(self, - inputs: torch.Tensor, - offset: cebra.data.Offset, - start_batch_idx: int, - end_batch_idx: int, - batch_id: int, - num_batches: int) -> torch.Tensor: + def _get_batched_data_with_padding(self, inputs: torch.Tensor, + offset: cebra.data.Offset, + start_batch_idx: int, end_batch_idx: int, + batch_id: int, + num_batches: int) -> torch.Tensor: """ Given the start_batch_idx, end_batch_idx, adds padding. 
For the first batch it adds 0 to left, data to right @@ -332,35 +328,37 @@ def _get_batched_data_with_padding(self, For the middle batches if adds data both to left and right Args: - inputs - offset: - start_batch_idx: - end_batch_idx: + inputs + offset: + start_batch_idx: + end_batch_idx: offset: cebra.datatypes.Offset """ - print(start_batch_idx, end_batch_idx) - if batch_id == 0: # First batch - batched_data = inputs[start_batch_idx:(end_batch_idx+offset.right-1)] + if batch_id == 0: # First batch + batched_data = inputs[start_batch_idx:(end_batch_idx + + offset.right - 1)] batched_data = np.pad(batched_data.cpu().numpy(), - ((offset.left, 0), (0, 0)), - mode="edge") + ((offset.left, 0), (0, 0)), + mode="edge") - elif batch_id == num_batches - 1: #Last batch - batched_data = inputs[(start_batch_idx-offset.left):end_batch_idx] + elif batch_id == num_batches - 1: #Last batch + batched_data = inputs[(start_batch_idx - offset.left):end_batch_idx] batched_data = np.pad(batched_data.cpu().numpy(), - ((0, offset.right-1), (0, 0)), - mode="edge") + ((0, offset.right - 1), (0, 0)), + mode="edge") + + else: # Middle batches + batched_data = inputs[(start_batch_idx - + offset.left):(end_batch_idx + offset.right - + 1)] - else: # Middle batches - batched_data = inputs[(start_batch_idx-offset.left):(end_batch_idx+offset.right-1)] - - print(inputs.shape, batched_data.shape) - return torch.from_numpy(batched_data) if isinstance(batched_data, np.ndarray) else batched_data - + return torch.from_numpy(batched_data) if isinstance( + batched_data, np.ndarray) else batched_data @torch.no_grad() - def _batched_transform(self, model, inputs, offset, batch_size, pad_before_transform) -> torch.Tensor: + def _batched_transform(self, model, inputs, offset, batch_size, + pad_before_transform) -> torch.Tensor: num_samples = inputs.shape[0] num_batches = (num_samples + batch_size - 1) // batch_size output = [] @@ -368,35 +366,37 @@ def _batched_transform(self, model, inputs, offset, batch_size, pad_before_trans for i in range(num_batches): start_batch_idx = i * batch_size end_batch_idx = min((i + 1) * batch_size, num_samples) - + if pad_before_transform: batched_data = self._get_batched_data_with_padding( - inputs=inputs, - offset=offset, - start_batch_idx=start_batch_idx, - end_batch_idx=end_batch_idx, - batch_id=i, - num_batches=num_batches) + inputs=inputs, + offset=offset, + start_batch_idx=start_batch_idx, + end_batch_idx=end_batch_idx, + batch_id=i, + num_batches=num_batches) else: batched_data = inputs[start_batch_idx:end_batch_idx] - + if isinstance(model, cebra.models.ConvolutionalModelMixin): # Fully convolutional evaluation, switch (T, C) -> (1, C, T) batched_data = batched_data.transpose(1, 0).unsqueeze(0) output_batch = model(batched_data).squeeze(0).transpose(1, 0) else: output_batch = model(batched_data) - + output.append(output_batch) output = torch.cat(output) - + return output @torch.no_grad() - def _transform(self, model, inputs, offset, pad_before_transform) -> torch.Tensor: - + def _transform(self, model, inputs, offset, + pad_before_transform) -> torch.Tensor: + if pad_before_transform: - inputs = np.pad(inputs, ((offset.left, offset.right - 1), (0, 0)), mode="edge") + inputs = np.pad(inputs, ((offset.left, offset.right - 1), (0, 0)), + mode="edge") inputs = torch.from_numpy(inputs) if isinstance(model, cebra.models.ConvolutionalModelMixin): @@ -405,17 +405,16 @@ def _transform(self, model, inputs, offset, pad_before_transform) -> torch.Tenso output = model(inputs).squeeze(0).transpose(1, 0) 
else: output = model(inputs) - + return output @torch.no_grad() - def transform(self, - inputs: torch.Tensor, - pad_before_transform: bool = True, #TODO: what should be the default? - session_id: Optional[int] = None, - batch_size: Optional[int] = None) -> torch.Tensor: - - + def transform( + self, + inputs: torch.Tensor, + pad_before_transform: bool = True, #TODO: what should be the default? + session_id: Optional[int] = None, + batch_size: Optional[int] = None) -> torch.Tensor: """Compute the embedding. This function by default only applies the ``forward`` function @@ -424,14 +423,14 @@ def transform(self, Args: inputs: The input signal pad_before_transform: If ``False``, no padding is applied to the input sequence. - and the output sequence will be smaller than the input sequence due to the - receptive field of the model. If the input sequence is ``n`` steps long, - and a model with receptive field ``m`` is used, the output sequence would + and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would only be ``n-m+1`` steps long. - session_id: The session ID, an :py:class:`int` between 0 and - the number of sessions -1 for multisession, and set to + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to ``None`` for single session. - + Returns: The output embedding. """ @@ -439,14 +438,18 @@ def transform(self, model.eval() if len(offset) < 2 and pad_before_transform: - raise ValueError("Padding does not make sense when the offset of the model is < 2") - + raise ValueError( + "Padding does not make sense when the offset of the model is < 2" + ) + if batch_size is not None: - output = self._batched_transform(model=model, - inputs=inputs, - offset=offset, - batch_size=batch_size, - pad_before_transform=pad_before_transform,) + output = self._batched_transform( + model=model, + inputs=inputs, + offset=offset, + batch_size=batch_size, + pad_before_transform=pad_before_transform, + ) else: output = self._transform(model=model, @@ -455,7 +458,7 @@ def transform(self, pad_before_transform=pad_before_transform) return output - + @abc.abstractmethod def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch: """Given a batch of input examples, return the model outputs. 
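The batch windows built by _get_batched_data_with_padding above follow a simple scheme: the first batch is edge-padded on the left and borrows offset.right - 1 samples from the following batch, the last batch borrows offset.left samples from the preceding batch and is edge-padded on the right, and middle batches borrow real context on both sides. A minimal sketch of that index bookkeeping, with made-up offset and batch sizes:

left, right = 2, 3                     # stand-ins for offset.left / offset.right
num_samples, batch_size = 20, 8
bounds = [(s, min(s + batch_size, num_samples))
          for s in range(0, num_samples, batch_size)]

for batch_id, (start, end) in enumerate(bounds):
    if batch_id == 0:                        # pad left edge, borrow right context
        window = (start, end + right - 1)
    elif batch_id == len(bounds) - 1:        # borrow left context, pad right edge
        window = (start - left, end)
    else:                                    # borrow context on both sides
        window = (start - left, end + right - 1)
    print(batch_id, window)                  # (0, 10), (6, 18), (14, 20)
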
From 32c5ecd28d4ebce8b1063d18cd5a849327e85b76 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 27 Sep 2023 18:22:12 +0200 Subject: [PATCH 006/100] first passing test --- tests/test_solver.py | 61 +++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 633c1df0..06fea193 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -21,6 +21,7 @@ # import itertools +import numpy as np import pytest import torch from torch import nn @@ -29,7 +30,6 @@ import cebra.datasets import cebra.models import cebra.solver -import numpy as np device = "cpu" @@ -171,34 +171,53 @@ def test_multi_session(data_name, loader_initfunc, solver_initfunc): solver.fit(loader) -def test_batched_transform(data_name, loader_initfunc, solver_initfunc): - """ - test to know if we are getting the batches right without padding - """ +def create_model(model_name, dataset): + return cebra.models.init(model_name, + num_neurons=dataset.input_dimension, + num_units=128, + num_output=5) + + +single_session_tests_transform = [] +for model_name in ["offset1-model", "offset10-model"]: + for args in [ + ("demo-discrete", model_name, cebra.data.DiscreteDataLoader), + ("demo-continuous", model_name, cebra.data.ContinuousDataLoader), + ("demo-mixed", model_name, cebra.data.MixedDataLoader), + ]: + single_session_tests_transform.append( + (*args, cebra.solver.SingleSessionSolver)) + + +@pytest.mark.parametrize( + "data_name, model_name, loader_initfunc, solver_initfunc", + single_session_tests_transform) +def test_batched_transform_no_padding(data_name, model_name, loader_initfunc, + solver_initfunc): + batch_size = 1024 + dataset = cebra.datasets.init(data_name) + model = create_model(model_name, dataset) + dataset.offset = model.get_offset() + loader_kwargs = dict(num_steps=10, batch_size=32) + loader = loader_initfunc(dataset, **loader_kwargs) - loader = _get_loader(data_name, loader_initfunc) - model = _make_model(loader.dataset) criterion = cebra.models.InfoNCE() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) solver = solver_initfunc(model=model, criterion=criterion, - optimizer=optimizer, - pad_before_transform = False) - + optimizer=optimizer) solver.fit(loader) - # batched_transform - batch_size = 1024 - - # should pad_before_transform be an argument of the transform() method? - embedding_batched = solver.transform(batch_size = batch_size) - embedding = solver.transform(batch_size = None) + embedding_batched = solver.transform(inputs=loader.dataset.neural, + batch_size=batch_size, + pad_before_transform=False) - assert embedding_batched.shape == embedding.shape - assert np.allclose(embedding_batched, embedding) + embedding = solver.transform(inputs=loader.dataset.neural, + pad_before_transform=False) + if not isinstance(model, cebra.models.ConvolutionalModelMixin): + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding, rtol=1e-02) - # TODO: how can I check that the batches are correct? - # maybe it is good enough if I compare to the embedding - # without batch size. \ No newline at end of file + #TODO: what tests can I do with convolutional models when there is no padding? 
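One possible answer to the TODO at the end of the test above: even without padding, a convolutional model with receptive field m maps n samples to n - m + 1 embeddings, so the output length itself is a testable property. A self-contained illustration using a plain torch Conv1d rather than a CEBRA model:

import torch

n, m, channels = 100, 10, 3
conv = torch.nn.Conv1d(channels, 8, kernel_size=m)   # receptive field of m samples
x = torch.randn(1, channels, n)                      # (1, C, T) layout, as in _transform
assert conv(x).shape[-1] == n - m + 1                # 91 embeddings for 100 samples
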
From 9928f635a0deaa8d8f6c95b91b38816b783eba4e Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 28 Sep 2023 11:48:47 +0200 Subject: [PATCH 007/100] add support for hybrid models --- cebra/solver/base.py | 19 ++++-- tests/test_solver.py | 138 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 126 insertions(+), 31 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index a243fe2e..125c25c8 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -286,8 +286,10 @@ def decoding(self, train_loader, valid_loader): return decode_metric def _select_model(self, inputs: torch.Tensor, session_id: int): - is_multisession = False #TODO: take care of this - self.num_sessions = self.loader.dataset.num_sessions if is_multisession else None + """ Select the right model based on the type of solver we have.""" + + self.num_sessions = self.loader.dataset.num_sessions if isinstance( + inputs, list) else None if self.num_sessions is not None: # multisession implementation if session_id is None: raise RuntimeError( @@ -304,14 +306,23 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): ) model = self.model[session_id] - #model.to(self.device_) #TODO: do I need to do this? + model.to(self.device_) #TODO: why do I need to do this? else: # single session if session_id is not None and session_id > 0: raise RuntimeError( f"Invalid session_id {session_id}: single session models only takes an optional null session_id." ) - model = self.model + + if isinstance( + self, + cebra.solver.single_session.SingleSessionHybridSolver): + # NOTE: This is different from the sklearn API implementation. The issue is that here the + # model is a cebra.models.MultiObjective instance, and therefore to do inference I need + # to get the module inside this model. 
+ model = self.model.module + else: + model = self.model offset = model.get_offset() return model, offset diff --git a/tests/test_solver.py b/tests/test_solver.py index 06fea193..5412b697 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -171,32 +171,51 @@ def test_multi_session(data_name, loader_initfunc, solver_initfunc): solver.fit(loader) -def create_model(model_name, dataset): +def create_model(model_name, input_dimension): return cebra.models.init(model_name, - num_neurons=dataset.input_dimension, + num_neurons=input_dimension, num_units=128, num_output=5) single_session_tests_transform = [] -for model_name in ["offset1-model", "offset10-model"]: - for args in [ - ("demo-discrete", model_name, cebra.data.DiscreteDataLoader), - ("demo-continuous", model_name, cebra.data.ContinuousDataLoader), - ("demo-mixed", model_name, cebra.data.MixedDataLoader), - ]: - single_session_tests_transform.append( - (*args, cebra.solver.SingleSessionSolver)) +for padding in [True, False]: + for model_name in ["offset1-model", "offset10-model"]: + for args in [ + ("demo-discrete", model_name, padding, + cebra.data.DiscreteDataLoader), + ("demo-continuous", model_name, padding, + cebra.data.ContinuousDataLoader), + ("demo-mixed", model_name, padding, cebra.data.MixedDataLoader), + ]: + single_session_tests_transform.append( + (*args, cebra.solver.SingleSessionSolver)) + +single_session_hybrid_tests_transform = [] +for padding in [True, False]: + for model_name in ["offset1-model", "offset10-model"]: + for args in [("demo-continuous", model_name, padding, + cebra.data.HybridDataLoader)]: + single_session_hybrid_tests_transform.append( + (*args, cebra.solver.SingleSessionHybridSolver)) + +multi_session_tests_transform = [] +for padding in [True, False]: + for model_name in ["offset1-model", "offset10-model"]: + for args in [("demo-continuous-multisession", model_name, padding, + cebra.data.ContinuousMultiSessionDataLoader)]: + multi_session_tests_transform.append( + (*args, cebra.solver.MultiSessionSolver)) @pytest.mark.parametrize( - "data_name, model_name, loader_initfunc, solver_initfunc", - single_session_tests_transform) -def test_batched_transform_no_padding(data_name, model_name, loader_initfunc, - solver_initfunc): + "data_name, model_name, padding, loader_initfunc, solver_initfunc", + single_session_tests_transform + single_session_hybrid_tests_transform) +def test_batched_transform_singlesession(data_name, model_name, padding, + loader_initfunc, solver_initfunc): batch_size = 1024 dataset = cebra.datasets.init(data_name) - model = create_model(model_name, dataset) + model = create_model(model_name, dataset.input_dimension) dataset.offset = model.get_offset() loader_kwargs = dict(num_steps=10, batch_size=32) loader = loader_initfunc(dataset, **loader_kwargs) @@ -209,15 +228,80 @@ def test_batched_transform_no_padding(data_name, model_name, loader_initfunc, optimizer=optimizer) solver.fit(loader) - embedding_batched = solver.transform(inputs=loader.dataset.neural, - batch_size=batch_size, - pad_before_transform=False) - - embedding = solver.transform(inputs=loader.dataset.neural, - pad_before_transform=False) - - if not isinstance(model, cebra.models.ConvolutionalModelMixin): - assert embedding_batched.shape == embedding.shape - assert np.allclose(embedding_batched, embedding, rtol=1e-02) - - #TODO: what tests can I do with convolutional models when there is no padding? 
+ if len(model.get_offset()) < 2 and padding: + with pytest.raises(ValueError): + solver.transform(inputs=loader.dataset.neural, + pad_before_transform=padding) + + with pytest.raises(ValueError): + solver.transform(inputs=loader.dataset.neural, + batch_size=batch_size, + pad_before_transform=padding) + else: + embedding_batched = solver.transform(inputs=loader.dataset.neural, + batch_size=batch_size, + pad_before_transform=padding) + + embedding = solver.transform(inputs=loader.dataset.neural, + pad_before_transform=padding) + + if padding: + if isinstance(model, cebra.models.ConvolutionalModelMixin): + assert embedding_batched.shape == embedding.shape + assert embedding_batched.shape == embedding.shape + + else: + if isinstance(model, cebra.models.ConvolutionalModelMixin): + #TODO: what to check here exactly? + pass + else: + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding, rtol=1e-02) + + +# def test_batched_transform_multisession(data_name, model_name, padding, loader_initfunc, solver_initfunc): +# batch_size = 1024 +# dataset = cebra.datasets.init(data_name) +# model = nn.ModuleList( +# [create_model(model_name, dataset.input_dimension) for dataset in dataset.iter_sessions()]) +# dataset.offset = model[0].get_offset() +# loader_kwargs = dict(num_steps=10, batch_size=32) +# loader = loader_initfunc(dataset, **loader_kwargs) + +# criterion = cebra.models.InfoNCE() +# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + +# solver = solver_initfunc(model=model, +# criterion=criterion, +# optimizer=optimizer) +# solver.fit(loader) + +# if len(model.get_offset()) < 2 and padding: +# with pytest.raises(ValueError): +# solver.transform(inputs=loader.dataset.neural, +# pad_before_transform=padding) + +# with pytest.raises(ValueError): +# solver.transform(inputs=loader.dataset.neural, +# batch_size=batch_size, +# pad_before_transform=padding) +# else: +# embedding_batched = solver.transform(inputs=loader.dataset.neural, +# batch_size=batch_size, +# pad_before_transform=padding) + +# embedding = solver.transform(inputs=loader.dataset.neural, +# pad_before_transform=padding) + +# if padding: +# if isinstance(model, cebra.models.ConvolutionalModelMixin): +# assert embedding_batched.shape == embedding.shape +# assert embedding_batched.shape == embedding.shape + +# else: +# if isinstance(model, cebra.models.ConvolutionalModelMixin): +# #TODO: what to check here exactly? +# pass +# else: +# assert embedding_batched.shape == embedding.shape +# assert np.allclose(embedding_batched, embedding, rtol=1e-02) From be5630aed262e9036523e1727f748e977df7b5f7 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 28 Sep 2023 13:40:13 +0200 Subject: [PATCH 008/100] rewrite transform in sklearn API --- cebra/integrations/sklearn/cebra.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 077d3c47..2c9eba2b 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1200,11 +1200,17 @@ def fit( def transform(self, X: Union[npt.NDArray, torch.Tensor], + pad_before_transform: bool = True, session_id: Optional[int] = None) -> npt.NDArray: """Transform an input sequence and return the embedding. Args: X: A numpy array or torch tensor of size ``time x dimension``. + pad_before_transform: If ``False``, no padding is applied to the input sequence. 
+ and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would + only be ``n-m+1`` steps long. session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`num_sessions` for multisession, set to ``None`` for single session. @@ -1224,27 +1230,13 @@ def transform(self, """ sklearn_utils_validation.check_is_fitted(self, "n_features_") - model, offset = self._select_model(X, session_id) - # Input validation X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) input_dtype = X.dtype with torch.no_grad(): - model.eval() - - if self.pad_before_transform: - X = np.pad(X, ((offset.left, offset.right - 1), (0, 0)), - mode="edge") - X = torch.from_numpy(X).float().to(self.device_) - - if isinstance(model, cebra.models.ConvolutionalModelMixin): - # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - X = X.transpose(1, 0).unsqueeze(0) - output = model(X).cpu().numpy().squeeze(0).transpose(1, 0) - else: - # Standard evaluation, (T, C, dt) - output = model(X).cpu().numpy() + output = self.solver_.transform( + X, pad_before_transform=pad_before_transform) if input_dtype == "float64": return output.astype(input_dtype) From 1300b2052ccc27d2eb7077de145c0f662202cd29 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 16 Oct 2023 16:41:25 +0200 Subject: [PATCH 009/100] baseline version of a torch.Datset --- cebra/solver/util.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cebra/solver/util.py b/cebra/solver/util.py index 584eb0da..c7dc7533 100644 --- a/cebra/solver/util.py +++ b/cebra/solver/util.py @@ -25,6 +25,7 @@ from typing import Dict import literate_dataclasses as dataclasses +import torch import tqdm @@ -106,3 +107,31 @@ def set_description(self, stats: Dict[str, float]): """ if self.use_tqdm: self.iterator.set_description(_description(stats)) + + +def initalize_torch_dataloader(inputs: torch.Tensor, batch_size: int): + """ + Initializes a torch DataLoader. + Args: + inputs: NxD tensor + batch_size: what happens when is None? it should return the whole dataset. + """ + + class TorchDataset(torch.utils.data.Dataset): + + def __init__(self, inputs): + self.inputs = inputs + + def __len__(self): + return len(self.inputs) + + def __getitem__(self, idx): + return self.data[idx] + + # TODO: I need to implement the padding inside the dataset, otherwise + # I can't properly do this afterwards I think. + + # I wrote the simplest version possible of a torch.utils.data.Dataset, + # but should be extended with the padding. 
+ + return torch.util.data.DataLoader(TorchDataset, batch_size=batch_size) From bc6af241dceb8183a142627f756cc2c6d4c2973a Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 20 Oct 2023 17:22:58 +0200 Subject: [PATCH 010/100] move batching logic outside solver --- cebra/solver/base.py | 97 +++++++++++--------------------------------- cebra/solver/util.py | 65 +++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 90 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 125c25c8..b282b27f 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -43,6 +43,7 @@ import cebra.data import cebra.io import cebra.models +import cebra.solver.util as cebra_solver_util from cebra.solver.util import Meter from cebra.solver.util import ProgressBar @@ -285,6 +286,17 @@ def decoding(self, train_loader, valid_loader): ) return decode_metric + def _inference_transform(self, model, inputs): + + if isinstance(model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + inputs = inputs.transpose(1, 0).unsqueeze(0) + output = model(inputs).squeeze(0).transpose(1, 0) + else: + output = model(inputs) + + return output + def _select_model(self, inputs: torch.Tensor, session_id: int): """ Select the right model based on the type of solver we have.""" @@ -327,78 +339,23 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): offset = model.get_offset() return model, offset - def _get_batched_data_with_padding(self, inputs: torch.Tensor, - offset: cebra.data.Offset, - start_batch_idx: int, end_batch_idx: int, - batch_id: int, - num_batches: int) -> torch.Tensor: - """ - Given the start_batch_idx, end_batch_idx, adds padding. - For the first batch it adds 0 to left, data to right - For the last batch it adds data to left, 0 to right - For the middle batches if adds data both to left and right - - Args: - inputs - offset: - start_batch_idx: - end_batch_idx: - offset: cebra.datatypes.Offset - - """ - if batch_id == 0: # First batch - batched_data = inputs[start_batch_idx:(end_batch_idx + - offset.right - 1)] - batched_data = np.pad(batched_data.cpu().numpy(), - ((offset.left, 0), (0, 0)), - mode="edge") - - elif batch_id == num_batches - 1: #Last batch - batched_data = inputs[(start_batch_idx - offset.left):end_batch_idx] - batched_data = np.pad(batched_data.cpu().numpy(), - ((0, offset.right - 1), (0, 0)), - mode="edge") - - else: # Middle batches - batched_data = inputs[(start_batch_idx - - offset.left):(end_batch_idx + offset.right - - 1)] - - return torch.from_numpy(batched_data) if isinstance( - batched_data, np.ndarray) else batched_data - @torch.no_grad() def _batched_transform(self, model, inputs, offset, batch_size, pad_before_transform) -> torch.Tensor: - num_samples = inputs.shape[0] - num_batches = (num_samples + batch_size - 1) // batch_size output = [] - - for i in range(num_batches): - start_batch_idx = i * batch_size - end_batch_idx = min((i + 1) * batch_size, num_samples) - - if pad_before_transform: - batched_data = self._get_batched_data_with_padding( - inputs=inputs, - offset=offset, - start_batch_idx=start_batch_idx, - end_batch_idx=end_batch_idx, - batch_id=i, - num_batches=num_batches) - else: - batched_data = inputs[start_batch_idx:end_batch_idx] - - if isinstance(model, cebra.models.ConvolutionalModelMixin): - # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - batched_data = batched_data.transpose(1, 0).unsqueeze(0) - output_batch = model(batched_data).squeeze(0).transpose(1, 0) - else: - 
output_batch = model(batched_data) - + batches = cebra_solver_util.get_batches_of_data( + inputs=inputs, + batch_size=batch_size, + padding=pad_before_transform, + offset=offset) + + # NOTE: If we move this inside the `cebra_solver_util.get_batches_of_data`or similar + # we avoid a second for loop. Is it good practice to do inference outside the solver? + for batch in batches: + output_batch = self._inference_transform(model, batch) output.append(output_batch) - output = torch.cat(output) + output = torch.cat(output) return output @torch.no_grad() @@ -410,13 +367,7 @@ def _transform(self, model, inputs, offset, mode="edge") inputs = torch.from_numpy(inputs) - if isinstance(model, cebra.models.ConvolutionalModelMixin): - # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - inputs = inputs.transpose(1, 0).unsqueeze(0) - output = model(inputs).squeeze(0).transpose(1, 0) - else: - output = model(inputs) - + output = self._inference_transform(model, inputs) return output @torch.no_grad() diff --git a/cebra/solver/util.py b/cebra/solver/util.py index c7dc7533..4137dab7 100644 --- a/cebra/solver/util.py +++ b/cebra/solver/util.py @@ -25,8 +25,13 @@ from typing import Dict import literate_dataclasses as dataclasses +import numpy as np import torch import tqdm +from torch.utils.data import DataLoader +from torch.utils.data import Dataset + +import cebra.data def _description(stats: Dict[str, float]): @@ -109,15 +114,13 @@ def set_description(self, stats: Dict[str, float]): self.iterator.set_description(_description(stats)) -def initalize_torch_dataloader(inputs: torch.Tensor, batch_size: int): - """ - Initializes a torch DataLoader. - Args: - inputs: NxD tensor - batch_size: what happens when is None? it should return the whole dataset. - """ +def get_batches_of_data(inputs: torch.Tensor, + batch_size: int, + padding: bool, + offset: cebra.data.Offset = None): + batches = [] - class TorchDataset(torch.utils.data.Dataset): + class IndexDataset(Dataset): def __init__(self, inputs): self.inputs = inputs @@ -126,12 +129,40 @@ def __len__(self): return len(self.inputs) def __getitem__(self, idx): - return self.data[idx] - - # TODO: I need to implement the padding inside the dataset, otherwise - # I can't properly do this afterwards I think. - - # I wrote the simplest version possible of a torch.utils.data.Dataset, - # but should be extended with the padding. 
- - return torch.util.data.DataLoader(TorchDataset, batch_size=batch_size) + return idx + + index_dataset = IndexDataset(inputs) + index_dataloader = DataLoader(index_dataset, batch_size=batch_size) + for batch_id, index_batch in enumerate(index_dataloader): + + start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] + if padding: + if offset is None: + raise ValueError("offset needs to be set if padding is True.") + + if batch_id == 0: + indices = start_batch_idx, (end_batch_idx + offset.right) + batched_data = inputs[slice(*indices)] + batched_data = np.pad(batched_data.cpu().numpy(), + ((offset.left, 0), (0, 0)), + mode="edge") + + elif batch_id == len(index_dataloader) - 1: + indices = (start_batch_idx - offset.left), end_batch_idx + batched_data = inputs[slice(*indices)] + batched_data = np.pad(batched_data.cpu().numpy(), + ((0, offset.right), (0, 0)), + mode="edge") + else: # Middle batches + indices = start_batch_idx - offset.left, end_batch_idx + offset.right + batched_data = inputs[slice(*indices)] + + else: + indices = start_batch_idx, end_batch_idx + batched_data = inputs[slice(*indices)] + + batched_data = torch.from_numpy(batched_data) if isinstance( + batched_data, np.ndarray) else batched_data + batches.append(batched_data) + + return batches From ec377b9fca5c11b8325c0de3bda11ec5a85c2e6c Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 27 Oct 2023 13:43:05 +0200 Subject: [PATCH 011/100] move functionality to base file in solver and separate in functions --- cebra/solver/base.py | 139 ++++++++++++++++++++++++++++++++----------- cebra/solver/util.py | 58 ------------------ 2 files changed, 105 insertions(+), 92 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index b282b27f..d38d8c88 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -38,6 +38,8 @@ import numpy as np import torch import tqdm +from torch.utils.data import DataLoader +from torch.utils.data import Dataset import cebra import cebra.data @@ -48,6 +50,102 @@ from cebra.solver.util import ProgressBar +def _inference_transform(model, inputs): + if isinstance(model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + inputs = inputs.transpose(1, 0).unsqueeze(0) + output = model(inputs).squeeze(0).transpose(1, 0) + else: + output = model(inputs) + return output + + +def _process_batch(inputs: torch.Tensor, add_padding: bool, + offset: cebra.data.Offset, start_batch_idx: int, + end_batch_idx: int) -> torch.Tensor: + """ + Process a batch of input data, optionally applying padding based on specified parameters. + + Args: + inputs: The input data to be processed. + add_padding: Indicates whether padding should be applied before inference. + offset: Offset configuration for padding. If add_padding is True, + offset must be set. If add_padding is False, offset is not used and can be None. + start_batch_idx: The starting index of the current batch. + end_batch_idx: The last index of the current batch. + + Returns: + torch.Tensor: The (potentially) padded data. + + Raises: + ValueError: If pad_beforadd_paddinge_transform is True and offset is not provided. 
+ """ + + if add_padding: + if offset is None: + raise ValueError("offset needs to be set if add_padding is True.") + + if start_batch_idx == 0: # First batch + indices = start_batch_idx, (end_batch_idx + offset.right - 1) + batched_data = inputs[slice(*indices)] + batched_data = np.pad(batched_data.cpu().numpy(), + ((offset.left, 0), (0, 0)), + mode="edge") + + elif end_batch_idx == len(inputs): # Last batch + indices = (start_batch_idx - offset.left), end_batch_idx + batched_data = inputs[slice(*indices)] + batched_data = np.pad(batched_data.cpu().numpy(), + ((0, offset.right - 1), (0, 0)), + mode="edge") + else: # Middle batches + indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 + batched_data = inputs[slice(*indices)] + + else: + indices = start_batch_idx, end_batch_idx + batched_data = inputs[slice(*indices)] + + batched_data = torch.from_numpy(batched_data) if isinstance( + batched_data, np.ndarray) else batched_data + return batched_data + + +def _batched_transform(model, + inputs: torch.Tensor, + batch_size: int, + pad_before_transform: bool, + offset=None) -> torch.Tensor: + + class IndexDataset(Dataset): + + def __init__(self, inputs): + self.inputs = inputs + + def __len__(self): + return len(self.inputs) + + def __getitem__(self, idx): + return idx + + index_dataset = IndexDataset(inputs) + index_dataloader = DataLoader(index_dataset, batch_size=batch_size) + + output = [] + for batch_id, index_batch in enumerate(index_dataloader): + start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] + 1 + batched_data = _process_batch(inputs=inputs, + add_padding=pad_before_transform, + offset=offset, + start_batch_idx=start_batch_idx, + end_batch_idx=end_batch_idx) + output_batch = _inference_transform(model, batched_data) + output.append(output_batch) + + output = torch.cat(output) + return output + + @dataclasses.dataclass class Solver(abc.ABC, cebra.io.HasDevice): """Solver base class. @@ -286,22 +384,14 @@ def decoding(self, train_loader, valid_loader): ) return decode_metric - def _inference_transform(self, model, inputs): - - if isinstance(model, cebra.models.ConvolutionalModelMixin): - # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - inputs = inputs.transpose(1, 0).unsqueeze(0) - output = model(inputs).squeeze(0).transpose(1, 0) - else: - output = model(inputs) - - return output - def _select_model(self, inputs: torch.Tensor, session_id: int): + #NOTE: In the torch API the inputs will be a torch tensor. Then in the + # sklearn API we will convert it to numpy array. """ Select the right model based on the type of solver we have.""" - self.num_sessions = self.loader.dataset.num_sessions if isinstance( - inputs, list) else None + # before: self.loader.dataset.num_sessions + self.num_sessions = len(inputs) if isinstance(inputs, list) else None + if self.num_sessions is not None: # multisession implementation if session_id is None: raise RuntimeError( @@ -339,25 +429,6 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): offset = model.get_offset() return model, offset - @torch.no_grad() - def _batched_transform(self, model, inputs, offset, batch_size, - pad_before_transform) -> torch.Tensor: - output = [] - batches = cebra_solver_util.get_batches_of_data( - inputs=inputs, - batch_size=batch_size, - padding=pad_before_transform, - offset=offset) - - # NOTE: If we move this inside the `cebra_solver_util.get_batches_of_data`or similar - # we avoid a second for loop. Is it good practice to do inference outside the solver? 
- for batch in batches: - output_batch = self._inference_transform(model, batch) - output.append(output_batch) - - output = torch.cat(output) - return output - @torch.no_grad() def _transform(self, model, inputs, offset, pad_before_transform) -> torch.Tensor: @@ -367,7 +438,7 @@ def _transform(self, model, inputs, offset, mode="edge") inputs = torch.from_numpy(inputs) - output = self._inference_transform(model, inputs) + output = _inference_transform(model, inputs) return output @torch.no_grad() @@ -405,7 +476,7 @@ def transform( ) if batch_size is not None: - output = self._batched_transform( + output = _batched_transform( model=model, inputs=inputs, offset=offset, diff --git a/cebra/solver/util.py b/cebra/solver/util.py index 4137dab7..af9529f7 100644 --- a/cebra/solver/util.py +++ b/cebra/solver/util.py @@ -28,10 +28,6 @@ import numpy as np import torch import tqdm -from torch.utils.data import DataLoader -from torch.utils.data import Dataset - -import cebra.data def _description(stats: Dict[str, float]): @@ -112,57 +108,3 @@ def set_description(self, stats: Dict[str, float]): """ if self.use_tqdm: self.iterator.set_description(_description(stats)) - - -def get_batches_of_data(inputs: torch.Tensor, - batch_size: int, - padding: bool, - offset: cebra.data.Offset = None): - batches = [] - - class IndexDataset(Dataset): - - def __init__(self, inputs): - self.inputs = inputs - - def __len__(self): - return len(self.inputs) - - def __getitem__(self, idx): - return idx - - index_dataset = IndexDataset(inputs) - index_dataloader = DataLoader(index_dataset, batch_size=batch_size) - for batch_id, index_batch in enumerate(index_dataloader): - - start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] - if padding: - if offset is None: - raise ValueError("offset needs to be set if padding is True.") - - if batch_id == 0: - indices = start_batch_idx, (end_batch_idx + offset.right) - batched_data = inputs[slice(*indices)] - batched_data = np.pad(batched_data.cpu().numpy(), - ((offset.left, 0), (0, 0)), - mode="edge") - - elif batch_id == len(index_dataloader) - 1: - indices = (start_batch_idx - offset.left), end_batch_idx - batched_data = inputs[slice(*indices)] - batched_data = np.pad(batched_data.cpu().numpy(), - ((0, offset.right), (0, 0)), - mode="edge") - else: # Middle batches - indices = start_batch_idx - offset.left, end_batch_idx + offset.right - batched_data = inputs[slice(*indices)] - - else: - indices = start_batch_idx, end_batch_idx - batched_data = inputs[slice(*indices)] - - batched_data = torch.from_numpy(batched_data) if isinstance( - batched_data, np.ndarray) else batched_data - batches.append(batched_data) - - return batches From 6f9ca989dacbc878bdc3a26410761ff06809830e Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 27 Oct 2023 13:43:32 +0200 Subject: [PATCH 012/100] add test_select_model for single session --- tests/test_solver.py | 67 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 5412b697..0318e04b 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -207,6 +207,68 @@ def create_model(model_name, input_dimension): multi_session_tests_transform.append( (*args, cebra.solver.MultiSessionSolver)) +single_session_tests_select_model = [] +single_session_hybrid_tests_select_model = [] +for model_name in ["offset1-model", "offset10-model"]: + for session_id in [None, 0, 5]: + for args in [ + ("demo-discrete", model_name, session_id), + ("demo-continuous", 
model_name, session_id), + ("demo-mixed", model_name, session_id), + ]: + single_session_tests_select_model.append( + (*args, cebra.solver.SingleSessionSolver)) + single_session_hybrid_tests_select_model.append( + (*args, cebra.solver.SingleSessionHybridSolver)) + +multi_session_tests_select_model = [] +for model_name in ["offset1-model", "offset10-model"]: + for session_id in [None, 0, 1, 4]: + for args in [("demo-continuous-multisession", model_name, session_id)]: + multi_session_tests_select_model.append( + (*args, cebra.solver.MultiSessionSolver)) + + +@pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", + single_session_tests_select_model + + single_session_hybrid_tests_select_model) +def test_select_model_single_session(data_name, model_name, session_id, + solver_initfunc): + dataset = cebra.datasets.init(data_name) + model = create_model(model_name, dataset.input_dimension) + offset = model.get_offset() + solver = solver_initfunc(model=model, criterion=None, optimizer=None) + + if session_id is not None and session_id > 0: + with pytest.raises(RuntimeError): + solver._select_model(dataset.neural, session_id=session_id) + else: + model_, offset_ = solver._select_model(dataset.neural, + session_id=session_id) + assert offset.left == offset_.left and offset.right == offset_.right + assert model == model_ + + +#@pytest.mark.parametrize( +# "data_name, model_name,session_id,solver_initfunc", +# single_session_tests_select_model + single_session_hybrid_tests_select_model) +#def test_select_model_multi_session(data_name, model_name, session_id, solver_initfunc): +# dataset = cebra.datasets.init(data_name) +# model = nn.ModuleList( +# [create_model(model_name, dataset.input_dimension) for dataset in dataset.iter_sessions()]) +# offset = model[0].get_offset() +# solver = solver_initfunc(model=model, +# criterion=None, +# optimizer=None) +# +# if session_id is not None and session_id > 0: +# with pytest.raises(RuntimeError): +# solver._select_model(dataset.neural, session_id=session_id) +# else: +# model_, offset_ = solver._select_model(dataset.neural, session_id=session_id) +# assert offset.left == offset_.left and offset.right == offset_.right +# assert model == model_ + @pytest.mark.parametrize( "data_name, model_name, padding, loader_initfunc, solver_initfunc", @@ -229,6 +291,7 @@ def test_batched_transform_singlesession(data_name, model_name, padding, solver.fit(loader) if len(model.get_offset()) < 2 and padding: + pytest.skip("not relevant for now.") with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, pad_before_transform=padding) @@ -255,7 +318,9 @@ def test_batched_transform_singlesession(data_name, model_name, padding, #TODO: what to check here exactly? 
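                # One possible answer to the TODO above (an illustrative sketch, not
                # part of the patch): without padding, a fully convolutional model
                # with receptive field m = len(model.get_offset()) maps n input
                # samples to n - m + 1 outputs, so a check could be
                #   expected_len = len(loader.dataset.neural) - len(model.get_offset()) + 1
                #   assert embedding.shape[0] == expected_len
                #   assert np.allclose(embedding_batched, embedding, rtol=1e-02)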
pass else: - assert embedding_batched.shape == embedding.shape + #print(model) + assert embedding_batched.shape == embedding.shape, (padding, + model) assert np.allclose(embedding_batched, embedding, rtol=1e-02) From fbe7eb420d7e89b143ef5ec68abb49f845d1ab9e Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 27 Oct 2023 16:18:56 +0200 Subject: [PATCH 013/100] add checks and test for _process_batch --- cebra/solver/base.py | 36 +++++++++++++-- tests/test_solver.py | 106 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 4 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index d38d8c88..43403911 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -81,25 +81,53 @@ def _process_batch(inputs: torch.Tensor, add_padding: bool, ValueError: If pad_beforadd_paddinge_transform is True and offset is not provided. """ + def _check_indices(indices, inputs): + if (indices[0] < 0) or (indices[1] > inputs.shape[0]): + raise ValueError( + f"offset {offset} is too big for the length of the inputs ({len(inputs)}) " + f"The indices {indices} do not match the inputs length {len(inputs)}." + ) + + if start_batch_idx < 0 or end_batch_idx < 0: + raise ValueError( + f"start_batch_idx ({start_batch_idx}) and end_batch_idx ({end_batch_idx}) must be non-negative." + ) + + if start_batch_idx > end_batch_idx: + raise ValueError( + f"start_batch_idx ({start_batch_idx}) cannot be greater than end_batch_idx ({end_batch_idx})." + ) + + if end_batch_idx > len(inputs): + raise ValueError( + f"end_batch_idx ({end_batch_idx}) cannot exceed the length of inputs ({len(inputs)})." + ) + if add_padding: if offset is None: raise ValueError("offset needs to be set if add_padding is True.") + if not isinstance(offset, cebra.data.Offset): + raise ValueError("offset must be an instance of cebra.data.Offset") + if start_batch_idx == 0: # First batch indices = start_batch_idx, (end_batch_idx + offset.right - 1) + _check_indices(indices, inputs) batched_data = inputs[slice(*indices)] - batched_data = np.pad(batched_data.cpu().numpy(), - ((offset.left, 0), (0, 0)), + batched_data = np.pad(array=batched_data.cpu().numpy(), + pad_width=((offset.left, 0), (0, 0)), mode="edge") elif end_batch_idx == len(inputs): # Last batch indices = (start_batch_idx - offset.left), end_batch_idx + _check_indices(indices, inputs) batched_data = inputs[slice(*indices)] - batched_data = np.pad(batched_data.cpu().numpy(), - ((0, offset.right - 1), (0, 0)), + batched_data = np.pad(array=batched_data.cpu().numpy(), + pad_width=((0, offset.right - 1), (0, 0)), mode="edge") else: # Middle batches indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 + _check_indices(indices, inputs) batched_data = inputs[slice(*indices)] else: diff --git a/tests/test_solver.py b/tests/test_solver.py index 0318e04b..6911d102 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -229,6 +229,112 @@ def create_model(model_name, input_dimension): (*args, cebra.solver.MultiSessionSolver)) +@pytest.mark.parametrize( + "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", + [ + # Test case 1: No padding + (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 1, + torch.tensor([[1, 2]])), # first batch + (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 2, + torch.tensor([[1, 2], [3, 4]])), # first batch + (torch.tensor([[1, 2], [3, 4]]), False, None, 1, 2, + torch.tensor([[3, 4]])), # last batch + + # Test case 2: First batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 
8, 9]]), + True, + cebra.data.Offset(1, 1), + 0, + 2, + torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(1, 1), + 0, + 3, + torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ), + + # Test case 3: Last batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(0, 1), + 1, + 3, + torch.tensor([[4, 5, 6], [7, 8, 9]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(1, 3), + 1, + 3, + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9], [7, 8, 9] + ]), + ), + + # Test case 4: Middle batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(0, 1), + 1, + 2, + torch.tensor([[4, 5, 6]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(0, 2), + 1, + 2, + torch.tensor([[4, 5, 6], [7, 8, 9]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(1, 1), + 1, + 2, + torch.tensor([[1, 2, 3], [4, 5, 6]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(1, 2), + 1, + 2, + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ), + + # Examples that throw an error: + + # Padding without offset (should raise an error) + (torch.tensor([[1, 2]]), True, None, 0, 2, ValueError), + # Negative start_batch_idx or end_batch_idx (should raise an error) + (torch.tensor([[1, 2]]), False, None, -1, 2, ValueError), + # out of bound indices because offset is too large + (torch.tensor([[1, 2], [3, 4]]), True, cebra.data.Offset( + 5, 5), 1, 2, ValueError), + ], +) +def test_process_batch(inputs, add_padding, offset, start_batch_idx, + end_batch_idx, expected_output): + if expected_output == ValueError: + with pytest.raises(ValueError): + cebra.solver.base._process_batch(inputs, add_padding, offset, + start_batch_idx, end_batch_idx) + else: + result = cebra.solver.base._process_batch(inputs, add_padding, offset, + start_batch_idx, + end_batch_idx) + assert torch.equal(result, expected_output) + + @pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", single_session_tests_select_model + single_session_hybrid_tests_select_model) From 463b0f8a8890770b1d7bf23abe52a97d4ca22d72 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 30 Oct 2023 12:54:13 +0100 Subject: [PATCH 014/100] add test_select_model for multisession --- cebra/solver/base.py | 20 +++++++++------ tests/test_solver.py | 58 ++++++++++++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 43403911..b9682f47 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -32,7 +32,7 @@ import abc import os -from typing import Callable, Dict, List, Literal, Optional, Union +from typing import Callable, Dict, Iterable, List, Literal, Optional, Union import literate_dataclasses as dataclasses import numpy as np @@ -78,7 +78,7 @@ def _process_batch(inputs: torch.Tensor, add_padding: bool, torch.Tensor: The (potentially) padded data. Raises: - ValueError: If pad_beforadd_paddinge_transform is True and offset is not provided. + ValueError: If add_padding is True and offset is not provided. """ def _check_indices(indices, inputs): @@ -314,6 +314,12 @@ def fit( * Refine the API here. Drop the validation entirely, and implement this via a hook? 
""" + self.num_sessions = loader.dataset.num_sessions if loader.dataset.num_sessions is not None else None + self.n_features = ([ + loader.dataset.get_input_dimension(session_id) + for session_id in range(loader.dataset.num_sessions) + ] if self.num_sessions is not None else loader.dataset.input_dimension) + self.to(loader.device) iterator = self._get_loader(loader) @@ -417,9 +423,6 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): # sklearn API we will convert it to numpy array. """ Select the right model based on the type of solver we have.""" - # before: self.loader.dataset.num_sessions - self.num_sessions = len(inputs) if isinstance(inputs, list) else None - if self.num_sessions is not None: # multisession implementation if session_id is None: raise RuntimeError( @@ -429,14 +432,13 @@ def _select_model(self, inputs: torch.Tensor, session_id: int): raise RuntimeError( f"Invalid session_id {session_id}: session_id for the current multisession model must be between 0 and {self.num_sessions-1}." ) - if self.n_features_[session_id] != X.shape[1]: + if self.n_features[session_id] != inputs.shape[1]: raise ValueError( f"Invalid input shape: model for session {session_id} requires an input of shape" - f"(n_samples, {self.n_features_[session_id]}), got (n_samples, {X.shape[1]})." + f"(n_samples, {self.n_features[session_id]}), got (n_samples, {inputs.shape[1]})." ) model = self.model[session_id] - model.to(self.device_) #TODO: why do I need to do this? else: # single session if session_id is not None and session_id > 0: @@ -495,6 +497,8 @@ def transform( Returns: The output embedding. """ + #TODO: add check like sklearn? + # #sklearn_utils_validation.check_is_fitted(self, "n_features_") model, offset = self._select_model(inputs, session_id) model.eval() diff --git a/tests/test_solver.py b/tests/test_solver.py index 6911d102..72376bfa 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -222,8 +222,8 @@ def create_model(model_name, input_dimension): (*args, cebra.solver.SingleSessionHybridSolver)) multi_session_tests_select_model = [] -for model_name in ["offset1-model", "offset10-model"]: - for session_id in [None, 0, 1, 4]: +for model_name in ["offset10-model"]: + for session_id in [None, 0, 1, 5, 2, 6, 4]: for args in [("demo-continuous-multisession", model_name, session_id)]: multi_session_tests_select_model.append( (*args, cebra.solver.MultiSessionSolver)) @@ -355,25 +355,41 @@ def test_select_model_single_session(data_name, model_name, session_id, assert model == model_ -#@pytest.mark.parametrize( -# "data_name, model_name,session_id,solver_initfunc", -# single_session_tests_select_model + single_session_hybrid_tests_select_model) -#def test_select_model_multi_session(data_name, model_name, session_id, solver_initfunc): -# dataset = cebra.datasets.init(data_name) -# model = nn.ModuleList( -# [create_model(model_name, dataset.input_dimension) for dataset in dataset.iter_sessions()]) -# offset = model[0].get_offset() -# solver = solver_initfunc(model=model, -# criterion=None, -# optimizer=None) -# -# if session_id is not None and session_id > 0: -# with pytest.raises(RuntimeError): -# solver._select_model(dataset.neural, session_id=session_id) -# else: -# model_, offset_ = solver._select_model(dataset.neural, session_id=session_id) -# assert offset.left == offset_.left and offset.right == offset_.right -# assert model == model_ +@pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", + multi_session_tests_select_model) +def 
test_select_model_multi_session(data_name, model_name, session_id, + solver_initfunc): + dataset = cebra.datasets.init(data_name) + model = nn.ModuleList([ + create_model(model_name, dataset.input_dimension) + for dataset in dataset.iter_sessions() + ]) + + offset = model[0].get_offset() + solver = solver_initfunc(model=model, + criterion=cebra.models.InfoNCE(), + optimizer=torch.optim.Adam(model.parameters(), + lr=1e-3)) + + loader_kwargs = dict(num_steps=10, batch_size=32) + loader = cebra.data.ContinuousMultiSessionDataLoader( + dataset, **loader_kwargs) + solver.fit(loader) + + for i, (model, dataset_) in enumerate(zip(model, dataset.iter_sessions())): + inputs = dataset_.neural + + if session_id is None or session_id >= dataset.num_sessions: + with pytest.raises(RuntimeError): + solver._select_model(inputs, session_id=session_id) + elif i != session_id: + with pytest.raises(ValueError): + solver._select_model(inputs, session_id=session_id) + else: + model_, offset_ = solver._select_model(inputs, + session_id=session_id) + assert offset.left == offset_.left and offset.right == offset_.right + assert model == model_ @pytest.mark.parametrize( From 52191714431a97da3af79860dc87729eafa75e46 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Tue, 31 Oct 2023 16:07:49 +0100 Subject: [PATCH 015/100] make self.num_sessions compatible with single session training --- cebra/solver/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index b9682f47..acc98333 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -314,7 +314,8 @@ def fit( * Refine the API here. Drop the validation entirely, and implement this via a hook? """ - self.num_sessions = loader.dataset.num_sessions if loader.dataset.num_sessions is not None else None + self.num_sessions = loader.dataset.num_sessions if hasattr( + loader.dataset, "num_sessions") else None self.n_features = ([ loader.dataset.get_input_dimension(session_id) for session_id in range(loader.dataset.num_sessions) From f9bd1a6660b494f1c14a93f391235c72ddcabaa6 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 1 Nov 2023 12:11:22 +0100 Subject: [PATCH 016/100] improve test_batched_transform_singlesession --- tests/test_solver.py | 86 ++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 72376bfa..0bdf2cbf 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -178,35 +178,6 @@ def create_model(model_name, input_dimension): num_output=5) -single_session_tests_transform = [] -for padding in [True, False]: - for model_name in ["offset1-model", "offset10-model"]: - for args in [ - ("demo-discrete", model_name, padding, - cebra.data.DiscreteDataLoader), - ("demo-continuous", model_name, padding, - cebra.data.ContinuousDataLoader), - ("demo-mixed", model_name, padding, cebra.data.MixedDataLoader), - ]: - single_session_tests_transform.append( - (*args, cebra.solver.SingleSessionSolver)) - -single_session_hybrid_tests_transform = [] -for padding in [True, False]: - for model_name in ["offset1-model", "offset10-model"]: - for args in [("demo-continuous", model_name, padding, - cebra.data.HybridDataLoader)]: - single_session_hybrid_tests_transform.append( - (*args, cebra.solver.SingleSessionHybridSolver)) - -multi_session_tests_transform = [] -for padding in [True, False]: - for model_name in ["offset1-model", "offset10-model"]: - for args in [("demo-continuous-multisession", model_name, 
padding, - cebra.data.ContinuousMultiSessionDataLoader)]: - multi_session_tests_transform.append( - (*args, cebra.solver.MultiSessionSolver)) - single_session_tests_select_model = [] single_session_hybrid_tests_select_model = [] for model_name in ["offset1-model", "offset10-model"]: @@ -392,12 +363,59 @@ def test_select_model_multi_session(data_name, model_name, session_id, assert model == model_ +#this is a very crucial test. should be checked for different choices of offsets, +# dataset sizes (also edge cases like dataset size 1001 and batch size 1000 -> is the padding properly handled?) +#try to isolate this from the remaining tests, and make it really rigorous with a lot of test cases. + +models = [ + "offset1-model", "offset10-model" +] # there is an issue with subsampe models e.g. "offset4-model-2x-subsample" +batch_size_inference = [99_999] #1, 1000 + +single_session_tests_transform = [] +for padding in [True, False]: + for model_name in models: + for batch_size in batch_size_inference: + for args in [ + ("demo-discrete", model_name, padding, batch_size, + cebra.data.DiscreteDataLoader), + ("demo-continuous", model_name, padding, batch_size, + cebra.data.ContinuousDataLoader), + ("demo-mixed", model_name, padding, batch_size, + cebra.data.MixedDataLoader), + ]: + single_session_tests_transform.append( + (*args, cebra.solver.SingleSessionSolver)) + +single_session_hybrid_tests_transform = [] +for padding in [True, False]: + for model_name in models: + for batch_size in batch_size_inference: + for args in [("demo-continuous", model_name, padding, batch_size, + cebra.data.HybridDataLoader)]: + single_session_hybrid_tests_transform.append( + (*args, cebra.solver.SingleSessionHybridSolver)) + +#multi_session_tests_transform = [] +#for padding in [True, False]: +# for model_name in ["offset1-model", "offset5-model", "offset10-model"]: +# for args in [("demo-continuous-multisession", model_name, padding, +# cebra.data.ContinuousMultiSessionDataLoader)]: +# multi_session_tests_transform.append( +# (*args, cebra.solver.MultiSessionSolver)) + + @pytest.mark.parametrize( - "data_name, model_name, padding, loader_initfunc, solver_initfunc", + "data_name, model_name,padding,batch_size_inference,loader_initfunc, solver_initfunc", single_session_tests_transform + single_session_hybrid_tests_transform) -def test_batched_transform_singlesession(data_name, model_name, padding, - loader_initfunc, solver_initfunc): - batch_size = 1024 +def test_batched_transform_singlesession( + data_name, + model_name, + padding, + batch_size_inference, + loader_initfunc, + solver_initfunc, +): dataset = cebra.datasets.init(data_name) model = create_model(model_name, dataset.input_dimension) dataset.offset = model.get_offset() @@ -420,7 +438,7 @@ def test_batched_transform_singlesession(data_name, model_name, padding, with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, - batch_size=batch_size, + batch_size=batch_size_inference, pad_before_transform=padding) else: embedding_batched = solver.transform(inputs=loader.dataset.neural, From e23a7ef3d936b4c7e3530b46bbc3679d2b710e00 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Tue, 7 Nov 2023 18:14:55 +0100 Subject: [PATCH 017/100] make it work with small batches --- cebra/solver/base.py | 27 ++++++-- tests/test_solver.py | 151 ++++++++++++++++++++++++++----------------- 2 files changed, 112 insertions(+), 66 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index acc98333..1026dfe2 100644 --- a/cebra/solver/base.py +++ 
b/cebra/solver/base.py @@ -103,6 +103,17 @@ def _check_indices(indices, inputs): f"end_batch_idx ({end_batch_idx}) cannot exceed the length of inputs ({len(inputs)})." ) + def _check_batch_size_length(indices_batch, offset): + batch_size_lenght = indices_batch[1] - indices_batch[0] + print("batch_size ll", add_padding, indices, batch_size_lenght, + len(offset)) + if batch_size_lenght <= len(offset): + raise ValueError( + f"The batch has length {batch_size_lenght} which " + f"is smaller or equal than the required offset length {len(offset)}." + f"Either choose a model with smaller offset or the batch shoud contain more samples." + ) + if add_padding: if offset is None: raise ValueError("offset needs to be set if add_padding is True.") @@ -112,7 +123,8 @@ def _check_indices(indices, inputs): if start_batch_idx == 0: # First batch indices = start_batch_idx, (end_batch_idx + offset.right - 1) - _check_indices(indices, inputs) + #_check_indices(indices, inputs) + _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] batched_data = np.pad(array=batched_data.cpu().numpy(), pad_width=((offset.left, 0), (0, 0)), @@ -120,18 +132,21 @@ def _check_indices(indices, inputs): elif end_batch_idx == len(inputs): # Last batch indices = (start_batch_idx - offset.left), end_batch_idx - _check_indices(indices, inputs) + #_check_indices(indices, inputs) + _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] batched_data = np.pad(array=batched_data.cpu().numpy(), pad_width=((0, offset.right - 1), (0, 0)), mode="edge") else: # Middle batches indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 - _check_indices(indices, inputs) + #_check_indices(indices, inputs) + _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] else: indices = start_batch_idx, end_batch_idx + _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] batched_data = torch.from_numpy(batched_data) if isinstance( @@ -139,11 +154,9 @@ def _check_indices(indices, inputs): return batched_data -def _batched_transform(model, - inputs: torch.Tensor, - batch_size: int, +def _batched_transform(model, inputs: torch.Tensor, batch_size: int, pad_before_transform: bool, - offset=None) -> torch.Tensor: + offset: cebra.data.Offset) -> torch.Tensor: class IndexDataset(Dataset): diff --git a/tests/test_solver.py b/tests/test_solver.py index 0bdf2cbf..12794477 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -368,9 +368,11 @@ def test_select_model_multi_session(data_name, model_name, session_id, #try to isolate this from the remaining tests, and make it really rigorous with a lot of test cases. models = [ - "offset1-model", "offset10-model" + "offset1-model", + "offset10-model", + #"offset1-model", "offset10-model", ] # there is an issue with subsampe models e.g. 
"offset4-model-2x-subsample" -batch_size_inference = [99_999] #1, 1000 +batch_size_inference = [23432, 99_999] #1, 1000 single_session_tests_transform = [] for padding in [True, False]: @@ -396,17 +398,9 @@ def test_select_model_multi_session(data_name, model_name, session_id, single_session_hybrid_tests_transform.append( (*args, cebra.solver.SingleSessionHybridSolver)) -#multi_session_tests_transform = [] -#for padding in [True, False]: -# for model_name in ["offset1-model", "offset5-model", "offset10-model"]: -# for args in [("demo-continuous-multisession", model_name, padding, -# cebra.data.ContinuousMultiSessionDataLoader)]: -# multi_session_tests_transform.append( -# (*args, cebra.solver.MultiSessionSolver)) - @pytest.mark.parametrize( - "data_name, model_name,padding,batch_size_inference,loader_initfunc, solver_initfunc", + "data_name,model_name,padding,batch_size_inference,loader_initfunc,solver_initfunc", single_session_tests_transform + single_session_hybrid_tests_transform) def test_batched_transform_singlesession( data_name, @@ -430,7 +424,12 @@ def test_batched_transform_singlesession( optimizer=optimizer) solver.fit(loader) - if len(model.get_offset()) < 2 and padding: + smallest_batch_length = loader.dataset.neural.shape[0] - batch_size + offset_ = model.get_offset() + #print("here!", smallest_batch_length, len(offset_)) + padding_left = offset_.left if padding else 0 + + if len(offset_) < 2 and padding: pytest.skip("not relevant for now.") with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, @@ -438,8 +437,21 @@ def test_batched_transform_singlesession( with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, - batch_size=batch_size_inference, + batch_size=batch_size, + pad_before_transform=padding) + + # NOTE: We need to add padding_left because if padding is True, + # the batch size is not "smallest_batch_length". and the smallest + # batch will always be at the end so the last batch we need to add + # offset.left. 
+ #TODO: this wont work in the case where the data is less than + #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 + elif smallest_batch_length + padding_left <= len(offset_): + with pytest.raises(ValueError): + solver.transform(inputs=loader.dataset.neural, + batch_size=batch_size, pad_before_transform=padding) + else: embedding_batched = solver.transform(inputs=loader.dataset.neural, batch_size=batch_size, @@ -464,49 +476,70 @@ def test_batched_transform_singlesession( assert np.allclose(embedding_batched, embedding, rtol=1e-02) -# def test_batched_transform_multisession(data_name, model_name, padding, loader_initfunc, solver_initfunc): -# batch_size = 1024 -# dataset = cebra.datasets.init(data_name) -# model = nn.ModuleList( -# [create_model(model_name, dataset.input_dimension) for dataset in dataset.iter_sessions()]) -# dataset.offset = model[0].get_offset() -# loader_kwargs = dict(num_steps=10, batch_size=32) -# loader = loader_initfunc(dataset, **loader_kwargs) - -# criterion = cebra.models.InfoNCE() -# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) - -# solver = solver_initfunc(model=model, -# criterion=criterion, -# optimizer=optimizer) -# solver.fit(loader) - -# if len(model.get_offset()) < 2 and padding: -# with pytest.raises(ValueError): -# solver.transform(inputs=loader.dataset.neural, -# pad_before_transform=padding) - -# with pytest.raises(ValueError): -# solver.transform(inputs=loader.dataset.neural, -# batch_size=batch_size, -# pad_before_transform=padding) -# else: -# embedding_batched = solver.transform(inputs=loader.dataset.neural, -# batch_size=batch_size, -# pad_before_transform=padding) - -# embedding = solver.transform(inputs=loader.dataset.neural, -# pad_before_transform=padding) - -# if padding: -# if isinstance(model, cebra.models.ConvolutionalModelMixin): -# assert embedding_batched.shape == embedding.shape -# assert embedding_batched.shape == embedding.shape - -# else: -# if isinstance(model, cebra.models.ConvolutionalModelMixin): -# #TODO: what to check here exactly? 
-# pass -# else: -# assert embedding_batched.shape == embedding.shape -# assert np.allclose(embedding_batched, embedding, rtol=1e-02) +multi_session_tests_transform = [] +for padding in [True, False]: + for model_name in models: + for batch_size in batch_size_inference: + for args in [ + ("demo-continuous-multisession", model_name, padding, + batch_size, cebra.data.ContinuousMultiSessionDataLoader) + ]: + multi_session_tests_transform.append( + (*args, cebra.solver.MultiSessionSolver)) + + +@pytest.mark.parametrize( + "data_name, model_name,padding,batch_size_inference,loader_initfunc, solver_initfunc", + multi_session_tests_transform) +def test_batched_transform_multisession(data_name, model_name, padding, + batch_size_inference, loader_initfunc, + solver_initfunc): + dataset = cebra.datasets.init(data_name) + model = nn.ModuleList([ + create_model(model_name, dataset.input_dimension) + for dataset in dataset.iter_sessions() + ]) + dataset.offset = model[0].get_offset() + loader_kwargs = dict(num_steps=10, batch_size=32) + loader = loader_initfunc(dataset, **loader_kwargs) + + criterion = cebra.models.InfoNCE() + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + + solver = solver_initfunc(model=model, + criterion=criterion, + optimizer=optimizer) + solver.fit(loader) + + #if len(model[0].get_offset()) < 2 and padding: + # with pytest.raises(ValueError): + # solver.transform(inputs=loader.dataset.neural, + # pad_before_transform=padding) + + +# +# with pytest.raises(ValueError): +# solver.transform(inputs=loader.dataset.neural, +# batch_size=batch_size, +# pad_before_transform=padding) +#else: +# embedding_batched = solver.transform(inputs=loader.dataset.neural, +# batch_size=batch_size, +# pad_before_transform=padding) +# +# embedding = solver.transform(inputs=loader.dataset.neural, +# pad_before_transform=padding) +# +# if padding: +# if isinstance(model, cebra.models.ConvolutionalModelMixin): +# assert embedding_batched.shape == embedding.shape +# assert embedding_batched.shape == embedding.shape +# +# else: +# if isinstance(model, cebra.models.ConvolutionalModelMixin): +# #TODO: what to check here exactly? +# pass +# else: +# assert embedding_batched.shape == embedding.shape +# assert np.allclose(embedding_batched, embedding, rtol=1e-02) +# From 19c3f8709edb738f50ebcefd1026df75d7dbed29 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 8 Nov 2023 13:33:20 +0100 Subject: [PATCH 018/100] make test with multisession work --- tests/test_solver.py | 91 ++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 12794477..7c433bdc 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -370,9 +370,10 @@ def test_select_model_multi_session(data_name, model_name, session_id, models = [ "offset1-model", "offset10-model", + "offset40-model-4x-subsample", #"offset1-model", "offset10-model", -] # there is an issue with subsampe models e.g. "offset4-model-2x-subsample" -batch_size_inference = [23432, 99_999] #1, 1000 +] # there is an issue with "offset4-model-2x-subsample" because it's not a convolutional model. 
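The comment above names the underlying constraint: the padded, batched evaluation path reshapes inputs to the (1, C, T) layout that only fully convolutional models accept, which is why the resampled model is singled out here. A small, hedged helper illustrating that constraint (the function name is hypothetical, not part of the test suite):

import cebra.models

def supports_padded_batching(model) -> bool:
    # Only fully convolutional models can be evaluated on the transposed
    # (1, C, T) layout used by the padded/batched transform path.
    return isinstance(model, cebra.models.ConvolutionalModelMixin)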
+batch_size_inference = [23432, 99_999] # 99_999 single_session_tests_transform = [] for padding in [True, False]: @@ -500,6 +501,19 @@ def test_batched_transform_multisession(data_name, model_name, padding, for dataset in dataset.iter_sessions() ]) dataset.offset = model[0].get_offset() + + n_samples = dataset._datasets[0].neural.shape[0] + assert all( + d.neural.shape[0] == n_samples for d in dataset._datasets + ), "for this set all of the sessions need ot have same number of samples." + + smallest_batch_length = n_samples - batch_size + offset_ = model[0].get_offset() + #print("here!", smallest_batch_length, len(offset_)) + padding_left = offset_.left if padding else 0 + for d in dataset._datasets: + d.offset = offset_ + #dataset._datasets[0].offset = cebra.data.Offset(0, 1) loader_kwargs = dict(num_steps=10, batch_size=32) loader = loader_initfunc(dataset, **loader_kwargs) @@ -511,35 +525,48 @@ def test_batched_transform_multisession(data_name, model_name, padding, optimizer=optimizer) solver.fit(loader) - #if len(model[0].get_offset()) < 2 and padding: - # with pytest.raises(ValueError): - # solver.transform(inputs=loader.dataset.neural, - # pad_before_transform=padding) + # Transform each session with the right model, by providing the corresponding session ID + for i, inputs in enumerate(dataset.iter_sessions()): + if len(offset_) < 2 and padding: + with pytest.raises(ValueError): + embedding = solver.transform(inputs=inputs.neural, + session_id=i, + pad_before_transform=padding) -# -# with pytest.raises(ValueError): -# solver.transform(inputs=loader.dataset.neural, -# batch_size=batch_size, -# pad_before_transform=padding) -#else: -# embedding_batched = solver.transform(inputs=loader.dataset.neural, -# batch_size=batch_size, -# pad_before_transform=padding) -# -# embedding = solver.transform(inputs=loader.dataset.neural, -# pad_before_transform=padding) -# -# if padding: -# if isinstance(model, cebra.models.ConvolutionalModelMixin): -# assert embedding_batched.shape == embedding.shape -# assert embedding_batched.shape == embedding.shape -# -# else: -# if isinstance(model, cebra.models.ConvolutionalModelMixin): -# #TODO: what to check here exactly? -# pass -# else: -# assert embedding_batched.shape == embedding.shape -# assert np.allclose(embedding_batched, embedding, rtol=1e-02) -# + with pytest.raises(ValueError): + embedding_batched = solver.transform( + inputs=inputs.neural, + session_id=i, + pad_before_transform=padding, + batch_size=batch_size) + + elif smallest_batch_length + padding_left <= len(offset_): + with pytest.raises(ValueError): + solver.transform(inputs=inputs.neural, + batch_size=batch_size, + session_id=i, + pad_before_transform=padding) + + else: + model_ = model[i] + embedding = solver.transform(inputs=inputs.neural, + session_id=i, + pad_before_transform=padding) + embedding_batched = solver.transform(inputs=inputs.neural, + session_id=i, + pad_before_transform=padding, + batch_size=batch_size) + + if padding: + if isinstance(model_, cebra.models.ConvolutionalModelMixin): + assert embedding_batched.shape == embedding.shape + assert embedding_batched.shape == embedding.shape + + else: + if isinstance(model_, cebra.models.ConvolutionalModelMixin): + #TODO: what to check here exactly? 
+ pass + else: + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding, rtol=1e-02) From 87bebac38dca71387e819f749611954430480943 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 9 Nov 2023 12:21:31 +0100 Subject: [PATCH 019/100] change to torch padding --- cebra/solver/base.py | 47 +++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 1026dfe2..25b4ecb6 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -37,6 +37,7 @@ import literate_dataclasses as dataclasses import numpy as np import torch +import torch.nn.functional as F import tqdm from torch.utils.data import DataLoader from torch.utils.data import Dataset @@ -51,6 +52,10 @@ def _inference_transform(model, inputs): + + #TODO: I am not sure what is the best way with dealing with the types and + # device when using batched inference. This works for now. + inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device) if isinstance(model, cebra.models.ConvolutionalModelMixin): # Fully convolutional evaluation, switch (T, C) -> (1, C, T) inputs = inputs.transpose(1, 0).unsqueeze(0) @@ -126,18 +131,24 @@ def _check_batch_size_length(indices_batch, offset): #_check_indices(indices, inputs) _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] - batched_data = np.pad(array=batched_data.cpu().numpy(), - pad_width=((offset.left, 0), (0, 0)), - mode="edge") + batched_data = F.pad(batched_data.T, (offset.left, 0), + 'replicate').T + + #batched_data = np.pad(array=batched_data.cpu().numpy(), + # pad_width=((offset.left, 0), (0, 0)), + # mode="edge") elif end_batch_idx == len(inputs): # Last batch indices = (start_batch_idx - offset.left), end_batch_idx #_check_indices(indices, inputs) _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] - batched_data = np.pad(array=batched_data.cpu().numpy(), - pad_width=((0, offset.right - 1), (0, 0)), - mode="edge") + batched_data = F.pad(batched_data.T, (0, offset.right - 1), + 'replicate').T + + #batched_data = np.pad(array=batched_data.cpu().numpy(), + # pad_width=((0, offset.right - 1), (0, 0)), + # mode="edge") else: # Middle batches indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 #_check_indices(indices, inputs) @@ -149,8 +160,8 @@ def _check_batch_size_length(indices_batch, offset): _check_batch_size_length(indices, offset) batched_data = inputs[slice(*indices)] - batched_data = torch.from_numpy(batched_data) if isinstance( - batched_data, np.ndarray) else batched_data + #batched_data = torch.from_numpy(batched_data) if isinstance( + # batched_data, np.ndarray) else batched_data return batched_data @@ -486,12 +497,11 @@ def _transform(self, model, inputs, offset, return output @torch.no_grad() - def transform( - self, - inputs: torch.Tensor, - pad_before_transform: bool = True, #TODO: what should be the default? - session_id: Optional[int] = None, - batch_size: Optional[int] = None) -> torch.Tensor: + def transform(self, + inputs: torch.Tensor, + pad_before_transform: bool = True, + session_id: Optional[int] = None, + batch_size: Optional[int] = None) -> torch.Tensor: """Compute the embedding. This function by default only applies the ``forward`` function @@ -500,13 +510,14 @@ def transform( Args: inputs: The input signal pad_before_transform: If ``False``, no padding is applied to the input sequence. 
- and the output sequence will be smaller than the input sequence due to the - receptive field of the model. If the input sequence is ``n`` steps long, - and a model with receptive field ``m`` is used, the output sequence would - only be ``n-m+1`` steps long. + and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would + only be ``n-m+1`` steps long. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. + batch_size: If not None, batched inference will be applied. Returns: The output embedding. From f0303e01881c78195c709052f6359bf2575e2109 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 9 Nov 2023 12:21:39 +0100 Subject: [PATCH 020/100] add argument to sklearn api --- cebra/integrations/sklearn/cebra.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 2c9eba2b..d9294706 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1201,16 +1201,18 @@ def fit( def transform(self, X: Union[npt.NDArray, torch.Tensor], pad_before_transform: bool = True, + batch_size: Optional[int] = None, session_id: Optional[int] = None) -> npt.NDArray: """Transform an input sequence and return the embedding. Args: X: A numpy array or torch tensor of size ``time x dimension``. pad_before_transform: If ``False``, no padding is applied to the input sequence. - and the output sequence will be smaller than the input sequence due to the - receptive field of the model. If the input sequence is ``n`` steps long, - and a model with receptive field ``m`` is used, the output sequence would - only be ``n-m+1`` steps long. + and the output sequence will be smaller than the input sequence due to the + receptive field of the model. If the input sequence is ``n`` steps long, + and a model with receptive field ``m`` is used, the output sequence would + only be ``n-m+1`` steps long. + batch_size: session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`num_sessions` for multisession, set to ``None`` for single session. @@ -1233,10 +1235,15 @@ def transform(self, # Input validation X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) input_dtype = X.dtype + #print(type(X)) + #print(X.dtype) with torch.no_grad(): output = self.solver_.transform( - X, pad_before_transform=pad_before_transform) + inputs=X, + pad_before_transform=pad_before_transform, + session_id=session_id, + batch_size=batch_size) if input_dtype == "float64": return output.astype(input_dtype) From 8c8be85d00073b98b9a674161c16e7a6c4b8ca75 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 9 Nov 2023 12:43:08 +0100 Subject: [PATCH 021/100] add torch padding to _transform --- cebra/solver/base.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 25b4ecb6..28dd7832 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -56,6 +56,7 @@ def _inference_transform(model, inputs): #TODO: I am not sure what is the best way with dealing with the types and # device when using batched inference. This works for now. 
inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device) + if isinstance(model, cebra.models.ConvolutionalModelMixin): # Fully convolutional evaluation, switch (T, C) -> (1, C, T) inputs = inputs.transpose(1, 0).unsqueeze(0) @@ -110,8 +111,6 @@ def _check_indices(indices, inputs): def _check_batch_size_length(indices_batch, offset): batch_size_lenght = indices_batch[1] - indices_batch[0] - print("batch_size ll", add_padding, indices, batch_size_lenght, - len(offset)) if batch_size_lenght <= len(offset): raise ValueError( f"The batch has length {batch_size_lenght} which " @@ -489,10 +488,8 @@ def _transform(self, model, inputs, offset, pad_before_transform) -> torch.Tensor: if pad_before_transform: - inputs = np.pad(inputs, ((offset.left, offset.right - 1), (0, 0)), - mode="edge") - inputs = torch.from_numpy(inputs) - + inputs = F.pad(inputs.T, (offset.left, offset.right - 1), + 'replicate').T output = _inference_transform(model, inputs) return output From 59df4026b1b8598f7e5978881f8a9d2f115869fe Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Thu, 9 Nov 2023 12:52:17 +0100 Subject: [PATCH 022/100] convert to torch if numpy array as inputs --- cebra/integrations/sklearn/cebra.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index d9294706..1121ee98 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1233,10 +1233,13 @@ def transform(self, sklearn_utils_validation.check_is_fitted(self, "n_features_") # Input validation + #TODO: if inputs are in cuda, then it throws an error, deal with this. X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) input_dtype = X.dtype - #print(type(X)) - #print(X.dtype) + + if isinstance(X, np.ndarray): + X = torch.from_numpy(X) + # TODO: which type and device should be put there? with torch.no_grad(): output = self.solver_.transform( From 1aadc8b39d2f309cead0f04582ce47adb902e2b5 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Wed, 15 Nov 2023 18:04:04 +0100 Subject: [PATCH 023/100] add distinction between pad with data and pad with zeros and modify test accordingly --- cebra/solver/base.py | 73 ++++++++++++++++---------------------------- tests/test_solver.py | 45 ++++++++------------------- 2 files changed, 38 insertions(+), 80 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 28dd7832..5282e00c 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -66,11 +66,10 @@ def _inference_transform(model, inputs): return output -def _process_batch(inputs: torch.Tensor, add_padding: bool, - offset: cebra.data.Offset, start_batch_idx: int, - end_batch_idx: int) -> torch.Tensor: +def _pad_with_data(inputs: torch.Tensor, offset: cebra.data.Offset, + start_batch_idx: int, end_batch_idx: int) -> torch.Tensor: """ - Process a batch of input data, optionally applying padding based on specified parameters. + Pads a batch of input data with its own data (maybe this is not called padding) Args: inputs: The input data to be processed. @@ -118,49 +117,18 @@ def _check_batch_size_length(indices_batch, offset): f"Either choose a model with smaller offset or the batch shoud contain more samples." 
) - if add_padding: - if offset is None: - raise ValueError("offset needs to be set if add_padding is True.") - - if not isinstance(offset, cebra.data.Offset): - raise ValueError("offset must be an instance of cebra.data.Offset") - - if start_batch_idx == 0: # First batch - indices = start_batch_idx, (end_batch_idx + offset.right - 1) - #_check_indices(indices, inputs) - _check_batch_size_length(indices, offset) - batched_data = inputs[slice(*indices)] - batched_data = F.pad(batched_data.T, (offset.left, 0), - 'replicate').T - - #batched_data = np.pad(array=batched_data.cpu().numpy(), - # pad_width=((offset.left, 0), (0, 0)), - # mode="edge") - - elif end_batch_idx == len(inputs): # Last batch - indices = (start_batch_idx - offset.left), end_batch_idx - #_check_indices(indices, inputs) - _check_batch_size_length(indices, offset) - batched_data = inputs[slice(*indices)] - batched_data = F.pad(batched_data.T, (0, offset.right - 1), - 'replicate').T - - #batched_data = np.pad(array=batched_data.cpu().numpy(), - # pad_width=((0, offset.right - 1), (0, 0)), - # mode="edge") - else: # Middle batches - indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 - #_check_indices(indices, inputs) - _check_batch_size_length(indices, offset) - batched_data = inputs[slice(*indices)] + if start_batch_idx == 0: # First batch + indices = start_batch_idx, (end_batch_idx + offset.right - 1) - else: - indices = start_batch_idx, end_batch_idx - _check_batch_size_length(indices, offset) - batched_data = inputs[slice(*indices)] + elif end_batch_idx == len(inputs): # Last batch + indices = (start_batch_idx - offset.left), end_batch_idx + + else: # Middle batches + indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 - #batched_data = torch.from_numpy(batched_data) if isinstance( - # batched_data, np.ndarray) else batched_data + #_check_batch_size_length(indices, offset) + #TODO: modify this check_batch_size to pass test. + batched_data = inputs[slice(*indices)] return batched_data @@ -185,11 +153,22 @@ def __getitem__(self, idx): output = [] for batch_id, index_batch in enumerate(index_dataloader): start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] + 1 - batched_data = _process_batch(inputs=inputs, - add_padding=pad_before_transform, + + # This applies to all batches. + batched_data = _pad_with_data(inputs=inputs, offset=offset, start_batch_idx=start_batch_idx, end_batch_idx=end_batch_idx) + + if pad_before_transform: + if start_batch_idx == 0: # First batch + batched_data = F.pad(batched_data.T, (offset.left, 0), + 'replicate').T + + elif end_batch_idx == len(inputs): # Last batch + batched_data = F.pad(batched_data.T, (0, offset.right - 1), + 'replicate').T + output_batch = _inference_transform(model, batched_data) output.append(output_batch) diff --git a/tests/test_solver.py b/tests/test_solver.py index 7c433bdc..335166d0 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -373,7 +373,7 @@ def test_select_model_multi_session(data_name, model_name, session_id, "offset40-model-4x-subsample", #"offset1-model", "offset10-model", ] # there is an issue with "offset4-model-2x-subsample" because it's not a convolutional model. 
-batch_size_inference = [23432, 99_999] # 99_999 +batch_size_inference = [23432] # 99_999 single_session_tests_transform = [] for padding in [True, False]: @@ -427,7 +427,6 @@ def test_batched_transform_singlesession( smallest_batch_length = loader.dataset.neural.shape[0] - batch_size offset_ = model.get_offset() - #print("here!", smallest_batch_length, len(offset_)) padding_left = offset_.left if padding else 0 if len(offset_) < 2 and padding: @@ -447,11 +446,13 @@ def test_batched_transform_singlesession( # offset.left. #TODO: this wont work in the case where the data is less than #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 - elif smallest_batch_length + padding_left <= len(offset_): - with pytest.raises(ValueError): - solver.transform(inputs=loader.dataset.neural, - batch_size=batch_size, - pad_before_transform=padding) + + #elif smallest_batch_length + padding_left <= len(offset_): + # print('here') + # with pytest.raises(ValueError): + # solver.transform(inputs=loader.dataset.neural, + # batch_size=batch_size, + # pad_before_transform=padding) else: embedding_batched = solver.transform(inputs=loader.dataset.neural, @@ -461,20 +462,8 @@ def test_batched_transform_singlesession( embedding = solver.transform(inputs=loader.dataset.neural, pad_before_transform=padding) - if padding: - if isinstance(model, cebra.models.ConvolutionalModelMixin): - assert embedding_batched.shape == embedding.shape - assert embedding_batched.shape == embedding.shape - - else: - if isinstance(model, cebra.models.ConvolutionalModelMixin): - #TODO: what to check here exactly? - pass - else: - #print(model) - assert embedding_batched.shape == embedding.shape, (padding, - model) - assert np.allclose(embedding_batched, embedding, rtol=1e-02) + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding, rtol=1e-02) multi_session_tests_transform = [] @@ -558,15 +547,5 @@ def test_batched_transform_multisession(data_name, model_name, padding, pad_before_transform=padding, batch_size=batch_size) - if padding: - if isinstance(model_, cebra.models.ConvolutionalModelMixin): - assert embedding_batched.shape == embedding.shape - assert embedding_batched.shape == embedding.shape - - else: - if isinstance(model_, cebra.models.ConvolutionalModelMixin): - #TODO: what to check here exactly? - pass - else: - assert embedding_batched.shape == embedding.shape - assert np.allclose(embedding_batched, embedding, rtol=1e-02) + assert embedding_batched.shape == embedding.shape + assert np.allclose(embedding_batched, embedding, rtol=1e-02) From bc8ee250b2643f9c44d98fd434872c121515a080 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 17 Nov 2023 15:59:52 +0100 Subject: [PATCH 024/100] differentiate between data padding and zero padding --- cebra/solver/base.py | 98 +++++------ tests/test_solver.py | 384 +++++++++++++++++++++---------------------- 2 files changed, 229 insertions(+), 253 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 5282e00c..2cecab08 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -66,56 +66,32 @@ def _inference_transform(model, inputs): return output -def _pad_with_data(inputs: torch.Tensor, offset: cebra.data.Offset, - start_batch_idx: int, end_batch_idx: int) -> torch.Tensor: - """ - Pads a batch of input data with its own data (maybe this is not called padding) - - Args: - inputs: The input data to be processed. - add_padding: Indicates whether padding should be applied before inference. 
- offset: Offset configuration for padding. If add_padding is True, - offset must be set. If add_padding is False, offset is not used and can be None. - start_batch_idx: The starting index of the current batch. - end_batch_idx: The last index of the current batch. - - Returns: - torch.Tensor: The (potentially) padded data. - - Raises: - ValueError: If add_padding is True and offset is not provided. - """ - - def _check_indices(indices, inputs): - if (indices[0] < 0) or (indices[1] > inputs.shape[0]): - raise ValueError( - f"offset {offset} is too big for the length of the inputs ({len(inputs)}) " - f"The indices {indices} do not match the inputs length {len(inputs)}." - ) +def _check_indices(start_batch_idx, end_batch_idx, offset, num_samples): if start_batch_idx < 0 or end_batch_idx < 0: raise ValueError( f"start_batch_idx ({start_batch_idx}) and end_batch_idx ({end_batch_idx}) must be non-negative." ) - if start_batch_idx > end_batch_idx: raise ValueError( f"start_batch_idx ({start_batch_idx}) cannot be greater than end_batch_idx ({end_batch_idx})." ) + if end_batch_idx > num_samples: + raise ValueError( + f"end_batch_idx ({end_batch_idx}) cannot exceed the length of inputs ({num_samples})." + ) - if end_batch_idx > len(inputs): + batch_size_lenght = end_batch_idx - start_batch_idx + if batch_size_lenght <= len(offset): raise ValueError( - f"end_batch_idx ({end_batch_idx}) cannot exceed the length of inputs ({len(inputs)})." + f"The batch has length {batch_size_lenght} which " + f"is smaller or equal than the required offset length {len(offset)}." + f"Either choose a model with smaller offset or the batch shoud contain more samples." ) - def _check_batch_size_length(indices_batch, offset): - batch_size_lenght = indices_batch[1] - indices_batch[0] - if batch_size_lenght <= len(offset): - raise ValueError( - f"The batch has length {batch_size_lenght} which " - f"is smaller or equal than the required offset length {len(offset)}." - f"Either choose a model with smaller offset or the batch shoud contain more samples." - ) + +def _get_batch(inputs: torch.Tensor, offset: cebra.data.Offset, + start_batch_idx: int, end_batch_idx: int) -> torch.Tensor: if start_batch_idx == 0: # First batch indices = start_batch_idx, (end_batch_idx + offset.right - 1) @@ -126,12 +102,25 @@ def _check_batch_size_length(indices_batch, offset): else: # Middle batches indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 - #_check_batch_size_length(indices, offset) - #TODO: modify this check_batch_size to pass test. + _check_indices(indices[0], indices[1], offset, len(inputs)) batched_data = inputs[slice(*indices)] return batched_data +def _add_zero_padding(batched_data: torch.Tensor, offset: cebra.data.Offset, + start_batch_idx: int, end_batch_idx: int, + number_of_samples: int): + + if start_batch_idx == 0: # First batch + batched_data = F.pad(batched_data.T, (offset.left, 0), 'replicate').T + + elif end_batch_idx == number_of_samples: # Last batch + batched_data = F.pad(batched_data.T, (0, offset.right - 1), + 'replicate').T + + return batched_data + + def _batched_transform(model, inputs: torch.Tensor, batch_size: int, pad_before_transform: bool, offset: cebra.data.Offset) -> torch.Tensor: @@ -153,21 +142,17 @@ def __getitem__(self, idx): output = [] for batch_id, index_batch in enumerate(index_dataloader): start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] + 1 - - # This applies to all batches. 
- batched_data = _pad_with_data(inputs=inputs, - offset=offset, - start_batch_idx=start_batch_idx, - end_batch_idx=end_batch_idx) + batched_data = _get_batch(inputs=inputs, + offset=offset, + start_batch_idx=start_batch_idx, + end_batch_idx=end_batch_idx) if pad_before_transform: - if start_batch_idx == 0: # First batch - batched_data = F.pad(batched_data.T, (offset.left, 0), - 'replicate').T - - elif end_batch_idx == len(inputs): # Last batch - batched_data = F.pad(batched_data.T, (0, offset.right - 1), - 'replicate').T + batched_data = _add_zero_padding(batched_data=batched_data, + offset=offset, + start_batch_idx=start_batch_idx, + end_batch_idx=end_batch_idx, + number_of_samples=len(inputs)) output_batch = _inference_transform(model, batched_data) output.append(output_batch) @@ -503,10 +488,11 @@ def transform(self, model, offset = self._select_model(inputs, session_id) model.eval() - if len(offset) < 2 and pad_before_transform: - raise ValueError( - "Padding does not make sense when the offset of the model is < 2" - ) + #TODO: should we add this error? + #if len(offset) < 2 and pad_before_transform: + # raise ValueError( + # "Padding does not make sense when the offset of the model is < 2" + # ) if batch_size is not None: output = _batched_transform( diff --git a/tests/test_solver.py b/tests/test_solver.py index 335166d0..1661003a 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -199,169 +199,165 @@ def create_model(model_name, input_dimension): multi_session_tests_select_model.append( (*args, cebra.solver.MultiSessionSolver)) - -@pytest.mark.parametrize( - "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", - [ - # Test case 1: No padding - (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 1, - torch.tensor([[1, 2]])), # first batch - (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 2, - torch.tensor([[1, 2], [3, 4]])), # first batch - (torch.tensor([[1, 2], [3, 4]]), False, None, 1, 2, - torch.tensor([[3, 4]])), # last batch - - # Test case 2: First batch with padding - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(1, 1), - 0, - 2, - torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6]]), - ), - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(1, 1), - 0, - 3, - torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [7, 8, 9]]), - ), - - # Test case 3: Last batch with padding - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(0, 1), - 1, - 3, - torch.tensor([[4, 5, 6], [7, 8, 9]]), - ), - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(1, 3), - 1, - 3, - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9], [7, 8, 9] - ]), - ), - - # Test case 4: Middle batch with padding - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(0, 1), - 1, - 2, - torch.tensor([[4, 5, 6]]), - ), - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(0, 2), - 1, - 2, - torch.tensor([[4, 5, 6], [7, 8, 9]]), - ), - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(1, 1), - 1, - 2, - torch.tensor([[1, 2, 3], [4, 5, 6]]), - ), - ( - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - True, - cebra.data.Offset(1, 2), - 1, - 2, - torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - ), - - # Examples that throw an error: - - # Padding without offset (should raise an error) - (torch.tensor([[1, 2]]), True, None, 0, 2, ValueError), - # Negative start_batch_idx or 
end_batch_idx (should raise an error) - (torch.tensor([[1, 2]]), False, None, -1, 2, ValueError), - # out of bound indices because offset is too large - (torch.tensor([[1, 2], [3, 4]]), True, cebra.data.Offset( - 5, 5), 1, 2, ValueError), - ], -) -def test_process_batch(inputs, add_padding, offset, start_batch_idx, - end_batch_idx, expected_output): - if expected_output == ValueError: - with pytest.raises(ValueError): - cebra.solver.base._process_batch(inputs, add_padding, offset, - start_batch_idx, end_batch_idx) - else: - result = cebra.solver.base._process_batch(inputs, add_padding, offset, - start_batch_idx, - end_batch_idx) - assert torch.equal(result, expected_output) - - -@pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", - single_session_tests_select_model + - single_session_hybrid_tests_select_model) -def test_select_model_single_session(data_name, model_name, session_id, - solver_initfunc): - dataset = cebra.datasets.init(data_name) - model = create_model(model_name, dataset.input_dimension) - offset = model.get_offset() - solver = solver_initfunc(model=model, criterion=None, optimizer=None) - - if session_id is not None and session_id > 0: - with pytest.raises(RuntimeError): - solver._select_model(dataset.neural, session_id=session_id) - else: - model_, offset_ = solver._select_model(dataset.neural, - session_id=session_id) - assert offset.left == offset_.left and offset.right == offset_.right - assert model == model_ - - -@pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", - multi_session_tests_select_model) -def test_select_model_multi_session(data_name, model_name, session_id, - solver_initfunc): - dataset = cebra.datasets.init(data_name) - model = nn.ModuleList([ - create_model(model_name, dataset.input_dimension) - for dataset in dataset.iter_sessions() - ]) - - offset = model[0].get_offset() - solver = solver_initfunc(model=model, - criterion=cebra.models.InfoNCE(), - optimizer=torch.optim.Adam(model.parameters(), - lr=1e-3)) - - loader_kwargs = dict(num_steps=10, batch_size=32) - loader = cebra.data.ContinuousMultiSessionDataLoader( - dataset, **loader_kwargs) - solver.fit(loader) - - for i, (model, dataset_) in enumerate(zip(model, dataset.iter_sessions())): - inputs = dataset_.neural - - if session_id is None or session_id >= dataset.num_sessions: - with pytest.raises(RuntimeError): - solver._select_model(inputs, session_id=session_id) - elif i != session_id: - with pytest.raises(ValueError): - solver._select_model(inputs, session_id=session_id) - else: - model_, offset_ = solver._select_model(inputs, - session_id=session_id) - assert offset.left == offset_.left and offset.right == offset_.right - assert model == model_ - +# @pytest.mark.parametrize( +# "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", +# [ +# # Test case 1: No padding +# (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 1, +# torch.tensor([[1, 2]])), # first batch +# (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 2, +# torch.tensor([[1, 2], [3, 4]])), # first batch +# (torch.tensor([[1, 2], [3, 4]]), False, None, 1, 2, +# torch.tensor([[3, 4]])), # last batch + +# # Test case 2: First batch with padding +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(1, 1), +# 0, +# 2, +# torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6]]), +# ), +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(1, 1), +# 0, +# 3, +# torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [7, 
8, 9]]), +# ), + +# # Test case 3: Last batch with padding +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(0, 1), +# 1, +# 3, +# torch.tensor([[4, 5, 6], [7, 8, 9]]), +# ), +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(1, 3), +# 1, +# 3, +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9], [7, 8, 9] +# ]), +# ), + +# # Test case 4: Middle batch with padding +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(0, 1), +# 1, +# 2, +# torch.tensor([[4, 5, 6]]), +# ), +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(0, 2), +# 1, +# 2, +# torch.tensor([[4, 5, 6], [7, 8, 9]]), +# ), +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(1, 1), +# 1, +# 2, +# torch.tensor([[1, 2, 3], [4, 5, 6]]), +# ), +# ( +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# True, +# cebra.data.Offset(1, 2), +# 1, +# 2, +# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), +# ), + +# # Examples that throw an error: + +# # Padding without offset (should raise an error) +# (torch.tensor([[1, 2]]), True, None, 0, 2, ValueError), +# # Negative start_batch_idx or end_batch_idx (should raise an error) +# (torch.tensor([[1, 2]]), False, None, -1, 2, ValueError), +# # out of bound indices because offset is too large +# (torch.tensor([[1, 2], [3, 4]]), True, cebra.data.Offset( +# 5, 5), 1, 2, ValueError), +# ], +# ) +# def test__get_batch(inputs, add_padding, offset, start_batch_idx, +# end_batch_idx, expected_output): +# if expected_output == ValueError: +# with pytest.raises(ValueError): +# cebra.solver.base._get_batch(inputs, add_padding, offset, +# start_batch_idx, end_batch_idx) +# else: +# result = cebra.solver.base._get_batch(inputs, add_padding, offset, +# start_batch_idx, +# end_batch_idx) +# assert torch.equal(result, expected_output) + +# @pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", +# single_session_tests_select_model + +# single_session_hybrid_tests_select_model) +# def test_select_model_single_session(data_name, model_name, session_id, +# solver_initfunc): +# dataset = cebra.datasets.init(data_name) +# model = create_model(model_name, dataset.input_dimension) +# offset = model.get_offset() +# solver = solver_initfunc(model=model, criterion=None, optimizer=None) + +# if session_id is not None and session_id > 0: +# with pytest.raises(RuntimeError): +# solver._select_model(dataset.neural, session_id=session_id) +# else: +# model_, offset_ = solver._select_model(dataset.neural, +# session_id=session_id) +# assert offset.left == offset_.left and offset.right == offset_.right +# assert model == model_ + +# @pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", +# multi_session_tests_select_model) +# def test_select_model_multi_session(data_name, model_name, session_id, +# solver_initfunc): +# dataset = cebra.datasets.init(data_name) +# model = nn.ModuleList([ +# create_model(model_name, dataset.input_dimension) +# for dataset in dataset.iter_sessions() +# ]) + +# offset = model[0].get_offset() +# solver = solver_initfunc(model=model, +# criterion=cebra.models.InfoNCE(), +# optimizer=torch.optim.Adam(model.parameters(), +# lr=1e-3)) + +# loader_kwargs = dict(num_steps=10, batch_size=32) +# loader = cebra.data.ContinuousMultiSessionDataLoader( +# dataset, **loader_kwargs) +# solver.fit(loader) + +# for i, (model, dataset_) in enumerate(zip(model, dataset.iter_sessions())): 
+# inputs = dataset_.neural + +# if session_id is None or session_id >= dataset.num_sessions: +# with pytest.raises(RuntimeError): +# solver._select_model(inputs, session_id=session_id) +# elif i != session_id: +# with pytest.raises(ValueError): +# solver._select_model(inputs, session_id=session_id) +# else: +# model_, offset_ = solver._select_model(inputs, +# session_id=session_id) +# assert offset.left == offset_.left and offset.right == offset_.right +# assert model == model_ #this is a very crucial test. should be checked for different choices of offsets, # dataset sizes (also edge cases like dataset size 1001 and batch size 1000 -> is the padding properly handled?) @@ -373,7 +369,7 @@ def test_select_model_multi_session(data_name, model_name, session_id, "offset40-model-4x-subsample", #"offset1-model", "offset10-model", ] # there is an issue with "offset4-model-2x-subsample" because it's not a convolutional model. -batch_size_inference = [23432] # 99_999 +batch_size_inference = [40_000, 99_990, 99_999] # 99_999 single_session_tests_transform = [] for padding in [True, False]: @@ -429,31 +425,25 @@ def test_batched_transform_singlesession( offset_ = model.get_offset() padding_left = offset_.left if padding else 0 - if len(offset_) < 2 and padding: - pytest.skip("not relevant for now.") - with pytest.raises(ValueError): - solver.transform(inputs=loader.dataset.neural, - pad_before_transform=padding) + #if len(offset_) < 2 and padding: + # pytest.skip("not relevant for now.") + # with pytest.raises(ValueError): + # solver.transform(inputs=loader.dataset.neural, + # pad_before_transform=padding) + # + # with pytest.raises(ValueError): + # solver.transform(inputs=loader.dataset.neural, + # batch_size=batch_size, + # pad_before_transform=padding) + #TODO: this wont work in the case where the data is less than + #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 + if smallest_batch_length <= len(offset_): with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, batch_size=batch_size, pad_before_transform=padding) - # NOTE: We need to add padding_left because if padding is True, - # the batch size is not "smallest_batch_length". and the smallest - # batch will always be at the end so the last batch we need to add - # offset.left. 
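Editor's note: the property these tests converge on is that batched and non-batched inference agree once batches are extended by the model offset and replicate-padded at the borders. Under those assumptions the check reduces to roughly the sketch below; the helper name, the tolerances and the fitted `solver`/`neural` objects are placeholders, not part of the test suite.

# Sketch of the equivalence property targeted by the tests (names are placeholders).
import torch


def assert_batched_matches_full(solver, neural, batch_size, padding=True):
    full = solver.transform(inputs=neural, pad_before_transform=padding)
    batched = solver.transform(inputs=neural,
                               batch_size=batch_size,
                               pad_before_transform=padding)
    assert full.shape == batched.shape
    # small numerical differences can show up at batch borders, hence allclose
    assert torch.allclose(full, batched, rtol=1e-4, atol=1e-6)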
- #TODO: this wont work in the case where the data is less than - #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 - - #elif smallest_batch_length + padding_left <= len(offset_): - # print('here') - # with pytest.raises(ValueError): - # solver.transform(inputs=loader.dataset.neural, - # batch_size=batch_size, - # pad_before_transform=padding) - else: embedding_batched = solver.transform(inputs=loader.dataset.neural, batch_size=batch_size, @@ -517,20 +507,20 @@ def test_batched_transform_multisession(data_name, model_name, padding, # Transform each session with the right model, by providing the corresponding session ID for i, inputs in enumerate(dataset.iter_sessions()): - if len(offset_) < 2 and padding: - with pytest.raises(ValueError): - embedding = solver.transform(inputs=inputs.neural, - session_id=i, - pad_before_transform=padding) - - with pytest.raises(ValueError): - embedding_batched = solver.transform( - inputs=inputs.neural, - session_id=i, - pad_before_transform=padding, - batch_size=batch_size) - - elif smallest_batch_length + padding_left <= len(offset_): + # if len(offset_) < 2 and padding: + # with pytest.raises(ValueError): + # embedding = solver.transform(inputs=inputs.neural, + # session_id=i, + # pad_before_transform=padding) + # + # with pytest.raises(ValueError): + # embedding_batched = solver.transform( + # inputs=inputs.neural, + # session_id=i, + # pad_before_transform=padding, + # batch_size=batch_size) + + if smallest_batch_length <= len(offset_): with pytest.raises(ValueError): solver.transform(inputs=inputs.neural, batch_size=batch_size, From 5e7a14c3cc80f3d35887a38cccb6a33b580bef3a Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Fri, 24 Nov 2023 13:22:45 +0100 Subject: [PATCH 025/100] remove float16 --- cebra/integrations/sklearn/cebra.py | 9 +++++---- cebra/integrations/sklearn/utils.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 1121ee98..555966fb 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1235,7 +1235,7 @@ def transform(self, # Input validation #TODO: if inputs are in cuda, then it throws an error, deal with this. X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) - input_dtype = X.dtype + #input_dtype = X.dtype if isinstance(X, np.ndarray): X = torch.from_numpy(X) @@ -1248,10 +1248,11 @@ def transform(self, session_id=session_id, batch_size=batch_size) - if input_dtype == "float64": - return output.astype(input_dtype) + #TODO: check if this is safe. + return output.numpy(force=True) - return output + #if input_dtype == "float64": + # return output.astype(input_dtype) def fit_transform( self, diff --git a/cebra/integrations/sklearn/utils.py b/cebra/integrations/sklearn/utils.py index 455213a3..0ec01aa1 100644 --- a/cebra/integrations/sklearn/utils.py +++ b/cebra/integrations/sklearn/utils.py @@ -78,7 +78,8 @@ def check_input_array(X: npt.NDArray, *, min_samples: int) -> npt.NDArray: X, accept_sparse=False, accept_large_sparse=False, - dtype=("float16", "float32", "float64"), + # NOTE: remove float16 because F.pad does not allow float16. 
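Editor's note: the dtype restriction follows from the padding path. The batched transform replicate-pads along the time axis with F.pad, and per the note above that operation rejects half precision, which is why float16 is dropped from the accepted dtypes. A small float32 sketch of the padding itself, with made-up left/right values:

# Sketch of the replicate padding that motivates dropping float16.
import torch
import torch.nn.functional as F

x = torch.arange(30, dtype=torch.float32).reshape(10, 3)   # (time, channels)
left, right = 2, 3                                          # model offset
padded = F.pad(x.T, (left, right - 1), "replicate").T
print(padded.shape)   # torch.Size([14, 3]): 10 + left + (right - 1) samples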
+ dtype=("float32", "float64"), order=None, copy=False, force_all_finite=True, From 928d88247c94a0d42fc159ef1c233999262ebbe0 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 27 Nov 2023 12:09:18 +0100 Subject: [PATCH 026/100] change argument position --- cebra/integrations/sklearn/cebra.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 555966fb..39f73aa2 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1200,18 +1200,12 @@ def fit( def transform(self, X: Union[npt.NDArray, torch.Tensor], - pad_before_transform: bool = True, batch_size: Optional[int] = None, session_id: Optional[int] = None) -> npt.NDArray: """Transform an input sequence and return the embedding. Args: X: A numpy array or torch tensor of size ``time x dimension``. - pad_before_transform: If ``False``, no padding is applied to the input sequence. - and the output sequence will be smaller than the input sequence due to the - receptive field of the model. If the input sequence is ``n`` steps long, - and a model with receptive field ``m`` is used, the output sequence would - only be ``n-m+1`` steps long. batch_size: session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`num_sessions` for multisession, set to ``None`` for single session. @@ -1244,7 +1238,7 @@ def transform(self, with torch.no_grad(): output = self.solver_.transform( inputs=X, - pad_before_transform=pad_before_transform, + pad_before_transform=self.pad_before_transform, session_id=session_id, batch_size=batch_size) From 07bac1cbe39c162f7ab1709c769f71d68167fe94 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 27 Nov 2023 12:12:00 +0100 Subject: [PATCH 027/100] clean test --- tests/test_solver.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 1661003a..0b0eb823 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -425,17 +425,6 @@ def test_batched_transform_singlesession( offset_ = model.get_offset() padding_left = offset_.left if padding else 0 - #if len(offset_) < 2 and padding: - # pytest.skip("not relevant for now.") - # with pytest.raises(ValueError): - # solver.transform(inputs=loader.dataset.neural, - # pad_before_transform=padding) - # - # with pytest.raises(ValueError): - # solver.transform(inputs=loader.dataset.neural, - # batch_size=batch_size, - # pad_before_transform=padding) - #TODO: this wont work in the case where the data is less than #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 if smallest_batch_length <= len(offset_): @@ -507,19 +496,6 @@ def test_batched_transform_multisession(data_name, model_name, padding, # Transform each session with the right model, by providing the corresponding session ID for i, inputs in enumerate(dataset.iter_sessions()): - # if len(offset_) < 2 and padding: - # with pytest.raises(ValueError): - # embedding = solver.transform(inputs=inputs.neural, - # session_id=i, - # pad_before_transform=padding) - # - # with pytest.raises(ValueError): - # embedding_batched = solver.transform( - # inputs=inputs.neural, - # session_id=i, - # pad_before_transform=padding, - # batch_size=batch_size) - if smallest_batch_length <= len(offset_): with pytest.raises(ValueError): solver.transform(inputs=inputs.neural, From 0823b54efa549ceed51b1cc2fd25d82d8eb5afa0 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 27 Nov 2023 12:18:15 +0100 Subject: [PATCH 028/100] 
clean test --- tests/test_solver.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_solver.py b/tests/test_solver.py index 0b0eb823..f84edeb5 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -425,8 +425,6 @@ def test_batched_transform_singlesession( offset_ = model.get_offset() padding_left = offset_.left if padding else 0 - #TODO: this wont work in the case where the data is less than - #the offset from the beginning, i.e len(data) = 10, len(offset) = 10 if smallest_batch_length <= len(offset_): with pytest.raises(ValueError): solver.transform(inputs=loader.dataset.neural, @@ -477,11 +475,9 @@ def test_batched_transform_multisession(data_name, model_name, padding, smallest_batch_length = n_samples - batch_size offset_ = model[0].get_offset() - #print("here!", smallest_batch_length, len(offset_)) padding_left = offset_.left if padding else 0 for d in dataset._datasets: d.offset = offset_ - #dataset._datasets[0].offset = cebra.data.Offset(0, 1) loader_kwargs = dict(num_steps=10, batch_size=32) loader = loader_initfunc(dataset, **loader_kwargs) From 9fe3af351cddabdc37886bcea1f251997be03bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Tue, 26 Mar 2024 20:46:16 +0100 Subject: [PATCH 029/100] Fix warning --- cebra/solver/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 2cecab08..643ae8b8 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -111,12 +111,18 @@ def _add_zero_padding(batched_data: torch.Tensor, offset: cebra.data.Offset, start_batch_idx: int, end_batch_idx: int, number_of_samples: int): + reversed_dims = torch.arange(batched_data.ndim - 1, -1, -1) + if start_batch_idx == 0: # First batch - batched_data = F.pad(batched_data.T, (offset.left, 0), 'replicate').T + batched_data = F.pad(batched_data.permute(*reversed_dims), + (offset.left, 0), 'replicate').permute(*reversed_dims) + #batched_data = F.pad(batched_data.T, (offset.left, 0), 'replicate').T elif end_batch_idx == number_of_samples: # Last batch - batched_data = F.pad(batched_data.T, (0, offset.right - 1), - 'replicate').T + batched_data = F.pad(batched_data.permute(*reversed_dims), + (0, offset.right - 1), 'replicate').permute(*reversed_dims) + #batched_data = F.pad(batched_data.T, (0, offset.right - 1), 'replicate').T + return batched_data From b417a239ed01e32f16d85ef9a7005987f8e60b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:42:53 +0200 Subject: [PATCH 030/100] Improve modularity remove duplicate code and todos --- cebra/integrations/sklearn/cebra.py | 44 +--- cebra/integrations/sklearn/metrics.py | 3 +- cebra/solver/base.py | 329 +++++++++++++++----------- cebra/solver/multi_session.py | 66 +++++- cebra/solver/single_session.py | 95 +++++++- 5 files changed, 359 insertions(+), 178 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 39f73aa2..adabd874 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -791,33 +791,7 @@ def _configure_for_all( def _select_model(self, X: Union[npt.NDArray, torch.Tensor], session_id: int): - # Choose the model and get its corresponding offset - if self.num_sessions is not None: # multisession implementation - if session_id is None: - raise RuntimeError( - "No session_id provided: multisession model 
requires a session_id to choose the model corresponding to your data shape." - ) - if session_id >= self.num_sessions or session_id < 0: - raise RuntimeError( - f"Invalid session_id {session_id}: session_id for the current multisession model must be between 0 and {self.num_sessions-1}." - ) - if self.n_features_[session_id] != X.shape[1]: - raise ValueError( - f"Invalid input shape: model for session {session_id} requires an input of shape" - f"(n_samples, {self.n_features_[session_id]}), got (n_samples, {X.shape[1]})." - ) - - model = self.model_[session_id] - model.to(self.device_) - else: # single session - if session_id is not None and session_id > 0: - raise RuntimeError( - f"Invalid session_id {session_id}: single session models only takes an optional null session_id." - ) - model = self.model_ - - offset = model.get_offset() - return model, offset + return self.solver_._select_model(X, session_id=session_id) def _check_labels_types(self, y: tuple, session_id: Optional[int] = None): """Check that the input labels are compatible with the labels used to fit the model. @@ -1224,16 +1198,16 @@ def transform(self, >>> embedding = cebra_model.transform(dataset) """ - + self.solver_._check_is_session_id_valid(session_id=session_id) sklearn_utils_validation.check_is_fitted(self, "n_features_") - # Input validation - #TODO: if inputs are in cuda, then it throws an error, deal with this. + + if torch.is_tensor(X) and X.device.type == "cuda": + X = X.detach().cpu() + X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) - #input_dtype = X.dtype if isinstance(X, np.ndarray): X = torch.from_numpy(X) - # TODO: which type and device should be put there? with torch.no_grad(): output = self.solver_.transform( @@ -1242,11 +1216,7 @@ def transform(self, session_id=session_id, batch_size=batch_size) - #TODO: check if this is safe. - return output.numpy(force=True) - - #if input_dtype == "float64": - # return output.astype(input_dtype) + return output.detach().cpu().numpy() def fit_transform( self, diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 9712d021..59a961b3 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -83,7 +83,8 @@ def infonce_loss( f"got {len(y[0])} sessions.") model, _ = cebra_model._select_model( - X, session_id) # check session_id validity and corresponding model + X, session_id=session_id + ) # check session_id validity and corresponding model cebra_model._check_labels_types(y, session_id=session_id) dataset, is_multisession = cebra_model._prepare_data(X, y) # single session diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 643ae8b8..5f3acb35 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -32,7 +32,8 @@ import abc import os -from typing import Callable, Dict, Iterable, List, Literal, Optional, Union +from typing import (Callable, Dict, Iterable, List, Literal, Optional, Tuple, + Union) import literate_dataclasses as dataclasses import numpy as np @@ -51,37 +52,35 @@ from cebra.solver.util import ProgressBar -def _inference_transform(model, inputs): - - #TODO: I am not sure what is the best way with dealing with the types and - # device when using batched inference. This works for now. 
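Editor's note: the pattern being reshuffled here casts each batch to float32 on the device where the model parameters live, then runs the usual fully convolutional evaluation by switching (T, C) to (1, C, T) and back. A toy sketch of that pattern, where the Conv1d is only a stand-in for a CEBRA model:

# Toy sketch of the float32/device handling used for inference.
import torch
from torch import nn

model = nn.Conv1d(in_channels=3, out_channels=4, kernel_size=5)
if torch.cuda.is_available():
    model = model.cuda()

inputs = torch.randn(100, 3, dtype=torch.float64)   # (time, channels)
# cast to float32 and move the data to wherever the model parameters live
inputs = inputs.float().to(next(model.parameters()).device)
# fully convolutional evaluation: (T, C) -> (1, C, T), then back to (T', C')
output = model(inputs.transpose(1, 0).unsqueeze(0)).squeeze(0).transpose(1, 0)
print(output.shape)   # (100 - 5 + 1, 4) since no padding is applied here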
- inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device) - - if isinstance(model, cebra.models.ConvolutionalModelMixin): - # Fully convolutional evaluation, switch (T, C) -> (1, C, T) - inputs = inputs.transpose(1, 0).unsqueeze(0) - output = model(inputs).squeeze(0).transpose(1, 0) - else: - output = model(inputs) - return output - - -def _check_indices(start_batch_idx, end_batch_idx, offset, num_samples): +def _check_indices(batch_start_idx: int, batch_end_idx: int, + offset: cebra.data.Offset, num_samples: int): + """Check that indexes in a batch are in a correct range. + + First and last index must be positive integers, smaller than the total length of inputs + in the dataset, the first index must be smaller than the last and the batch size cannot + be smaller than the offset of the model. + + Args: + batch_start_idx: Index of the first sample in the batch. + batch_end_idx: Index of the first sample in the batch. + offset: Model offset. + num_samples: Total number of samples in the input. + """ - if start_batch_idx < 0 or end_batch_idx < 0: + if batch_start_idx < 0 or batch_end_idx < 0: raise ValueError( - f"start_batch_idx ({start_batch_idx}) and end_batch_idx ({end_batch_idx}) must be non-negative." + f"batch_start_idx ({batch_start_idx}) and batch_end_idx ({batch_end_idx}) must be positive integers." ) - if start_batch_idx > end_batch_idx: + if batch_start_idx > batch_end_idx: raise ValueError( - f"start_batch_idx ({start_batch_idx}) cannot be greater than end_batch_idx ({end_batch_idx})." + f"batch_start_idx ({batch_start_idx}) cannot be greater than batch_end_idx ({batch_end_idx})." ) - if end_batch_idx > num_samples: + if batch_end_idx > num_samples: raise ValueError( - f"end_batch_idx ({end_batch_idx}) cannot exceed the length of inputs ({num_samples})." + f"batch_end_idx ({batch_end_idx}) cannot exceed the length of inputs ({num_samples})." ) - batch_size_lenght = end_batch_idx - start_batch_idx + batch_size_lenght = batch_end_idx - batch_start_idx if batch_size_lenght <= len(offset): raise ValueError( f"The batch has length {batch_size_lenght} which " @@ -91,45 +90,123 @@ def _check_indices(start_batch_idx, end_batch_idx, offset, num_samples): def _get_batch(inputs: torch.Tensor, offset: cebra.data.Offset, - start_batch_idx: int, end_batch_idx: int) -> torch.Tensor: + batch_start_idx: int, batch_end_idx: int) -> torch.Tensor: + """Get a batch of samples between the `batch_start_idx` and `batch_end_idx`. - if start_batch_idx == 0: # First batch - indices = start_batch_idx, (end_batch_idx + offset.right - 1) + Args: + inputs: Input data. + offset: Model offset. + batch_start_idx: Index of the first sample in the batch. + batch_end_idx: Index of the first sample in the batch. - elif end_batch_idx == len(inputs): # Last batch - indices = (start_batch_idx - offset.left), end_batch_idx + Returns: + The batch. 
+ """ - else: # Middle batches - indices = start_batch_idx - offset.left, end_batch_idx + offset.right - 1 + if batch_start_idx == 0: # First batch + indices = batch_start_idx, (batch_end_idx + offset.right - 1) + elif batch_end_idx == len(inputs): # Last batch + indices = (batch_start_idx - offset.left), batch_end_idx + else: + indices = batch_start_idx - offset.left, batch_end_idx + offset.right - 1 _check_indices(indices[0], indices[1], offset, len(inputs)) batched_data = inputs[slice(*indices)] return batched_data -def _add_zero_padding(batched_data: torch.Tensor, offset: cebra.data.Offset, - start_batch_idx: int, end_batch_idx: int, - number_of_samples: int): +def _add_batched_zero_padding(batched_data: torch.Tensor, + offset: cebra.data.Offset, batch_start_idx: int, + batch_end_idx: int, + num_samples: int) -> torch.Tensor: + """Add zero padding to the input data before inference. - reversed_dims = torch.arange(batched_data.ndim - 1, -1, -1) - - if start_batch_idx == 0: # First batch - batched_data = F.pad(batched_data.permute(*reversed_dims), - (offset.left, 0), 'replicate').permute(*reversed_dims) - #batched_data = F.pad(batched_data.T, (offset.left, 0), 'replicate').T + Args: + batched_data: Data to apply the inference on. + offset (cebra.data.Offset): _description_ + batch_start_idx: Index of the first sample in the batch. + batch_end_idx: Index of the first sample in the batch. + num_samples (int): Total number of samples in the data. - elif end_batch_idx == number_of_samples: # Last batch - batched_data = F.pad(batched_data.permute(*reversed_dims), - (0, offset.right - 1), 'replicate').permute(*reversed_dims) - #batched_data = F.pad(batched_data.T, (0, offset.right - 1), 'replicate').T + Returns: + The padded batch. + """ + reversed_dims = torch.arange(batched_data.ndim - 1, -1, -1) + if batch_start_idx == 0: # First batch + batched_data = F.pad(batched_data.permute(*reversed_dims), + (offset.left, 0), + 'replicate').permute(*reversed_dims) + elif batch_end_idx == num_samples: # Last batch + batched_data = F.pad(batched_data.permute(*reversed_dims), + (0, offset.right - 1), + 'replicate').permute(*reversed_dims) return batched_data -def _batched_transform(model, inputs: torch.Tensor, batch_size: int, - pad_before_transform: bool, +def _inference_transform(model: cebra.models.Model, + inputs: torch.Tensor) -> torch.Tensor: + """Compute the embedding on the inputs using the model provided. + + Args: + model: Model to use for inference. + inputs: Data. + + Returns: + The embedding. + """ + #TODO(rodrigo): I am not sure what is the best way with dealing with the types and + # device when using batched inference. This works for now. + inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device) + + if isinstance(model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + inputs = inputs.transpose(1, 0).unsqueeze(0) + output = model(inputs).squeeze(0).transpose(1, 0) + else: + output = model(inputs) + return output + + +def _transform( + model: cebra.models.Model, + inputs: torch.Tensor, + pad_before_transform: bool, + offset: cebra.data.Offset, +) -> torch.Tensor: + """Compute the embedding. + + Args: + model: The model to use for inference. + inputs: Input data. + pad_before_transform: If True, the input data is zero padded before inference. + offset: Model offset. + + Returns: + The embedding. 
+ """ + if pad_before_transform: + inputs = F.pad(inputs.T, (offset.left, offset.right - 1), 'replicate').T + output = _inference_transform(model, inputs) + return output + + +def _batched_transform(model: cebra.models.Model, inputs: torch.Tensor, + batch_size: int, pad_before_transform: bool, offset: cebra.data.Offset) -> torch.Tensor: + """Compute the embedding on batched inputs. + + Args: + model: The model to use for inference. + inputs: Input data. + batch_size: Integer corresponding to the batch size. + pad_before_transform: If True, the input data is zero padded before inference. + offset: Model offset. + + Returns: + The embedding. + """ class IndexDataset(Dataset): @@ -146,19 +223,20 @@ def __getitem__(self, idx): index_dataloader = DataLoader(index_dataset, batch_size=batch_size) output = [] - for batch_id, index_batch in enumerate(index_dataloader): - start_batch_idx, end_batch_idx = index_batch[0], index_batch[-1] + 1 + for index_batch in index_dataloader: + batch_start_idx, batch_end_idx = index_batch[0], index_batch[-1] + 1 batched_data = _get_batch(inputs=inputs, offset=offset, - start_batch_idx=start_batch_idx, - end_batch_idx=end_batch_idx) + batch_start_idx=batch_start_idx, + batch_end_idx=batch_end_idx) if pad_before_transform: - batched_data = _add_zero_padding(batched_data=batched_data, - offset=offset, - start_batch_idx=start_batch_idx, - end_batch_idx=end_batch_idx, - number_of_samples=len(inputs)) + batched_data = _add_batched_zero_padding( + batched_data=batched_data, + offset=offset, + batch_start_idx=batch_start_idx, + batch_end_idx=batch_end_idx, + num_samples=len(inputs)) output_batch = _inference_transform(model, batched_data) output.append(output_batch) @@ -265,13 +343,9 @@ def num_parameters(self) -> int: """Total number of parameters in the encoder and criterion.""" return sum(p.numel() for p in self.parameters()) - def parameters(self): - """Iterate over all parameters.""" - for parameter in self.model.parameters(): - yield parameter - - for parameter in self.criterion.parameters(): - yield parameter + @abc.abstractmethod + def parameters(self, session_id: Optional[int] = None): + raise NotImplementedError def _get_loader(self, loader): return ProgressBar( @@ -279,6 +353,10 @@ def _get_loader(self, loader): "tqdm" if self.tqdm_on else "off", ) + @abc.abstractmethod + def _set_fitted_params(self, loader: cebra.data.Loader): + raise NotImplementedError + def fit( self, loader: cebra.data.Loader, @@ -306,14 +384,6 @@ def fit( TODO: * Refine the API here. Drop the validation entirely, and implement this via a hook? """ - - self.num_sessions = loader.dataset.num_sessions if hasattr( - loader.dataset, "num_sessions") else None - self.n_features = ([ - loader.dataset.get_input_dimension(session_id) - for session_id in range(loader.dataset.num_sessions) - ] if self.num_sessions is not None else loader.dataset.input_dimension) - self.to(loader.device) iterator = self._get_loader(loader) @@ -341,6 +411,8 @@ def fit( save_hook(num_steps, self) self.save(logdir, f"checkpoint_{num_steps:#07d}.pth") + self._set_fitted_params(loader) + def step(self, batch: cebra.data.Batch) -> dict: """Perform a single gradient update. @@ -377,8 +449,9 @@ def validation(self, Args: loader: Data loader, which is an iterator over `cebra.data.Batch` instances. Each batch contains reference, positive and negative input samples. - session_id: The session ID, an integer between 0 and the number of sessions in the - multisession model, set to None for single session. 
+ session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. Returns: Loss averaged over iterations on data batch. @@ -412,56 +485,43 @@ def decoding(self, train_loader, valid_loader): ) return decode_metric - def _select_model(self, inputs: torch.Tensor, session_id: int): - #NOTE: In the torch API the inputs will be a torch tensor. Then in the - # sklearn API we will convert it to numpy array. - """ Select the right model based on the type of solver we have.""" - - if self.num_sessions is not None: # multisession implementation - if session_id is None: - raise RuntimeError( - "No session_id provided: multisession model requires a session_id to choose the model corresponding to your data shape." - ) - if session_id >= self.num_sessions or session_id < 0: - raise RuntimeError( - f"Invalid session_id {session_id}: session_id for the current multisession model must be between 0 and {self.num_sessions-1}." - ) - if self.n_features[session_id] != inputs.shape[1]: - raise ValueError( - f"Invalid input shape: model for session {session_id} requires an input of shape" - f"(n_samples, {self.n_features[session_id]}), got (n_samples, {inputs.shape[1]})." - ) - - model = self.model[session_id] - - else: # single session - if session_id is not None and session_id > 0: - raise RuntimeError( - f"Invalid session_id {session_id}: single session models only takes an optional null session_id." - ) - - if isinstance( - self, - cebra.solver.single_session.SingleSessionHybridSolver): - # NOTE: This is different from the sklearn API implementation. The issue is that here the - # model is a cebra.models.MultiObjective instance, and therefore to do inference I need - # to get the module inside this model. - model = self.model.module - else: - model = self.model + @abc.abstractmethod + def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): + """Check that the inputs can be infered using the selected model. + + Note: This method checks that the number of neurons in the input is + similar to the input dimension to the selected model. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. + """ + raise NotImplementedError - offset = model.get_offset() - return model, offset + @abc.abstractmethod + def _check_is_session_id_valid(self, session_id: Optional[int] = None): + raise NotImplementedError - @torch.no_grad() - def _transform(self, model, inputs, offset, - pad_before_transform) -> torch.Tensor: + @abc.abstractmethod + def _select_model( + self, inputs: Union[torch.Tensor, + List[torch.Tensor]], session_id: Optional[int] + ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], + cebra.data.datatypes.Offset]: + """ Select the model based on the input dimension and session ID. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. - if pad_before_transform: - inputs = F.pad(inputs.T, (offset.left, offset.right - 1), - 'replicate').T - output = _inference_transform(model, inputs) - return output + Returns: + The model (first returns) and the offset of the model (second returns). 
+ """ + raise NotImplementedError @torch.no_grad() def transform(self, @@ -489,17 +549,16 @@ def transform(self, Returns: The output embedding. """ - #TODO: add check like sklearn? - # #sklearn_utils_validation.check_is_fitted(self, "n_features_") + if not hasattr(self, "n_features"): + raise ValueError( + f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with " + "appropriate arguments before using this estimator.") model, offset = self._select_model(inputs, session_id) - model.eval() - #TODO: should we add this error? - #if len(offset) < 2 and pad_before_transform: - # raise ValueError( - # "Padding does not make sense when the offset of the model is < 2" - # ) + if len(offset) < 2 and pad_before_transform: + pad_before_transform = False + model.eval() if batch_size is not None: output = _batched_transform( model=model, @@ -508,12 +567,11 @@ def transform(self, batch_size=batch_size, pad_before_transform=pad_before_transform, ) - else: - output = self._transform(model=model, - inputs=inputs, - offset=offset, - pad_before_transform=pad_before_transform) + output = _transform(model=model, + inputs=inputs, + offset=offset, + pad_before_transform=pad_before_transform) return output @@ -539,6 +597,7 @@ def load(self, logdir, filename="checkpoint.pth"): """Load the experiment from its checkpoint file. Args: + logdir: Log directory. filename (str): Checkpoint name for loading the experiment. """ @@ -549,6 +608,12 @@ def load(self, logdir, filename="checkpoint.pth"): checkpoint = torch.load(savepath, map_location=self.device) self.load_state_dict(checkpoint, strict=True) + if hasattr(self.model, "n_features"): + n_features = self.model.n_features + self.n_features = ([ + session_n_features for session_n_features in n_features + ] if isinstance(n_features, list) else n_features) + def save(self, logdir, filename="checkpoint_last.pth"): """Save the model and optimizer params. diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py index 7f103708..666dafb8 100644 --- a/cebra/solver/multi_session.py +++ b/cebra/solver/multi_session.py @@ -43,6 +43,15 @@ class MultiSessionSolver(abc_.Solver): _variant_name = "multi-session" + def parameters(self, session_id: Optional[int] = None): + """Iterate over all parameters.""" + self._check_is_session_id_valid(session_id=session_id) + for parameter in self.model[session_id].parameters(): + yield parameter + + for parameter in self.criterion.parameters(): + yield parameter + def _mix(self, array: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: shape = array.shape n, m = shape[:2] @@ -116,6 +125,61 @@ def _inference(self, batches: List[cebra.data.Batch]) -> cebra.data.Batch: negative=neg.view(-1, num_features), ) + def _set_fitted_params(self, loader: cebra.data.Loader): + self.num_sessions = loader.dataset.num_sessions + self.n_features = [ + loader.dataset.get_input_dimension(session_id) + for session_id in range(loader.dataset.num_sessions) + ] + + def _check_is_inputs_valid(self, inputs: torch.Tensor, + session_id: Optional[int]): + """Check that the inputs can be infered using the selected model. + + Note: This method checks that the number of neurons in the input is + similar to the input dimension to the selected model. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. 
+ """ + if self.n_features[session_id] != inputs.shape[1]: + raise ValueError( + f"Invalid input shape: model for session {session_id} requires an input of shape" + f"(n_samples, {self.n_features[session_id]}), got (n_samples, {inputs.shape[1]})." + ) + + def _check_is_session_id_valid(self, session_id: Optional[int]): + if session_id is None: + raise RuntimeError( + "No session_id provided: multisession model requires a session_id to choose the model corresponding to your data shape." + ) + if session_id >= self.num_sessions or session_id < 0: + raise RuntimeError( + f"Invalid session_id {session_id}: session_id for the current multisession model must be between 0 and {self.num_sessions-1}." + ) + + def _select_model(self, inputs: torch.Tensor, session_id: Optional[int]): + """ Select the model based on the input dimension and session ID. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. + + Returns: + The model (first returns) and the offset of the model (second returns). + """ + self._check_is_session_id_valid(session_id=session_id) + self._check_is_inputs_valid(inputs, session_id=session_id) + + model = self.model[session_id] + offset = model.get_offset() + return model, offset + def validation(self, loader, session_id: Optional[int] = None): """Compute score of the model on data. @@ -147,7 +211,7 @@ def validation(self, loader, session_id: Optional[int] = None): @register("multi-session-aux") -class MultiSessionAuxVariableSolver(abc_.Solver): +class MultiSessionAuxVariableSolver(MultiSessionSolver): """Multi session training, contrasting neural data against behavior.""" _variant_name = "multi-session-aux" diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py index ded526e9..0ac603e2 100644 --- a/cebra/solver/single_session.py +++ b/cebra/solver/single_session.py @@ -21,11 +21,8 @@ # """Single session solvers embed a single pair of time series.""" -import abc import copy -import os -from collections.abc import Iterable -from typing import List +from typing import List, Optional, Tuple, Union import literate_dataclasses as dataclasses import torch @@ -42,11 +39,72 @@ class SingleSessionSolver(abc_.Solver): """Single session training with a symmetric encoder. This solver assumes that reference, positive and negative samples - are processed by the same features encoder. + are processed by the same features encoder and that a single session + is provided to that encoder. """ _variant_name = "single-session" + def parameters(self, session_id: Optional[int] = None): + """Iterate over all parameters.""" + self._check_is_session_id_valid(session_id=session_id) + for parameter in self.model.parameters(): + yield parameter + + for parameter in self.criterion.parameters(): + yield parameter + + def _set_fitted_params(self, loader: cebra.data.Loader): + self.num_sessions = None + self.n_features = loader.dataset.input_dimension + + def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): + """Check that the inputs can be infered using the selected model. + + Note: This method checks that the number of neurons in the input is + similar to the input dimension to the selected model. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. 
+ """ + if self.n_features != inputs.shape[1]: + raise ValueError( + f"Invalid input shape: model for session {session_id} requires an input of shape" + f"(n_samples, {self.n_features}), got (n_samples, {inputs.shape[1]})." + ) + + def _check_is_session_id_valid(self, session_id: Optional[int] = None): + if session_id is not None and session_id > 0: + raise RuntimeError( + f"Invalid session_id {session_id}: single session models only takes an optional null session_id." + ) + + def _select_model( + self, inputs: Union[torch.Tensor, + List[torch.Tensor]], session_id: Optional[int] + ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], + cebra.data.datatypes.Offset]: + """ Select the model based on the input dimension and session ID. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. + + Returns: + The model (first returns) and the offset of the model (second returns). + """ + self._check_is_inputs_valid(inputs, session_id=session_id) + self._check_is_session_id_valid(session_id=session_id) + + model = self.model + offset = model.get_offset() + return model, offset + def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch: """Given a batch of input examples, computes the feature representation/embedding. @@ -94,7 +152,7 @@ def get_embedding(self, data: torch.Tensor) -> torch.Tensor: @register("single-session-aux") @dataclasses.dataclass -class SingleSessionAuxVariableSolver(abc_.Solver): +class SingleSessionAuxVariableSolver(SingleSessionSolver): """Single session training for reference and positive/negative samples. This solver processes reference samples with a model different from @@ -131,7 +189,7 @@ def _inference(self, batch): @register("single-session-hybrid") @dataclasses.dataclass -class SingleSessionHybridSolver(abc_.MultiobjectiveSolver): +class SingleSessionHybridSolver(abc_.MultiobjectiveSolver, SingleSessionSolver): """Single session training, contrasting neural data against behavior.""" _variant_name = "single-session-hybrid" @@ -149,6 +207,29 @@ def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch: behavior_neg), cebra.data.Batch( time_ref, time_pos, time_neg) + def _select_model( + self, inputs: Union[torch.Tensor, + List[torch.Tensor]], session_id: Optional[int] + ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], + cebra.data.datatypes.Offset]: + """ Select the model based on the input dimension and session ID. + + Args: + inputs: Data to infer using the selected model. + session_id: The session ID, an :py:class:`int` between 0 and + the number of sessions -1 for multisession, and set to + ``None`` for single session. + + Returns: + The model (first returns) and the offset of the model (second returns). 
+ """ + self._check_is_inputs_valid(inputs, session_id=session_id) + self._check_is_session_id_valid(session_id=session_id) + + model = self.model.module + offset = model.get_offset() + return model, offset + @register("single-session-full") @dataclasses.dataclass From 83c16691d081c90e51b0e90d6d4d306f74457d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:41:44 +0200 Subject: [PATCH 031/100] Add tests to solver --- cebra/data/base.py | 4 + cebra/data/multi_session.py | 15 +- cebra/data/single_session.py | 14 +- cebra/integrations/sklearn/cebra.py | 4 +- cebra/solver/base.py | 90 +++-- cebra/solver/single_session.py | 5 +- tests/test_solver.py | 592 ++++++++++++++++++---------- 7 files changed, 458 insertions(+), 266 deletions(-) diff --git a/cebra/data/base.py b/cebra/data/base.py index d2ee47b5..874ed58b 100644 --- a/cebra/data/base.py +++ b/cebra/data/base.py @@ -196,6 +196,7 @@ def load_batch(self, index: BatchIndex) -> Batch: """ raise NotImplementedError() + @abc.abstractmethod def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. @@ -205,6 +206,7 @@ def configure_for(self, model: "cebra.models.Model"): Args: model: The model to configure the dataset for. """ + raise NotImplementedError self.offset = model.get_offset() @@ -230,6 +232,8 @@ class Loader(abc.ABC, cebra.io.HasDevice): doc="""A dataset instance specifying a ``__getitem__`` function.""", ) + time_offset: int = dataclasses.field(default=10) + num_steps: int = dataclasses.field( default=None, doc= diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index 8cd74286..a8d56d10 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -111,6 +111,18 @@ def configure_for(self, model): for session in self.iter_sessions(): session.configure_for(model) + def configure_for(self, model: "cebra.models.Model"): + """Configure the dataset offset for the provided model. + + Call this function before indexing the dataset. This sets the + :py:attr:`offset` attribute of the dataset. + + Args: + model: The model to configure the dataset for. + """ + for i, session in enumerate(self.iter_sessions()): + session.configure_for(model[i]) + @dataclasses.dataclass class MultiSessionLoader(cebra_data.Loader): @@ -121,8 +133,6 @@ class MultiSessionLoader(cebra_data.Loader): dimension, it is better to use a :py:class:`cebra.data.single_session.MixedDataLoader`. """ - time_offset: int = dataclasses.field(default=10) - def __post_init__(self): super().__post_init__() self.sampler = cebra_distr.MultisessionSampler(self.dataset, @@ -151,7 +161,6 @@ class ContinuousMultiSessionDataLoader(MultiSessionLoader): """Contrastive learning conditioned on a continuous behavior variable.""" conditional: str = "time_delta" - time_offset: int = dataclasses.field(default=10) @property def index(self): diff --git a/cebra/data/single_session.py b/cebra/data/single_session.py index c27b10f5..71cd0c3e 100644 --- a/cebra/data/single_session.py +++ b/cebra/data/single_session.py @@ -72,6 +72,17 @@ def load_batch(self, index: BatchIndex) -> Batch: reference=self[index.reference], ) + def configure_for(self, model: "cebra.models.Model"): + """Configure the dataset offset for the provided model. + + Call this function before indexing the dataset. This sets the + :py:attr:`offset` attribute of the dataset. + + Args: + model: The model to configure the dataset for. 
+ """ + self.offset = model.get_offset() + @dataclasses.dataclass class DiscreteDataLoader(cebra_data.Loader): @@ -192,7 +203,6 @@ class ContinuousDataLoader(cebra_data.Loader): and become equivalent to time contrastive learning. """, ) - time_offset: int = dataclasses.field(default=10) delta: float = dataclasses.field(default=0.1) def __post_init__(self): @@ -274,7 +284,6 @@ class MixedDataLoader(cebra_data.Loader): """ conditional: str = dataclasses.field(default="time_delta") - time_offset: int = dataclasses.field(default=10) @property def dindex(self): @@ -337,7 +346,6 @@ class HybridDataLoader(cebra_data.Loader): """ conditional: str = dataclasses.field(default="time_delta") - time_offset: int = dataclasses.field(default=10) delta: float = dataclasses.field(default=0.1) @property diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index adabd874..4240074f 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -776,8 +776,6 @@ def _configure_for_all( f"receptive fields/offsets larger than 1 via the sklearn API. " f"Please use a different model, or revert to the pytorch " f"API for training.") - - d.configure_for(model[n]) else: if not isinstance(model, cebra.models.ConvolutionalModelMixin): if len(model.get_offset()) > 1: @@ -787,7 +785,7 @@ def _configure_for_all( f"Please use a different model, or revert to the pytorch " f"API for training.") - dataset.configure_for(model) + dataset.configure_for(model) def _select_model(self, X: Union[npt.NDArray, torch.Tensor], session_id: int): diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 5f3acb35..ec33f23e 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -37,6 +37,7 @@ import literate_dataclasses as dataclasses import numpy as np +import numpy.typing as npt import torch import torch.nn.functional as F import tqdm @@ -89,32 +90,6 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int, ) -def _get_batch(inputs: torch.Tensor, offset: cebra.data.Offset, - batch_start_idx: int, batch_end_idx: int) -> torch.Tensor: - """Get a batch of samples between the `batch_start_idx` and `batch_end_idx`. - - Args: - inputs: Input data. - offset: Model offset. - batch_start_idx: Index of the first sample in the batch. - batch_end_idx: Index of the first sample in the batch. - - Returns: - The batch. - """ - - if batch_start_idx == 0: # First batch - indices = batch_start_idx, (batch_end_idx + offset.right - 1) - elif batch_end_idx == len(inputs): # Last batch - indices = (batch_start_idx - offset.left), batch_end_idx - else: - indices = batch_start_idx - offset.left, batch_end_idx + offset.right - 1 - - _check_indices(indices[0], indices[1], offset, len(inputs)) - batched_data = inputs[slice(*indices)] - return batched_data - - def _add_batched_zero_padding(batched_data: torch.Tensor, offset: cebra.data.Offset, batch_start_idx: int, batch_end_idx: int, @@ -145,6 +120,45 @@ def _add_batched_zero_padding(batched_data: torch.Tensor, return batched_data +def _get_batch(inputs: torch.Tensor, offset: Optional[cebra.data.Offset], + batch_start_idx: int, batch_end_idx: int, + pad_before_transform: bool) -> torch.Tensor: + """Get a batch of samples between the `batch_start_idx` and `batch_end_idx`. + + Args: + inputs: Input data. + offset: Model offset. + batch_start_idx: Index of the first sample in the batch. + batch_end_idx: Index of the first sample in the batch. + pad_before_transform: If True zero-pad the batched data. + + Returns: + The batch. 
+ """ + if offset is None: + raise ValueError(f"offset cannot be null.") + + if batch_start_idx == 0: # First batch + indices = batch_start_idx, (batch_end_idx + offset.right - 1) + elif batch_end_idx == len(inputs): # Last batch + indices = (batch_start_idx - offset.left), batch_end_idx + else: + indices = batch_start_idx - offset.left, batch_end_idx + offset.right - 1 + + _check_indices(indices[0], indices[1], offset, len(inputs)) + batched_data = inputs[slice(*indices)] + + if pad_before_transform: + batched_data = _add_batched_zero_padding( + batched_data=batched_data, + offset=offset, + batch_start_idx=batch_start_idx, + batch_end_idx=batch_end_idx, + num_samples=len(inputs)) + + return batched_data + + def _inference_transform(model: cebra.models.Model, inputs: torch.Tensor) -> torch.Tensor: """Compute the embedding on the inputs using the model provided. @@ -156,9 +170,7 @@ def _inference_transform(model: cebra.models.Model, Returns: The embedding. """ - #TODO(rodrigo): I am not sure what is the best way with dealing with the types and - # device when using batched inference. This works for now. - inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device) + inputs = inputs.float().to(next(model.parameters()).device) if isinstance(model, cebra.models.ConvolutionalModelMixin): # Fully convolutional evaluation, switch (T, C) -> (1, C, T) @@ -228,15 +240,8 @@ def __getitem__(self, idx): batched_data = _get_batch(inputs=inputs, offset=offset, batch_start_idx=batch_start_idx, - batch_end_idx=batch_end_idx) - - if pad_before_transform: - batched_data = _add_batched_zero_padding( - batched_data=batched_data, - offset=offset, - batch_start_idx=batch_start_idx, - batch_end_idx=batch_end_idx, - num_samples=len(inputs)) + batch_end_idx=batch_end_idx, + pad_before_transform=pad_before_transform) output_batch = _inference_transform(model, batched_data) output.append(output_batch) @@ -549,6 +554,15 @@ def transform(self, Returns: The output embedding. """ + if isinstance(inputs, list): + raise NotImplementedError( + "Inputs to transform() should be the data for a single session." + ) + + elif not isinstance(inputs, torch.Tensor): + raise ValueError( + f"Inputs should be a torch.Tensor, not {type(inputs)}.") + if not hasattr(self, "n_features"): raise ValueError( f"This {type(self).__name__} instance is not fitted yet. 
Call 'fit' with " diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py index 0ac603e2..b941a8ba 100644 --- a/cebra/solver/single_session.py +++ b/cebra/solver/single_session.py @@ -227,7 +227,10 @@ def _select_model( self._check_is_session_id_valid(session_id=session_id) model = self.model.module - offset = model.get_offset() + if hasattr(model, 'get_offset'): + offset = model.get_offset() + else: + offset = None return model, offset diff --git a/tests/test_solver.py b/tests/test_solver.py index f84edeb5..4bb17232 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -35,72 +35,121 @@ single_session_tests = [] for args in [ - ("demo-discrete", cebra.data.DiscreteDataLoader), - ("demo-continuous", cebra.data.ContinuousDataLoader), - ("demo-mixed", cebra.data.MixedDataLoader), + ("demo-discrete", cebra.data.DiscreteDataLoader, "offset10-model"), + ("demo-discrete", cebra.data.DiscreteDataLoader, "offset1-model"), + ("demo-discrete", cebra.data.DiscreteDataLoader, "offset1-model"), + ("demo-discrete", cebra.data.DiscreteDataLoader, "offset10-model"), + ("demo-continuous", cebra.data.ContinuousDataLoader, "offset10-model"), + ("demo-continuous", cebra.data.ContinuousDataLoader, "offset1-model"), + ("demo-mixed", cebra.data.MixedDataLoader, "offset10-model"), + ("demo-mixed", cebra.data.MixedDataLoader, "offset1-model"), ]: single_session_tests.append((*args, cebra.solver.SingleSessionSolver)) single_session_hybrid_tests = [] -for args in [("demo-continuous", cebra.data.HybridDataLoader)]: +for args in [("demo-continuous", cebra.data.HybridDataLoader, "offset10-model"), + ("demo-continuous", cebra.data.HybridDataLoader, "offset1-model")]: single_session_hybrid_tests.append( (*args, cebra.solver.SingleSessionHybridSolver)) multi_session_tests = [] -for args in [("demo-continuous-multisession", - cebra.data.ContinuousMultiSessionDataLoader)]: +for args in [ + ("demo-continuous-multisession", + cebra.data.ContinuousMultiSessionDataLoader, "offset1-model"), + ("demo-continuous-multisession", + cebra.data.ContinuousMultiSessionDataLoader, "offset10-model"), +]: multi_session_tests.append((*args, cebra.solver.MultiSessionSolver)) - # multi_session_tests.append((*args, cebra.solver.MultiSessionAuxVariableSolver)) -print(single_session_tests) +# multi_session_tests.append((*args, cebra.solver.MultiSessionAuxVariableSolver)) -def _get_loader(data_name, loader_initfunc): - data = cebra.datasets.init(data_name) - kwargs = dict(num_steps=10, batch_size=32) +def _get_loader(data, loader_initfunc): + kwargs = dict(num_steps=5, batch_size=32) loader = loader_initfunc(data, **kwargs) return loader -def _make_model(dataset): - # TODO flexible input dimension - return nn.Sequential( - nn.Conv1d(dataset.input_dimension, 5, kernel_size=10), - nn.Flatten(start_dim=1, end_dim=-1), - ) +OUTPUT_DIMENSION = 3 -def _make_behavior_model(dataset): +def _make_model(dataset, model_architecture="offset10-model"): # TODO flexible input dimension - return nn.Sequential( - nn.Conv1d(dataset.input_dimension, 5, kernel_size=10), - nn.Flatten(start_dim=1, end_dim=-1), - ) + # return nn.Sequential( + # nn.Conv1d(dataset.input_dimension, 5, kernel_size=10), + # nn.Flatten(start_dim=1, end_dim=-1), + # ) + return cebra.models.init(model_architecture, dataset.input_dimension, 32, + OUTPUT_DIMENSION) -@pytest.mark.parametrize("data_name, loader_initfunc, solver_initfunc", - single_session_tests) -def test_single_session(data_name, loader_initfunc, solver_initfunc): - loader = _get_loader(data_name, 
loader_initfunc) - model = _make_model(loader.dataset) +# def _make_behavior_model(dataset): +# # TODO flexible input dimension +# return nn.Sequential( +# nn.Conv1d(dataset.input_dimension, 5, kernel_size=10), +# nn.Flatten(start_dim=1, end_dim=-1), +# ) + + +@pytest.mark.parametrize( + "data_name, loader_initfunc, model_architecture, solver_initfunc", + single_session_tests) +def test_single_session(data_name, loader_initfunc, model_architecture, + solver_initfunc): + data = cebra.datasets.init(data_name) + loader = _get_loader(data, loader_initfunc) + model = _make_model(data, model_architecture) + data.configure_for(model) + offset = model.get_offset() criterion = cebra.models.InfoNCE() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) solver = solver_initfunc(model=model, criterion=criterion, - optimizer=optimizer) + optimizer=optimizer, + tqdm_on=False) batch = next(iter(loader)) - assert batch.reference.shape == (32, loader.dataset.input_dimension, 10) + assert batch.reference.shape[:2] == (32, loader.dataset.input_dimension) log = solver.step(batch) assert isinstance(log, dict) + X = loader.dataset.neural + with pytest.raises(ValueError, match="not.*fitted"): + solver.transform(X) + solver.fit(loader) + assert solver.num_sessions == None + assert solver.n_features == X.shape[1] + + embedding = solver.transform(X) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(torch.Tensor(X)) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X, session_id=0) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X, pad_before_transform=False) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0] - len(offset) + 1, OUTPUT_DIMENSION) + + with pytest.raises(ValueError, match="torch.Tensor"): + solver.transform(X.numpy()) + with pytest.raises(RuntimeError, match="Invalid.*session_id"): + embedding = solver.transform(X, session_id=2) -@pytest.mark.parametrize("data_name, loader_initfunc, solver_initfunc", - single_session_tests) -def test_single_session_auxvar(data_name, loader_initfunc, solver_initfunc): + for param in solver.parameters(): + assert isinstance(param, torch.Tensor) + + +@pytest.mark.parametrize( + "data_name, loader_initfunc, model_architecture, solver_initfunc", + single_session_tests) +def test_single_session_auxvar(data_name, loader_initfunc, model_architecture, + solver_initfunc): return # TODO loader = _get_loader(data_name, loader_initfunc) @@ -124,12 +173,16 @@ def test_single_session_auxvar(data_name, loader_initfunc, solver_initfunc): solver.fit(loader) -@pytest.mark.parametrize("data_name, loader_initfunc, solver_initfunc", - single_session_hybrid_tests) -def test_single_session_hybrid(data_name, loader_initfunc, solver_initfunc): - loader = _get_loader(data_name, loader_initfunc) - model = cebra.models.init("offset10-model", loader.dataset.input_dimension, - 32, 3) +@pytest.mark.parametrize( + "data_name, loader_initfunc, model_architecture, solver_initfunc", + single_session_hybrid_tests) +def test_single_session_hybrid(data_name, loader_initfunc, model_architecture, + solver_initfunc): + data = cebra.datasets.init(data_name) + loader = _get_loader(data, loader_initfunc) + model = _make_model(data, model_architecture) + data.configure_for(model) + offset = model.get_offset() criterion = 
cebra.models.InfoNCE() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) solver = solver_initfunc(model=model, @@ -142,16 +195,50 @@ def test_single_session_hybrid(data_name, loader_initfunc, solver_initfunc): log = solver.step(batch) assert isinstance(log, dict) + X = loader.dataset.neural + with pytest.raises(ValueError, match="not.*fitted"): + solver.transform(X) + solver.fit(loader) + assert solver.num_sessions == None + assert solver.n_features == X.shape[1] -@pytest.mark.parametrize("data_name, loader_initfunc, solver_initfunc", - multi_session_tests) -def test_multi_session(data_name, loader_initfunc, solver_initfunc): - loader = _get_loader(data_name, loader_initfunc) + embedding = solver.transform(X) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(torch.Tensor(X)) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X, session_id=0) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X, pad_before_transform=False) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X.shape[0] - len(offset) + 1, OUTPUT_DIMENSION) + + with pytest.raises(ValueError, match="torch.Tensor"): + solver.transform(X.numpy()) + with pytest.raises(RuntimeError, match="Invalid.*session_id"): + embedding = solver.transform(X, session_id=2) + + for param in solver.parameters(): + assert isinstance(param, torch.Tensor) + + +@pytest.mark.parametrize( + "data_name, loader_initfunc, model_architecture, solver_initfunc", + multi_session_tests) +def test_multi_session(data_name, loader_initfunc, model_architecture, + solver_initfunc): + data = cebra.datasets.init(data_name) + loader = _get_loader(data, loader_initfunc) + model = nn.ModuleList([ + _make_model(dataset, model_architecture) + for dataset in data.iter_sessions() + ]) + data.configure_for(model) criterion = cebra.models.InfoNCE() - model = nn.ModuleList( - [_make_model(dataset) for dataset in loader.dataset.iter_sessions()]) optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) solver = solver_initfunc(model=model, @@ -160,22 +247,178 @@ def test_multi_session(data_name, loader_initfunc, solver_initfunc): batch = next(iter(loader)) for session_id, dataset in enumerate(loader.dataset.iter_sessions()): - assert batch[session_id].reference.shape == (32, - dataset.input_dimension, - 10) + assert batch[session_id].reference.shape[:2] == ( + 32, dataset.input_dimension) assert batch[session_id].index is not None log = solver.step(batch) assert isinstance(log, dict) + X = [ + loader.dataset.get_session(i).neural + for i in range(loader.dataset.num_sessions) + ] + with pytest.raises(ValueError, match="not.*fitted"): + solver.transform(X[0], session_id=0) + solver.fit(loader) + assert solver.num_sessions == 3 + assert solver.n_features == [X[i].shape[1] for i in range(len(X))] + + embedding = solver.transform(X[0], session_id=0) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X[0].shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X[1], session_id=1) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X[1].shape[0], OUTPUT_DIMENSION) + embedding = solver.transform(X[0], session_id=0, pad_before_transform=False) + assert isinstance(embedding, torch.Tensor) + assert embedding.shape == (X[0].shape[0] - + 
len(solver.model[0].get_offset()) + 1, + OUTPUT_DIMENSION) + + with pytest.raises(ValueError, match="torch.Tensor"): + embedding = solver.transform(X[0].numpy(), session_id=0) + + with pytest.raises(ValueError, match="shape"): + embedding = solver.transform(X[1], session_id=0) + with pytest.raises(ValueError, match="shape"): + embedding = solver.transform(X[0], session_id=1) + + with pytest.raises(RuntimeError, match="No.*session_id"): + embedding = solver.transform(X[0]) + with pytest.raises(RuntimeError, match="single.*session"): + embedding = solver.transform(X) + with pytest.raises(RuntimeError, match="Invalid.*session_id"): + embedding = solver.transform(X[0], session_id=5) + with pytest.raises(RuntimeError, match="Invalid.*session_id"): + embedding = solver.transform(X[0], session_id=-1) + + for param in solver.parameters(session_id=0): + assert isinstance(param, torch.Tensor) + + with pytest.raises(RuntimeError, match="No.*session_id"): + for param in solver.parameters(): + assert isinstance(param, torch.Tensor) + + +@pytest.mark.parametrize( + "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", + [ + # Test case 1: No padding + (torch.tensor([[1, 2], [3, 4], [5, 6]]), False, cebra.data.Offset( + 0, 1), 0, 2, torch.tensor([[1, 2], [3, 4]])), # first batch + (torch.tensor([[1, 2], [3, 4], [5, 6]]), False, cebra.data.Offset( + 0, 1), 1, 3, torch.tensor([[3, 4], [5, 6]])), # last batch + (torch.tensor( + [[1, 2], [3, 4], [5, 6], [7, 8]]), False, cebra.data.Offset( + 0, 1), 1, 3, torch.tensor([[3, 4], [5, 6]])), # middle batch + + # Test case 2: First batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(0, 1), + 0, + 2, + torch.tensor([[1, 2, 3], [4, 5, 6]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(1, 1), + 0, + 3, + torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ), + + # Test case 3: Last batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + True, + cebra.data.Offset(0, 1), + 1, + 3, + torch.tensor([[4, 5, 6], [7, 8, 9]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], + [13, 14, 15]]), + True, + cebra.data.Offset(1, 2), + 1, + 3, + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]), + ), + + # Test case 4: Middle batch with padding + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]), + True, + cebra.data.Offset(0, 1), + 1, + 3, + torch.tensor([[4, 5, 6], [7, 8, 9]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]), + True, + cebra.data.Offset(1, 1), + 1, + 3, + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], + [13, 14, 15]]), + True, + cebra.data.Offset(0, 1), + 2, + 4, + torch.tensor([[7, 8, 9], [10, 11, 12]]), + ), + ( + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]), + True, + cebra.data.Offset(0, 1), + 0, + 3, + torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ), + + # Examples that throw an error: + + # Padding without offset (should raise an error) + (torch.tensor([[1, 2]]), True, None, 0, 2, ValueError), + # Negative start_batch_idx or end_batch_idx (should raise an error) + (torch.tensor([[1, 2]]), False, cebra.data.Offset( + 0, 1), -1, 2, ValueError), + # out of bound indices because offset is too large + (torch.tensor([[1, 2], [3, 4]]), True, cebra.data.Offset( + 5, 5), 1, 2, ValueError), + # Batch length is smaller than offset. 
+ (torch.tensor([[1, 2], [3, 4]]), False, cebra.data.Offset( + 0, 1), 0, 1, ValueError), # first batch + ], +) +def test_get_batch(inputs, add_padding, offset, start_batch_idx, end_batch_idx, + expected_output): + if expected_output == ValueError: + with pytest.raises(ValueError): + cebra.solver.base._get_batch(inputs, offset, start_batch_idx, + end_batch_idx, add_padding) + else: + result = cebra.solver.base._get_batch(inputs, offset, start_batch_idx, + end_batch_idx, add_padding) + assert torch.equal(result, expected_output) + def create_model(model_name, input_dimension): return cebra.models.init(model_name, num_neurons=input_dimension, num_units=128, - num_output=5) + num_output=OUTPUT_DIMENSION) single_session_tests_select_model = [] @@ -183,9 +426,11 @@ def create_model(model_name, input_dimension): for model_name in ["offset1-model", "offset10-model"]: for session_id in [None, 0, 5]: for args in [ - ("demo-discrete", model_name, session_id), - ("demo-continuous", model_name, session_id), - ("demo-mixed", model_name, session_id), + ("demo-discrete", model_name, session_id, + cebra.data.DiscreteDataLoader), + ("demo-continuous", model_name, session_id, + cebra.data.ContinuousDataLoader), + ("demo-mixed", model_name, session_id, cebra.data.MixedDataLoader), ]: single_session_tests_select_model.append( (*args, cebra.solver.SingleSessionSolver)) @@ -195,169 +440,79 @@ def create_model(model_name, input_dimension): multi_session_tests_select_model = [] for model_name in ["offset10-model"]: for session_id in [None, 0, 1, 5, 2, 6, 4]: - for args in [("demo-continuous-multisession", model_name, session_id)]: + for args in [("demo-continuous-multisession", model_name, session_id, + cebra.data.ContinuousMultiSessionDataLoader)]: multi_session_tests_select_model.append( (*args, cebra.solver.MultiSessionSolver)) -# @pytest.mark.parametrize( -# "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", -# [ -# # Test case 1: No padding -# (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 1, -# torch.tensor([[1, 2]])), # first batch -# (torch.tensor([[1, 2], [3, 4]]), False, None, 0, 2, -# torch.tensor([[1, 2], [3, 4]])), # first batch -# (torch.tensor([[1, 2], [3, 4]]), False, None, 1, 2, -# torch.tensor([[3, 4]])), # last batch - -# # Test case 2: First batch with padding -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(1, 1), -# 0, -# 2, -# torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6]]), -# ), -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(1, 1), -# 0, -# 3, -# torch.tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# ), - -# # Test case 3: Last batch with padding -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(0, 1), -# 1, -# 3, -# torch.tensor([[4, 5, 6], [7, 8, 9]]), -# ), -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(1, 3), -# 1, -# 3, -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9], [7, 8, 9] -# ]), -# ), - -# # Test case 4: Middle batch with padding -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(0, 1), -# 1, -# 2, -# torch.tensor([[4, 5, 6]]), -# ), -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(0, 2), -# 1, -# 2, -# torch.tensor([[4, 5, 6], [7, 8, 9]]), -# ), -# ( -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(1, 1), -# 1, -# 2, -# torch.tensor([[1, 2, 3], [4, 5, 6]]), -# ), -# ( -# 
torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# True, -# cebra.data.Offset(1, 2), -# 1, -# 2, -# torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), -# ), - -# # Examples that throw an error: - -# # Padding without offset (should raise an error) -# (torch.tensor([[1, 2]]), True, None, 0, 2, ValueError), -# # Negative start_batch_idx or end_batch_idx (should raise an error) -# (torch.tensor([[1, 2]]), False, None, -1, 2, ValueError), -# # out of bound indices because offset is too large -# (torch.tensor([[1, 2], [3, 4]]), True, cebra.data.Offset( -# 5, 5), 1, 2, ValueError), -# ], -# ) -# def test__get_batch(inputs, add_padding, offset, start_batch_idx, -# end_batch_idx, expected_output): -# if expected_output == ValueError: -# with pytest.raises(ValueError): -# cebra.solver.base._get_batch(inputs, add_padding, offset, -# start_batch_idx, end_batch_idx) -# else: -# result = cebra.solver.base._get_batch(inputs, add_padding, offset, -# start_batch_idx, -# end_batch_idx) -# assert torch.equal(result, expected_output) - -# @pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", -# single_session_tests_select_model + -# single_session_hybrid_tests_select_model) -# def test_select_model_single_session(data_name, model_name, session_id, -# solver_initfunc): -# dataset = cebra.datasets.init(data_name) -# model = create_model(model_name, dataset.input_dimension) -# offset = model.get_offset() -# solver = solver_initfunc(model=model, criterion=None, optimizer=None) - -# if session_id is not None and session_id > 0: -# with pytest.raises(RuntimeError): -# solver._select_model(dataset.neural, session_id=session_id) -# else: -# model_, offset_ = solver._select_model(dataset.neural, -# session_id=session_id) -# assert offset.left == offset_.left and offset.right == offset_.right -# assert model == model_ - -# @pytest.mark.parametrize("data_name, model_name,session_id,solver_initfunc", -# multi_session_tests_select_model) -# def test_select_model_multi_session(data_name, model_name, session_id, -# solver_initfunc): -# dataset = cebra.datasets.init(data_name) -# model = nn.ModuleList([ -# create_model(model_name, dataset.input_dimension) -# for dataset in dataset.iter_sessions() -# ]) - -# offset = model[0].get_offset() -# solver = solver_initfunc(model=model, -# criterion=cebra.models.InfoNCE(), -# optimizer=torch.optim.Adam(model.parameters(), -# lr=1e-3)) - -# loader_kwargs = dict(num_steps=10, batch_size=32) -# loader = cebra.data.ContinuousMultiSessionDataLoader( -# dataset, **loader_kwargs) -# solver.fit(loader) - -# for i, (model, dataset_) in enumerate(zip(model, dataset.iter_sessions())): -# inputs = dataset_.neural - -# if session_id is None or session_id >= dataset.num_sessions: -# with pytest.raises(RuntimeError): -# solver._select_model(inputs, session_id=session_id) -# elif i != session_id: -# with pytest.raises(ValueError): -# solver._select_model(inputs, session_id=session_id) -# else: -# model_, offset_ = solver._select_model(inputs, -# session_id=session_id) -# assert offset.left == offset_.left and offset.right == offset_.right -# assert model == model_ + +@pytest.mark.parametrize( + "data_name, model_name ,session_id, loader_initfunc, solver_initfunc", + single_session_tests_select_model + + single_session_hybrid_tests_select_model) +def test_select_model_single_session(data_name, model_name, session_id, + loader_initfunc, solver_initfunc): + dataset = cebra.datasets.init(data_name) + model = create_model(model_name, dataset.input_dimension) + 
dataset.configure_for(model) + loader = _get_loader(dataset, loader_initfunc=loader_initfunc) + offset = model.get_offset() + solver = solver_initfunc(model=model, criterion=None, optimizer=None) + + with pytest.raises(ValueError): + solver.n_features = 1000 + solver._select_model(inputs=dataset.neural, session_id=0) + + solver.n_features = dataset.neural.shape[1] + if session_id is not None and session_id > 0: + with pytest.raises(RuntimeError): + solver._select_model(inputs=dataset.neural, session_id=session_id) + else: + model_, offset_ = solver._select_model(inputs=dataset.neural, + session_id=session_id) + assert offset.left == offset_.left and offset.right == offset_.right + assert model == model_ + + +@pytest.mark.parametrize( + "data_name, model_name, session_id, loader_initfunc, solver_initfunc", + multi_session_tests_select_model) +def test_select_model_multi_session(data_name, model_name, session_id, + loader_initfunc, solver_initfunc): + dataset = cebra.datasets.init(data_name) + model = nn.ModuleList([ + create_model(model_name, dataset.input_dimension) + for dataset in dataset.iter_sessions() + ]) + dataset.configure_for(model) + loader = _get_loader(dataset, loader_initfunc=loader_initfunc) + + offset = model[0].get_offset() + solver = solver_initfunc(model=model, + criterion=cebra.models.InfoNCE(), + optimizer=torch.optim.Adam(model.parameters(), + lr=1e-3)) + + loader_kwargs = dict(num_steps=10, batch_size=32) + loader = cebra.data.ContinuousMultiSessionDataLoader( + dataset, **loader_kwargs) + solver.fit(loader) + + for i, (model, dataset_) in enumerate(zip(model, dataset.iter_sessions())): + inputs = dataset_.neural + + if session_id is None or session_id >= dataset.num_sessions: + with pytest.raises(RuntimeError): + solver._select_model(inputs, session_id=session_id) + elif i != session_id: + with pytest.raises(ValueError): + solver._select_model(inputs, session_id=session_id) + else: + model_, offset_ = solver._select_model(inputs, + session_id=session_id) + assert offset.left == offset_.left and offset.right == offset_.right + assert model == model_ + #this is a very crucial test. should be checked for different choices of offsets, # dataset sizes (also edge cases like dataset size 1001 and batch size 1000 -> is the padding properly handled?) @@ -367,9 +522,10 @@ def create_model(model_name, input_dimension): "offset1-model", "offset10-model", "offset40-model-4x-subsample", - #"offset1-model", "offset10-model", + "offset1-model", + "offset10-model", ] # there is an issue with "offset4-model-2x-subsample" because it's not a convolutional model. 
-batch_size_inference = [40_000, 99_990, 99_999] # 99_999 +batch_size_inference = [40_000, 99_990, 99_999] single_session_tests_transform = [] for padding in [True, False]: @@ -397,9 +553,9 @@ def create_model(model_name, input_dimension): @pytest.mark.parametrize( - "data_name,model_name,padding,batch_size_inference,loader_initfunc,solver_initfunc", + "data_name, model_name, padding, batch_size_inference, loader_initfunc, solver_initfunc", single_session_tests_transform + single_session_hybrid_tests_transform) -def test_batched_transform_singlesession( +def test_batched_transform_single_session( data_name, model_name, padding, @@ -458,9 +614,9 @@ def test_batched_transform_singlesession( @pytest.mark.parametrize( "data_name, model_name,padding,batch_size_inference,loader_initfunc, solver_initfunc", multi_session_tests_transform) -def test_batched_transform_multisession(data_name, model_name, padding, - batch_size_inference, loader_initfunc, - solver_initfunc): +def test_batched_transform_multi_session(data_name, model_name, padding, + batch_size_inference, loader_initfunc, + solver_initfunc): dataset = cebra.datasets.init(data_name) model = nn.ModuleList([ create_model(model_name, dataset.input_dimension) From 9c46eb97d830402917bbb3b8a8365fb6a9d26c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:44:35 +0200 Subject: [PATCH 032/100] Remove unused import in solver/utils --- cebra/solver/util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cebra/solver/util.py b/cebra/solver/util.py index af9529f7..584eb0da 100644 --- a/cebra/solver/util.py +++ b/cebra/solver/util.py @@ -25,8 +25,6 @@ from typing import Dict import literate_dataclasses as dataclasses -import numpy as np -import torch import tqdm From c845ec3ef611f7e2330079a6a2a3fd4e16155712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:52:53 +0200 Subject: [PATCH 033/100] Fix test plot --- cebra/integrations/sklearn/cebra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 4240074f..39a64073 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1196,8 +1196,8 @@ def transform(self, >>> embedding = cebra_model.transform(dataset) """ - self.solver_._check_is_session_id_valid(session_id=session_id) sklearn_utils_validation.check_is_fitted(self, "n_features_") + self.solver_._check_is_session_id_valid(session_id=session_id) if torch.is_tensor(X) and X.device.type == "cuda": X = X.detach().cpu() From 9db3e3701ec89b93020918473f55b8f193216998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:00:20 +0200 Subject: [PATCH 034/100] Add some coverage --- cebra/solver/base.py | 13 ++++++++++++- cebra/solver/multi_session.py | 19 +++++++++++++++++++ cebra/solver/single_session.py | 16 ++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index ec33f23e..6fb786b4 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -360,6 +360,12 @@ def _get_loader(self, loader): @abc.abstractmethod def _set_fitted_params(self, loader: cebra.data.Loader): + """Set parameters once the solver is fitted. + + Args: + loader: Loader used to fit the solver. 
+        """
+
         raise NotImplementedError
 
     def fit(
@@ -507,6 +513,11 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int):
 
     @abc.abstractmethod
     def _check_is_session_id_valid(self, session_id: Optional[int] = None):
+        """Check that the session ID provided is valid for the solver instance.
+
+        Args:
+            session_id: The session ID to check.
+        """
         raise NotImplementedError
 
     @abc.abstractmethod
@@ -530,7 +541,7 @@ def _select_model(
 
     @torch.no_grad()
     def transform(self,
-                  inputs: torch.Tensor,
+                  inputs: Union[torch.Tensor, List[torch.Tensor], npt.NDArray],
                   pad_before_transform: bool = True,
                   session_id: Optional[int] = None,
                   batch_size: Optional[int] = None) -> torch.Tensor:
diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py
index 666dafb8..f10f36a6 100644
--- a/cebra/solver/multi_session.py
+++ b/cebra/solver/multi_session.py
@@ -126,6 +126,17 @@ def _inference(self, batches: List[cebra.data.Batch]) -> cebra.data.Batch:
         )
 
     def _set_fitted_params(self, loader: cebra.data.Loader):
+        """Set parameters once the solver is fitted.
+
+        In the multi-session solver, the number of sessions is set to the
+        number of sessions in the loader's dataset, and the number of
+        features is set as a list with the number of neurons in
+        each session's dataset.
+
+        Args:
+            loader: Loader used to fit the solver.
+        """
+
         self.num_sessions = loader.dataset.num_sessions
         self.n_features = [
             loader.dataset.get_input_dimension(session_id)
@@ -152,6 +163,14 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor,
         )
 
     def _check_is_session_id_valid(self, session_id: Optional[int]):
+        """Check that the session ID provided is valid for the solver instance.
+
+        The session ID must be non-null and between 0 and the number of sessions in the dataset.
+
+        Args:
+            session_id: The session ID to check.
+        """
+
         if session_id is None:
             raise RuntimeError(
                 "No session_id provided: multisession model requires a session_id to choose the model corresponding to your data shape."
diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py
index b941a8ba..eb75db0e 100644
--- a/cebra/solver/single_session.py
+++ b/cebra/solver/single_session.py
@@ -55,6 +55,14 @@ def parameters(self, session_id: Optional[int] = None):
         yield parameter
 
     def _set_fitted_params(self, loader: cebra.data.Loader):
+        """Set parameters once the solver is fitted.
+
+        In the single-session solver, the number of sessions is set to None and the
+        number of features is set to the number of neurons in the dataset.
+
+        Args:
+            loader: Loader used to fit the solver.
+        """
         self.num_sessions = None
         self.n_features = loader.dataset.input_dimension
 
@@ -77,6 +85,14 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int):
         )
 
     def _check_is_session_id_valid(self, session_id: Optional[int] = None):
+        """Check that the session ID provided is valid for the solver instance.
+
+        The session ID must be None or equal to 0.
+
+        Args:
+            session_id: The session ID to check.
+        """
+
         if session_id is not None and session_id > 0:
             raise RuntimeError(
                 f"Invalid session_id {session_id}: single session models only takes an optional null session_id."
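
The validation documented in this patch differs between the two solver flavours: a single-session solver accepts session_id=None or 0, while a multi-session solver requires an explicit, in-range ID to select the per-session model. A minimal usage sketch that mirrors the tests added earlier in the series; solver_single, solver_multi, X and X_sessions stand for already fitted solvers and their torch inputs and are placeholder names, not part of the patch:

    # Single-session solver: session_id is optional, None and 0 are equivalent.
    emb = solver_single.transform(X)
    emb = solver_single.transform(X, session_id=0)

    # Without padding, the receptive field shortens the output:
    # emb.shape[0] == X.shape[0] - len(model.get_offset()) + 1
    emb = solver_single.transform(X, pad_before_transform=False)

    # Multi-session solver: the ID picks the model for that session.
    emb0 = solver_multi.transform(X_sessions[0], session_id=0)
    emb1 = solver_multi.transform(X_sessions[1], session_id=1, batch_size=512)

    # A missing or out-of-range session_id raises RuntimeError, as exercised in the tests.
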
From 8e5f9332768ed328b23623eba4cd20225f5bd83c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:27:34 +0200 Subject: [PATCH 035/100] Fix save/load --- cebra/integrations/sklearn/cebra.py | 5 +++ cebra/solver/base.py | 11 +++-- tests/test_solver.py | 62 +++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 39a64073..c3fd9c9e 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1417,6 +1417,11 @@ def load(cls, else: cebra_ = _check_type_checkpoint(checkpoint) + n_features = cebra_.n_features_ + cebra_.solver_.n_features = ([ + session_n_features for session_n_features in n_features + ] if isinstance(n_features, list) else n_features) + return cebra_ def to(self, device: Union[str, torch.device]): diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 6fb786b4..d60c4515 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -633,13 +633,12 @@ def load(self, logdir, filename="checkpoint.pth"): checkpoint = torch.load(savepath, map_location=self.device) self.load_state_dict(checkpoint, strict=True) - if hasattr(self.model, "n_features"): - n_features = self.model.n_features - self.n_features = ([ - session_n_features for session_n_features in n_features - ] if isinstance(n_features, list) else n_features) + n_features = self.n_features + self.n_features = ([ + session_n_features for session_n_features in n_features + ] if isinstance(n_features, list) else n_features) - def save(self, logdir, filename="checkpoint_last.pth"): + def save(self, logdir, filename="checkpoint.pth"): """Save the model and optimizer params. Args: diff --git a/tests/test_solver.py b/tests/test_solver.py index 4bb17232..8ebef4a0 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -19,7 +19,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import copy import itertools +import tempfile import numpy as np import pytest @@ -91,6 +93,48 @@ def _make_model(dataset, model_architecture="offset10-model"): # ) +def _assert_same_state_dict(first, second): + assert first.keys() == second.keys() + for key in first: + if isinstance(first[key], torch.Tensor): + assert torch.allclose(first[key], second[key]), key + elif isinstance(first[key], dict): + _assert_same_state_dict(first[key], second[key]), key + else: + assert first[key] == second[key] + + +def check_if_fit(model): + """Check if a model was already fit. + + Args: + model: The model to check. + + Returns: + True if the model was already fit. 
+ """ + return hasattr(model, "n_features_") + + +def _assert_equal(original_solver, loaded_solver): + for k in original_solver.model.state_dict(): + assert original_solver.model.state_dict()[k].all( + ) == loaded_solver.model.state_dict()[k].all() + assert check_if_fit(loaded_solver) == check_if_fit(original_solver) + + if check_if_fit(loaded_solver): + _assert_same_state_dict(original_solver.state_dict_, + loaded_solver.state_dict_) + X = np.random.normal(0, 1, (100, 1)) + + if loaded_solver.num_sessions is not None: + assert np.allclose(loaded_solver.transform(X, session_id=0), + original_solver.transform(X, session_id=0)) + else: + assert np.allclose(loaded_solver.transform(X), + original_solver.transform(X)) + + @pytest.mark.parametrize( "data_name, loader_initfunc, model_architecture, solver_initfunc", single_session_tests) @@ -144,6 +188,12 @@ def test_single_session(data_name, loader_initfunc, model_architecture, for param in solver.parameters(): assert isinstance(param, torch.Tensor) + fitted_solver = copy.deepcopy(solver) + with tempfile.TemporaryDirectory() as temp_dir: + solver.save(temp_dir) + solver.load(temp_dir) + _assert_equal(fitted_solver, solver) + @pytest.mark.parametrize( "data_name, loader_initfunc, model_architecture, solver_initfunc", @@ -225,6 +275,12 @@ def test_single_session_hybrid(data_name, loader_initfunc, model_architecture, for param in solver.parameters(): assert isinstance(param, torch.Tensor) + fitted_solver = copy.deepcopy(solver) + with tempfile.TemporaryDirectory() as temp_dir: + solver.save(temp_dir) + solver.load(temp_dir) + _assert_equal(fitted_solver, solver) + @pytest.mark.parametrize( "data_name, loader_initfunc, model_architecture, solver_initfunc", @@ -302,6 +358,12 @@ def test_multi_session(data_name, loader_initfunc, model_architecture, for param in solver.parameters(): assert isinstance(param, torch.Tensor) + fitted_solver = copy.deepcopy(solver) + with tempfile.TemporaryDirectory() as temp_dir: + solver.save(temp_dir) + solver.load(temp_dir) + _assert_equal(fitted_solver, solver) + @pytest.mark.parametrize( "inputs, add_padding, offset, start_batch_idx, end_batch_idx, expected_output", From d08e400f2846b546dc43ef2ec68ea76bbce0d8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:28:36 +0200 Subject: [PATCH 036/100] Remove duplicate configure_for in multi dataset --- cebra/data/multi_session.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index a8d56d10..1758deb3 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -106,11 +106,6 @@ def load_batch(self, index: BatchIndex) -> List[Batch]: ) for session_id, session in enumerate(self.iter_sessions()) ] - def configure_for(self, model): - self.offset = model.get_offset() - for session in self.iter_sessions(): - session.configure_for(model) - def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. 
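
The save/load fix above is exercised in the tests by a simple round trip: deep-copy the fitted solver, save it to a temporary directory, load it back, and check that the checkpoint state and the embeddings are unchanged. A condensed sketch of that pattern, assuming an already fitted solver and an input array X (placeholder names, not defined in the patch):

    import copy
    import tempfile

    import numpy as np

    fitted = copy.deepcopy(solver)          # snapshot of the fitted state
    with tempfile.TemporaryDirectory() as tmp:
        solver.save(tmp)                    # writes the checkpoint file into tmp
        solver.load(tmp)                    # restores model and optimizer state (and n_features in the later patches)
    # a reloaded solver can transform without refitting
    assert np.allclose(fitted.transform(X), solver.transform(X))
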
From 0c693dd1b005a437faf5388eab061a256b82ae81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 22 Aug 2024 16:24:44 +0200 Subject: [PATCH 037/100] Make save/load cleaner --- cebra/solver/base.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index d60c4515..f9ae3d82 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -296,7 +296,7 @@ def state_dict(self) -> dict: the model was trained with. """ - return { + state_dict = { "model": self.model.state_dict(), "optimizer": self.optimizer.state_dict(), "loss": torch.tensor(self.history), @@ -306,6 +306,13 @@ def state_dict(self) -> dict: "log": self.log, } + if hasattr(self, "n_features"): + state_dict["n_features"] = self.n_features + if hasattr(self, "num_sessions"): + state_dict["num_sessions"] = self.num_sessions + + return state_dict + def load_state_dict(self, state_dict: dict, strict: bool = True): """Update the solver state with the given state_dict. @@ -343,6 +350,12 @@ def _get(key): if _contains("log"): self.log = _get("log") + # Not defined if the model was saved before being fitted. + if "n_features" in state_dict: + self.n_features = _get("n_features") + if "num_sessions" in state_dict: + self.num_sessions = _get("num_sessions") + @property def num_parameters(self) -> int: """Total number of parameters in the encoder and criterion.""" @@ -633,11 +646,6 @@ def load(self, logdir, filename="checkpoint.pth"): checkpoint = torch.load(savepath, map_location=self.device) self.load_state_dict(checkpoint, strict=True) - n_features = self.n_features - self.n_features = ([ - session_n_features for session_n_features in n_features - ] if isinstance(n_features, list) else n_features) - def save(self, logdir, filename="checkpoint.pth"): """Save the model and optimizer params. From 794867bf58fc078de09623f33d944dce815aa704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 11:58:33 +0200 Subject: [PATCH 038/100] Fix codespell errors --- cebra/solver/base.py | 4 ++-- cebra/solver/multi_session.py | 2 +- cebra/solver/single_session.py | 2 +- tests/test_solver.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index f9ae3d82..1d8bb9ce 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -86,7 +86,7 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int, raise ValueError( f"The batch has length {batch_size_lenght} which " f"is smaller or equal than the required offset length {len(offset)}." - f"Either choose a model with smaller offset or the batch shoud contain more samples." + f"Either choose a model with smaller offset or the batch should contain more samples." ) @@ -511,7 +511,7 @@ def decoding(self, train_loader, valid_loader): @abc.abstractmethod def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): - """Check that the inputs can be infered using the selected model. + """Check that the inputs can be inferred using the selected model. Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. 
diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py index 350266af..87d906d4 100644 --- a/cebra/solver/multi_session.py +++ b/cebra/solver/multi_session.py @@ -144,7 +144,7 @@ def _set_fitted_params(self, loader: cebra.data.Loader): def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: Optional[int]): - """Check that the inputs can be infered using the selected model. + """Check that the inputs can be inferred using the selected model. Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py index eb75db0e..e0927a21 100644 --- a/cebra/solver/single_session.py +++ b/cebra/solver/single_session.py @@ -67,7 +67,7 @@ def _set_fitted_params(self, loader: cebra.data.Loader): self.n_features = loader.dataset.input_dimension def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): - """Check that the inputs can be infered using the selected model. + """Check that the inputs can be inferred using the selected model. Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. diff --git a/tests/test_solver.py b/tests/test_solver.py index ffe01d4a..63caed67 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -683,7 +683,7 @@ def test_batched_transform_multi_session(data_name, model_name, padding, n_samples = dataset._datasets[0].neural.shape[0] assert all( d.neural.shape[0] == n_samples for d in dataset._datasets - ), "for this set all of the sessions need ot have same number of samples." + ), # all sessions need to have same number of samples smallest_batch_length = n_samples - batch_size offset_ = model[0].get_offset() From 0bb654940b81a30107a8b93acf6400c14c7bd125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:10:25 +0200 Subject: [PATCH 039/100] Fix docs compilation errors --- cebra/data/multi_session.py | 6 +++--- docs/source/conf.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index f9c4ca47..0af2793c 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -30,7 +30,7 @@ import torch import cebra.data as cebra_data -import cebra.distributions as cebra_distr +import cebra.distributions from cebra.data.datatypes import Batch from cebra.data.datatypes import BatchIndex @@ -130,7 +130,7 @@ class MultiSessionLoader(cebra_data.Loader): def __post_init__(self): super().__post_init__() - self.sampler = cebra_distr.MultisessionSampler(self.dataset, + self.sampler = cebra.distributions.MultisessionSampler(self.dataset, self.time_offset) def get_indices(self, num_samples: int) -> List[BatchIndex]: @@ -169,7 +169,7 @@ class DiscreteMultiSessionDataLoader(MultiSessionLoader): # Overwrite sampler with the discrete implementation # Generalize MultisessionSampler to avoid doing this? 
def __post_init__(self): - self.sampler = cebra_distr.DiscreteMultisessionSampler(self.dataset) + self.sampler = cebra.distributions.DiscreteMultisessionSampler(self.dataset) @property def index(self): diff --git a/docs/source/conf.py b/docs/source/conf.py index be839ddf..025a988b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -104,7 +104,7 @@ def get_years(start_year=2021): intersphinx_mapping = { "python": ("https://docs.python.org/3", None), - "torch": ("https://pytorch.org/docs/master/", None), + "torch": ("https://pytorch.org/docs/stable/", None), "sklearn": ("https://scikit-learn.org/stable", None), "numpy": ("https://numpy.org/doc/stable/", None), "matplotlib": ("https://matplotlib.org/stable/", None), From 04a102ffb733ba0a962fe0d4cb8ba89721fc4d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:11:30 +0200 Subject: [PATCH 040/100] Fix formatting --- cebra/data/multi_session.py | 7 ++++--- tests/test_datasets.py | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index 0af2793c..be2e556b 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -130,8 +130,8 @@ class MultiSessionLoader(cebra_data.Loader): def __post_init__(self): super().__post_init__() - self.sampler = cebra.distributions.MultisessionSampler(self.dataset, - self.time_offset) + self.sampler = cebra.distributions.MultisessionSampler( + self.dataset, self.time_offset) def get_indices(self, num_samples: int) -> List[BatchIndex]: ref_idx = self.sampler.sample_prior(self.batch_size) @@ -169,7 +169,8 @@ class DiscreteMultiSessionDataLoader(MultiSessionLoader): # Overwrite sampler with the discrete implementation # Generalize MultisessionSampler to avoid doing this? def __post_init__(self): - self.sampler = cebra.distributions.DiscreteMultisessionSampler(self.dataset) + self.sampler = cebra.distributions.DiscreteMultisessionSampler( + self.dataset) @property def index(self): diff --git a/tests/test_datasets.py b/tests/test_datasets.py index adbfab64..98885d07 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -153,9 +153,8 @@ def test_allen(): @pytest.mark.requires_dataset -@pytest.mark.parametrize("options", - cebra.datasets.get_options("*", - expand_parametrized=False)) +@pytest.mark.parametrize( + "options", cebra.datasets.get_options("*", expand_parametrized=False)) def test_options(options): assert len(options) > 0 assert len(multisubject_options) > 0 From 7aab28251b38f5b5069b7839ce4790fce0211bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:22:54 +0200 Subject: [PATCH 041/100] Fix extra docs errors --- cebra/data/multi_session.py | 2 +- cebra/data/single_session.py | 2 +- cebra/solver/base.py | 4 ++-- tests/test_solver.py | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index be2e556b..9d10fbfc 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -110,7 +110,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. This sets the - :py:attr:`offset` attribute of the dataset. + :py:attr:`cebra_data.Dataset.offset` attribute of the dataset. Args: model: The model to configure the dataset for. 
diff --git a/cebra/data/single_session.py b/cebra/data/single_session.py index 71cd0c3e..169ebcb6 100644 --- a/cebra/data/single_session.py +++ b/cebra/data/single_session.py @@ -76,7 +76,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. This sets the - :py:attr:`offset` attribute of the dataset. + :py:attr:`cebra_data.Dataset.offset` attribute of the dataset. Args: model: The model to configure the dataset for. diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 1d8bb9ce..0b5549cf 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -185,7 +185,7 @@ def _transform( model: cebra.models.Model, inputs: torch.Tensor, pad_before_transform: bool, - offset: cebra.data.Offset, + offset: cebra.data.datatypes.Offset, ) -> torch.Tensor: """Compute the embedding. @@ -206,7 +206,7 @@ def _transform( def _batched_transform(model: cebra.models.Model, inputs: torch.Tensor, batch_size: int, pad_before_transform: bool, - offset: cebra.data.Offset) -> torch.Tensor: + offset: cebra.data.datatypes.Offset) -> torch.Tensor: """Compute the embedding on batched inputs. Args: diff --git a/tests/test_solver.py b/tests/test_solver.py index 63caed67..d93c90e9 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -65,6 +65,7 @@ # multi_session_tests.append((*args, cebra.solver.MultiSessionAuxVariableSolver)) + def _get_loader(data, loader_initfunc): kwargs = dict(num_steps=5, batch_size=32) loader = loader_initfunc(data, **kwargs) @@ -574,6 +575,7 @@ def test_select_model_multi_session(data_name, model_name, session_id, assert offset.left == offset_.left and offset.right == offset_.right assert model == model_ + models = [ "offset1-model", "offset10-model", @@ -683,7 +685,7 @@ def test_batched_transform_multi_session(data_name, model_name, padding, n_samples = dataset._datasets[0].neural.shape[0] assert all( d.neural.shape[0] == n_samples for d in dataset._datasets - ), # all sessions need to have same number of samples + ), "for this set all of the sessions need to have same number of samples." smallest_batch_length = n_samples - batch_size offset_ = model[0].get_offset() From ffa66eb79891aac77134ff787cacff0bddf26a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:18:58 +0200 Subject: [PATCH 042/100] Fix offset in docs --- cebra/data/multi_session.py | 2 +- cebra/data/single_session.py | 2 +- cebra/solver/base.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index 9d10fbfc..f9686769 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -110,7 +110,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. This sets the - :py:attr:`cebra_data.Dataset.offset` attribute of the dataset. + :py:attr:`cebra.data.Dataset.offset` attribute of the dataset. Args: model: The model to configure the dataset for. diff --git a/cebra/data/single_session.py b/cebra/data/single_session.py index 169ebcb6..9270c98b 100644 --- a/cebra/data/single_session.py +++ b/cebra/data/single_session.py @@ -76,7 +76,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. 
This sets the - :py:attr:`cebra_data.Dataset.offset` attribute of the dataset. + :py:attr:`cebra.data.Dataset.offset` attribute of the dataset. Args: model: The model to configure the dataset for. diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 0b5549cf..af617838 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -91,14 +91,15 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int, def _add_batched_zero_padding(batched_data: torch.Tensor, - offset: cebra.data.Offset, batch_start_idx: int, + offset: cebra.data.Offset, + batch_start_idx: int, batch_end_idx: int, num_samples: int) -> torch.Tensor: """Add zero padding to the input data before inference. Args: batched_data: Data to apply the inference on. - offset (cebra.data.Offset): _description_ + offset: Offset of the model to consider when padding. batch_start_idx: Index of the first sample in the batch. batch_end_idx: Index of the first sample in the batch. num_samples (int): Total number of samples in the data. From 7f58607d969ffe5085b63abd69d5259744cc79db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:50:20 +0200 Subject: [PATCH 043/100] Remove attribute ref --- cebra/data/multi_session.py | 2 +- cebra/data/single_session.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index f9686769..cff61038 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -110,7 +110,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. This sets the - :py:attr:`cebra.data.Dataset.offset` attribute of the dataset. + `offset` attribute of the dataset. Args: model: The model to configure the dataset for. diff --git a/cebra/data/single_session.py b/cebra/data/single_session.py index 9270c98b..a821db97 100644 --- a/cebra/data/single_session.py +++ b/cebra/data/single_session.py @@ -76,7 +76,7 @@ def configure_for(self, model: "cebra.models.Model"): """Configure the dataset offset for the provided model. Call this function before indexing the dataset. This sets the - :py:attr:`cebra.data.Dataset.offset` attribute of the dataset. + `offset` attribute of the dataset. Args: model: The model to configure the dataset for. From c2544c759478ee962e0a37992a35155df08d2b43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:55:19 +0200 Subject: [PATCH 044/100] Add review updates --- cebra/data/base.py | 1 - cebra/integrations/sklearn/cebra.py | 60 +++++++- cebra/solver/base.py | 35 +++-- cebra/solver/multi_session.py | 6 +- tests/test_sklearn.py | 220 +++++++++++++++++++++++++++- tests/test_solver.py | 6 +- 6 files changed, 300 insertions(+), 28 deletions(-) diff --git a/cebra/data/base.py b/cebra/data/base.py index 874ed58b..54ae4579 100644 --- a/cebra/data/base.py +++ b/cebra/data/base.py @@ -207,7 +207,6 @@ def configure_for(self, model: "cebra.models.Model"): model: The model to configure the dataset for. 
""" raise NotImplementedError - self.offset = model.get_offset() @dataclasses.dataclass diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index ce50b7ea..bdae8ca7 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -1202,7 +1202,7 @@ def transform(self, sklearn_utils_validation.check_is_fitted(self, "n_features_") self.solver_._check_is_session_id_valid(session_id=session_id) - if torch.is_tensor(X) and X.device.type == "cuda": + if torch.is_tensor(X): X = X.detach().cpu() X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) @@ -1210,6 +1210,10 @@ def transform(self, if isinstance(X, np.ndarray): X = torch.from_numpy(X) + if batch_size is not None and batch_size < 1: + raise ValueError( + f"Batch size should be at least 1, got {batch_size}") + with torch.no_grad(): output = self.solver_.transform( inputs=X, @@ -1219,6 +1223,60 @@ def transform(self, return output.detach().cpu().numpy() + # Deprecated, kept for testing. + def transform_deprecated(self, + X: Union[npt.NDArray, torch.Tensor], + session_id: Optional[int] = None) -> npt.NDArray: + """Transform an input sequence and return the embedding. + + Args: + X: A numpy array or torch tensor of size ``time x dimension``. + session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`num_sessions` for + multisession, set to ``None`` for single session. + + Returns: + A :py:func:`numpy.array` of size ``time x output_dimension``. + + Example: + + >>> import cebra + >>> import numpy as np + >>> dataset = np.random.uniform(0, 1, (1000, 30)) + >>> cebra_model = cebra.CEBRA(max_iterations=10) + >>> cebra_model.fit(dataset) + CEBRA(max_iterations=10) + >>> embedding = cebra_model.transform(dataset) + + """ + + sklearn_utils_validation.check_is_fitted(self, "n_features_") + model, offset = self._select_model(X, session_id) + + # Input validation + X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_)) + input_dtype = X.dtype + + with torch.no_grad(): + model.eval() + + if self.pad_before_transform: + X = np.pad(X, ((offset.left, offset.right - 1), (0, 0)), + mode="edge") + X = torch.from_numpy(X).float().to(self.device_) + + if isinstance(model, cebra.models.ConvolutionalModelMixin): + # Fully convolutional evaluation, switch (T, C) -> (1, C, T) + X = X.transpose(1, 0).unsqueeze(0) + output = model(X).cpu().numpy().squeeze(0).transpose(1, 0) + else: + # Standard evaluation, (T, C, dt) + output = model(X).cpu().numpy() + + if input_dtype == "float64": + return output.astype(input_dtype) + + return output + def fit_transform( self, X: Union[npt.NDArray, torch.Tensor], diff --git a/cebra/solver/base.py b/cebra/solver/base.py index af617838..7f0cbef1 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -81,18 +81,17 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int, f"batch_end_idx ({batch_end_idx}) cannot exceed the length of inputs ({num_samples})." ) - batch_size_lenght = batch_end_idx - batch_start_idx - if batch_size_lenght <= len(offset): + batch_size_length = batch_end_idx - batch_start_idx + if batch_size_length <= len(offset): raise ValueError( - f"The batch has length {batch_size_lenght} which " + f"The batch has length {batch_size_length} which " f"is smaller or equal than the required offset length {len(offset)}." f"Either choose a model with smaller offset or the batch should contain more samples." 
) def _add_batched_zero_padding(batched_data: torch.Tensor, - offset: cebra.data.Offset, - batch_start_idx: int, + offset: cebra.data.Offset, batch_start_idx: int, batch_end_idx: int, num_samples: int) -> torch.Tensor: """Add zero padding to the input data before inference. @@ -409,6 +408,7 @@ def fit( TODO: * Refine the API here. Drop the validation entirely, and implement this via a hook? """ + self._set_fitted_params(loader) self.to(loader.device) iterator = self._get_loader(loader) @@ -436,8 +436,6 @@ def fit( save_hook(num_steps, self) self.save(logdir, f"checkpoint_{num_steps:#07d}.pth") - self._set_fitted_params(loader) - def step(self, batch: cebra.data.Batch) -> dict: """Perform a single gradient update. @@ -553,6 +551,10 @@ def _select_model( """ raise NotImplementedError + @property + def is_fitted(self): + return hasattr(self, "n_features") + @torch.no_grad() def transform(self, inputs: Union[torch.Tensor, List[torch.Tensor], npt.NDArray], @@ -579,19 +581,24 @@ def transform(self, Returns: The output embedding. """ + if not self.is_fitted: + raise ValueError( + f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with " + "appropriate arguments before using this estimator.") + + if batch_size is not None and batch_size < 1: + raise ValueError( + f"Batch size should be at least 1, got {batch_size}") + if isinstance(inputs, list): - raise NotImplementedError( - "Inputs to transform() should be the data for a single session." + raise ValueError( + "Inputs to transform() should be the data for a single session, but received a list." ) elif not isinstance(inputs, torch.Tensor): raise ValueError( f"Inputs should be a torch.Tensor, not {type(inputs)}.") - if not hasattr(self, "n_features"): - raise ValueError( - f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with " - "appropriate arguments before using this estimator.") model, offset = self._select_model(inputs, session_id) if len(offset) < 2 and pad_before_transform: @@ -647,7 +654,7 @@ def load(self, logdir, filename="checkpoint.pth"): checkpoint = torch.load(savepath, map_location=self.device) self.load_state_dict(checkpoint, strict=True) - def save(self, logdir, filename="checkpoint.pth"): + def save(self, logdir, filename="checkpoint_last.pth"): """Save the model and optimizer params. 
Args: diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py index 87d906d4..b4be2125 100644 --- a/cebra/solver/multi_session.py +++ b/cebra/solver/multi_session.py @@ -44,9 +44,9 @@ class MultiSessionSolver(abc_.Solver): def parameters(self, session_id: Optional[int] = None): """Iterate over all parameters.""" - self._check_is_session_id_valid(session_id=session_id) - for parameter in self.model[session_id].parameters(): - yield parameter + if session_id is not None: + for parameter in self.model[session_id].parameters(): + yield parameter for parameter in self.criterion.parameters(): yield parameter diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index e409c0e3..0644aef7 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -231,7 +231,7 @@ def iterate_models(): ) in itertools.product( [ "offset10-model", "offset10-model-mse", "offset1-model", - "resample-model" + "offset40-model-4x-subsample" ], _DEVICES, ["euclidean", "cosine"], @@ -343,6 +343,20 @@ def test_sklearn(model_architecture, device): assert cebra_model.num_sessions is None embedding = cebra_model.transform(X) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, batch_size=50) + assert isinstance(embedding, np.ndarray) + + if model_architecture in [ + "offset36-model-cpu", "offset36-model-dropout-cpu", + "offset36-model-more-dropout-cpu", + "offset40-model-4x-subsample-cpu", + "offset20-model-4x-subsample-cpu", "offset36-model-cuda", + "offset36-model-dropout-cuda", "offset36-model-more-dropout-cuda", + "offset40-model-4x-subsample-cuda", + "offset20-model-4x-subsample-cuda" + ]: + with pytest.raises(ValueError, match="required.*offset.*length"): + embedding = cebra_model.transform(X, batch_size=10) # continuous behavior contrastive cebra_model.fit(X, y_c1, y_c2) @@ -354,9 +368,17 @@ def test_sklearn(model_architecture, device): assert isinstance(embedding, np.ndarray) embedding = cebra_model.transform(X, session_id=0) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, batch_size=50) + assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, session_id=0, batch_size=50) + assert isinstance(embedding, np.ndarray) with pytest.raises(RuntimeError, match="Invalid.*session_id"): embedding = cebra_model.transform(X, session_id=2) + with pytest.raises(ValueError, match="Batch.*size"): + embedding = cebra_model.transform(X, batch_size=0) + with pytest.raises(ValueError, match="Batch.*size"): + embedding = cebra_model.transform(X, batch_size=-10) with pytest.raises(ValueError, match="Invalid.*labels"): cebra_model.fit(X, [y_c1, y_c1_s2]) with pytest.raises(ValueError, match="Invalid.*samples"): @@ -369,11 +391,15 @@ def test_sklearn(model_architecture, device): cebra_model.fit(X, y_d) embedding = cebra_model.transform(X) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, batch_size=50) + assert isinstance(embedding, np.ndarray) # mixed cebra_model.fit(X, y_c1, y_c2, y_d) embedding = cebra_model.transform(X) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, batch_size=50) + assert isinstance(embedding, np.ndarray) # multi-session discrete behavior contrastive cebra_model.fit([X, X_s2], [y_d, y_d_s2]) @@ -387,6 +413,9 @@ def test_sklearn(model_architecture, device): embedding = cebra_model.transform(X_s2, session_id=1) assert isinstance(embedding, np.ndarray) assert embedding.shape == (X_s2.shape[0], output_dimension) + embedding = cebra_model.transform(X_s2, 
session_id=1, batch_size=50) + assert isinstance(embedding, np.ndarray) + assert embedding.shape == (X_s2.shape[0], output_dimension) with pytest.raises(ValueError, match="shape"): embedding = cebra_model.transform(X_s2, session_id=0) @@ -411,6 +440,9 @@ def test_sklearn(model_architecture, device): embedding = cebra_model.transform(X_s2, session_id=1) assert isinstance(embedding, np.ndarray) assert embedding.shape == (X_s2.shape[0], output_dimension) + embedding = cebra_model.transform(X_s2, session_id=1, batch_size=50) + assert isinstance(embedding, np.ndarray) + assert embedding.shape == (X_s2.shape[0], output_dimension) with pytest.raises(ValueError, match="shape"): embedding = cebra_model.transform(X_s2, session_id=0) @@ -442,6 +474,9 @@ def test_sklearn(model_architecture, device): embedding = cebra_model.transform(X, session_id=2) assert isinstance(embedding, np.ndarray) assert embedding.shape == (X.shape[0], output_dimension) + embedding = cebra_model.transform(X, session_id=2, batch_size=50) + assert isinstance(embedding, np.ndarray) + assert embedding.shape == (X.shape[0], output_dimension) with pytest.raises(ValueError, match="shape"): embedding = cebra_model.transform(X_s2, session_id=0) @@ -467,6 +502,9 @@ def test_sklearn(model_architecture, device): embedding = cebra_model.transform(X, session_id=2) assert isinstance(embedding, np.ndarray) assert embedding.shape == (X.shape[0], output_dimension) + embedding = cebra_model.transform(X, session_id=2, batch_size=50) + assert isinstance(embedding, np.ndarray) + assert embedding.shape == (X.shape[0], output_dimension) with pytest.raises(ValueError, match="shape"): embedding = cebra_model.transform(X_s2, session_id=0) @@ -711,6 +749,8 @@ def check_first_layer_dim(model, X): check_first_layer_dim(cebra_model, X_s2) embedding = cebra_model.transform(X_s2) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X_s2, batch_size=50) + assert isinstance(embedding, np.ndarray) cebra_model.fit(X, y_c1, y_c2, adapt=True) check_first_layer_dim(cebra_model, X) @@ -718,6 +758,8 @@ def check_first_layer_dim(model, X): assert isinstance(embedding, np.ndarray) embedding = cebra_model.transform(X, session_id=0) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, session_id=0, batch_size=50) + assert isinstance(embedding, np.ndarray) with pytest.raises(RuntimeError, match="Invalid.*session_id"): embedding = cebra_model.transform(X, session_id=2) @@ -730,11 +772,15 @@ def check_first_layer_dim(model, X): check_first_layer_dim(cebra_model, X_s2) embedding = cebra_model.transform(X_s2) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X_s2, batch_size=50) + assert isinstance(embedding, np.ndarray) cebra_model.fit(X, y_c1, y_c2, y_d, adapt=True) check_first_layer_dim(cebra_model, X) embedding = cebra_model.transform(X) assert isinstance(embedding, np.ndarray) + embedding = cebra_model.transform(X, batch_size=50) + assert isinstance(embedding, np.ndarray) with pytest.raises(NotImplementedError, match=".*multisession.*"): cebra_model.fit([X, X_s2], [y_c1, y_c1_s2], adapt=True) @@ -848,8 +894,8 @@ def test_sklearn_full(model_architecture, device, pad_before_transform): @pytest.mark.parametrize("model_architecture,device", - [("resample-model", "cpu"), - ("resample5-model", "cpu")]) + [("offset40-model-4x-subsample", "cpu"), + ("offset20-model-4x-subsample", "cpu")]) def test_sklearn_resampling_model(model_architecture, device): cebra_model = cebra_sklearn_cebra.CEBRA( 
model_architecture=model_architecture, @@ -869,10 +915,12 @@ def test_sklearn_resampling_model(model_architecture, device): cebra_model.fit(X, y_c1) output = cebra_model.transform(X) assert output.shape == (250, 4) + output = cebra_model.transform(X, batch_size=100) + assert output.shape == (250, 4) @pytest.mark.parametrize("model_architecture,device", - [("resample1-model", "cpu")]) + [("offset4-model-2x-subsample", "cpu")]) def test_sklearn_resampling_model_not_yet_supported(model_architecture, device): cebra_model = cebra_sklearn_cebra.CEBRA( model_architecture=model_architecture, max_iterations=5) @@ -1294,3 +1342,167 @@ def test_check_device(): torch.backends.mps.is_built = lambda: False with pytest.raises(ValueError): cebra_sklearn_utils.check_device(device) + + +@_util.parametrize_slow( + arg_names="model_architecture,device", + fast_arguments=list( + itertools.islice( + itertools.product( + cebra_sklearn_cebra.CEBRA.supported_model_architectures(), + _DEVICES), + 2, + )), + slow_arguments=list( + itertools.product( + cebra_sklearn_cebra.CEBRA.supported_model_architectures(), + _DEVICES)), +) +def test_new_transform(model_architecture, device): + """ + This is a test that the original sklearn transform returns the same output as + the new sklearn transform that uses the pytorch solver transform. + """ + output_dimension = 4 + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture=model_architecture, + time_offsets=10, + learning_rate=3e-4, + max_iterations=5, + device=device, + output_dimension=output_dimension, + batch_size=42, + verbose=True, + ) + + # example dataset + X = np.random.uniform(0, 1, (1000, 50)) + X_s2 = np.random.uniform(0, 1, (800, 30)) + X_s3 = np.random.uniform(0, 1, (1000, 30)) + y_c1 = np.random.uniform(0, 1, (1000, 5)) + y_c1_s2 = np.random.uniform(0, 1, (800, 5)) + y_c2 = np.random.uniform(0, 1, (1000, 2)) + y_c2_s2 = np.random.uniform(0, 1, (800, 2)) + y_d = np.random.randint(0, 10, (1000,)) + y_d_s2 = np.random.randint(0, 10, (800,)) + + # time contrastive + cebra_model.fit(X) + embedding1 = cebra_model.transform(X) + embedding2 = cebra_model.transform_deprecated(X) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # continuous behavior contrastive + cebra_model.fit(X, y_c1, y_c2) + assert cebra_model.num_sessions is None + + embedding1 = cebra_model.transform(X) + embedding2 = cebra_model.transform_deprecated(X) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(torch.Tensor(X)) + embedding2 = cebra_model.transform_deprecated(torch.Tensor(X)) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(torch.Tensor(X), session_id=0) + embedding2 = cebra_model.transform_deprecated(torch.Tensor(X), session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # tensor input + cebra_model.fit(torch.Tensor(X), torch.Tensor(y_c1), torch.Tensor(y_c2)) + + # discrete behavior contrastive + cebra_model.fit(X, y_d) + embedding1 = cebra_model.transform(X) + embedding2 = cebra_model.transform_deprecated(X) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # mixed + cebra_model.fit(X, y_c1, y_c2, y_d) + embedding1 = cebra_model.transform(X) + embedding2 = cebra_model.transform_deprecated(X) + assert np.allclose(embedding1, embedding2, 
rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # multi-session discrete behavior contrastive + cebra_model.fit([X, X_s2], [y_d, y_d_s2]) + + embedding1 = cebra_model.transform(X, session_id=0) + embedding2 = cebra_model.transform_deprecated(X, session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(torch.Tensor(X), session_id=0) + embedding2 = cebra_model.transform_deprecated(torch.Tensor(X), session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X_s2, session_id=1) + embedding2 = cebra_model.transform_deprecated(X_s2, session_id=1) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # multi-session continuous behavior contrastive + cebra_model.fit([X, X_s2], [y_c1, y_c1_s2]) + + embedding1 = cebra_model.transform(X, session_id=0) + embedding2 = cebra_model.transform_deprecated(X, session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(torch.Tensor(X), session_id=0) + embedding2 = cebra_model.transform_deprecated(torch.Tensor(X), session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X_s2, session_id=1) + embedding2 = cebra_model.transform(X_s2, session_id=1) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # multi-session tensor inputs + cebra_model.fit( + [torch.Tensor(X), torch.Tensor(X_s2)], + [torch.Tensor(y_c1), torch.Tensor(y_c1_s2)], + ) + + # multi-session discrete behavior contrastive, more than two sessions + cebra_model.fit([X, X_s2, X], [y_d, y_d_s2, y_d]) + + embedding1 = cebra_model.transform(X, session_id=0) + embedding2 = cebra_model.transform_deprecated(X, session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X_s2, session_id=1) + embedding2 = cebra_model.transform_deprecated(X_s2, session_id=1) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X, session_id=2) + embedding2 = cebra_model.transform_deprecated(X, session_id=2) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + # multi-session continuous behavior contrastive, more than two sessions + cebra_model.fit([X, X_s2, X], [y_c1, y_c1_s2, y_c1]) + + embedding1 = cebra_model.transform(X, session_id=0) + embedding2 = cebra_model.transform_deprecated(X, session_id=0) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X_s2, session_id=1) + embedding2 = cebra_model.transform_deprecated(X_s2, session_id=1) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" + + embedding1 = cebra_model.transform(X, session_id=2) + embedding2 = cebra_model.transform_deprecated(X, session_id=2) + assert np.allclose(embedding1, embedding2, rtol=1e-5, + atol=1e-8), "Arrays are not close enough" diff --git a/tests/test_solver.py b/tests/test_solver.py index d93c90e9..c27a9e41 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -344,7 +344,7 @@ def 
test_multi_session(data_name, loader_initfunc, model_architecture, with pytest.raises(RuntimeError, match="No.*session_id"): embedding = solver.transform(X[0]) - with pytest.raises(RuntimeError, match="single.*session"): + with pytest.raises(ValueError, match="single.*session"): embedding = solver.transform(X) with pytest.raises(RuntimeError, match="Invalid.*session_id"): embedding = solver.transform(X[0], session_id=5) @@ -354,10 +354,6 @@ def test_multi_session(data_name, loader_initfunc, model_architecture, for param in solver.parameters(session_id=0): assert isinstance(param, torch.Tensor) - with pytest.raises(RuntimeError, match="No.*session_id"): - for param in solver.parameters(): - assert isinstance(param, torch.Tensor) - fitted_solver = copy.deepcopy(solver) with tempfile.TemporaryDirectory() as temp_dir: solver.save(temp_dir) From e1b7cc76bdeb87fcdcac2978cfc8fba8058d78cd Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 27 Oct 2024 19:08:10 +0100 Subject: [PATCH 045/100] apply ruff auto-fixes --- cebra/__init__.py | 6 ++-- cebra/__main__.py | 4 --- cebra/config.py | 1 - cebra/data/base.py | 3 -- cebra/data/datasets.py | 7 ---- cebra/data/datatypes.py | 3 -- cebra/data/helper.py | 10 +++--- cebra/data/multi_session.py | 2 -- cebra/data/single_session.py | 7 ++-- cebra/datasets/allen/ca_movie.py | 4 --- cebra/datasets/allen/ca_movie_decoding.py | 5 --- cebra/datasets/allen/combined.py | 20 ++--------- cebra/datasets/allen/make_neuropixel.py | 2 -- cebra/datasets/allen/neuropixel_movie.py | 14 +------- .../allen/neuropixel_movie_decoding.py | 8 ----- cebra/datasets/allen/single_session_ca.py | 8 ----- cebra/datasets/gaussian_mixture.py | 4 --- cebra/datasets/generate_synthetic_data.py | 1 - cebra/datasets/hippocampus.py | 2 -- cebra/datasets/make_neuropixel.py | 1 - cebra/datasets/monkey_reaching.py | 5 +-- cebra/distributions/base.py | 3 +- cebra/distributions/continuous.py | 5 ++- cebra/distributions/index.py | 7 ++-- cebra/distributions/mixed.py | 1 - cebra/integrations/deeplabcut.py | 2 +- cebra/integrations/sklearn/cebra.py | 31 ++++++++--------- cebra/integrations/sklearn/helpers.py | 2 +- cebra/integrations/sklearn/metrics.py | 12 +++---- cebra/models/criterions.py | 2 +- cebra/models/model.py | 2 -- cebra/models/projector.py | 2 +- cebra/solver/base.py | 34 ++++++++----------- cebra/solver/multi_session.py | 25 ++++++-------- cebra/solver/single_session.py | 26 +++++++------- cebra/solver/supervised.py | 8 ----- tests/test_datasets.py | 8 ++--- tests/test_sklearn.py | 4 +-- tests/test_solver.py | 5 ++- 39 files changed, 91 insertions(+), 205 deletions(-) diff --git a/cebra/__init__.py b/cebra/__init__.py index fd4cf58c..b361a441 100644 --- a/cebra/__init__.py +++ b/cebra/__init__.py @@ -33,7 +33,7 @@ from cebra.integrations.sklearn.decoder import L1LinearRegressor is_sklearn_available = True -except ImportError as e: +except ImportError: # silently fail for now pass @@ -42,7 +42,7 @@ from cebra.integrations.matplotlib import * is_matplotlib_available = True -except ImportError as e: +except ImportError: # silently fail for now pass @@ -51,7 +51,7 @@ from cebra.integrations.plotly import * is_plotly_available = True -except ImportError as e: +except ImportError: # silently fail for now pass diff --git a/cebra/__main__.py b/cebra/__main__.py index 6c7c18bf..4ba66993 100644 --- a/cebra/__main__.py +++ b/cebra/__main__.py @@ -27,11 +27,7 @@ import argparse import sys -import numpy as np -import torch - import cebra -import cebra.distributions as cebra_distr def 
train(parser, kwargs): diff --git a/cebra/config.py b/cebra/config.py index ba6e3922..a960721f 100644 --- a/cebra/config.py +++ b/cebra/config.py @@ -21,7 +21,6 @@ # import argparse import json -from dataclasses import MISSING from typing import Literal, Optional import literate_dataclasses as dataclasses diff --git a/cebra/data/base.py b/cebra/data/base.py index 54ae4579..e35e20c5 100644 --- a/cebra/data/base.py +++ b/cebra/data/base.py @@ -22,11 +22,8 @@ """Base classes for datasets and loaders.""" import abc -import collections -from typing import List import literate_dataclasses as dataclasses -import numpy as np import torch import cebra.data.assets as cebra_data_assets diff --git a/cebra/data/datasets.py b/cebra/data/datasets.py index 0b7f191d..9fa815c2 100644 --- a/cebra/data/datasets.py +++ b/cebra/data/datasets.py @@ -21,21 +21,14 @@ # """Pre-defined datasets.""" -import abc -import collections import types from typing import List, Tuple, Union -import literate_dataclasses as dataclasses import numpy as np import numpy.typing as npt import torch -from numpy.typing import NDArray import cebra.data as cebra_data -import cebra.distributions -from cebra.data.datatypes import Batch -from cebra.data.datatypes import BatchIndex class TensorDataset(cebra_data.SingleSessionDataset): diff --git a/cebra/data/datatypes.py b/cebra/data/datatypes.py index 11583909..4b2ac8a2 100644 --- a/cebra/data/datatypes.py +++ b/cebra/data/datatypes.py @@ -20,9 +20,6 @@ # limitations under the License. # import collections -from typing import Tuple - -import torch __all__ = ["Batch", "BatchIndex", "Offset"] diff --git a/cebra/data/helper.py b/cebra/data/helper.py index c324a80f..d2a1cfe3 100644 --- a/cebra/data/helper.py +++ b/cebra/data/helper.py @@ -181,14 +181,14 @@ def fit( elif ref_data.shape[0] == data.shape[0] and (ref_label is None or label is None): raise ValueError( - f"Missing labels: the data to align are the same shape but you provided only " - f"one of the sets of labels. Either provide both the reference and alignment " - f"labels or none.") + "Missing labels: the data to align are the same shape but you provided only " + "one of the sets of labels. 
Either provide both the reference and alignment " + "labels or none.") else: if ref_label is None or label is None: raise ValueError( - f"Missing labels: the data to align are not the same shape, " - f"provide labels to align the data and reference data.") + "Missing labels: the data to align are not the same shape, " + "provide labels to align the data and reference data.") if len(ref_label.shape) == 1: ref_label = np.expand_dims(ref_label, axis=1) diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py index cff61038..ebae8b6f 100644 --- a/cebra/data/multi_session.py +++ b/cebra/data/multi_session.py @@ -22,11 +22,9 @@ """Datasets and loaders for multi-session training.""" import abc -import collections from typing import List import literate_dataclasses as dataclasses -import numpy as np import torch import cebra.data as cebra_data diff --git a/cebra/data/single_session.py b/cebra/data/single_session.py index a821db97..0c575ed7 100644 --- a/cebra/data/single_session.py +++ b/cebra/data/single_session.py @@ -26,12 +26,9 @@ """ import abc -import collections import warnings -from typing import List import literate_dataclasses as dataclasses -import numpy as np import torch import cebra.data as cebra_data @@ -365,8 +362,8 @@ def __post_init__(self): if self.conditional != "time_delta": raise NotImplementedError( - f"Hybrid training is currently only implemented using the ``time_delta`` " - f"continual distribution.") + "Hybrid training is currently only implemented using the ``time_delta`` " + "continual distribution.") self.time_distribution = cebra.distributions.TimeContrastive( time_offset=self.time_offset, diff --git a/cebra/datasets/allen/ca_movie.py b/cebra/datasets/allen/ca_movie.py index f11e5e93..fa25f72a 100644 --- a/cebra/datasets/allen/ca_movie.py +++ b/cebra/datasets/allen/ca_movie.py @@ -29,11 +29,8 @@ """ -import glob -import hashlib import pathlib -import h5py import joblib import numpy as np import pandas as pd @@ -46,7 +43,6 @@ import cebra.data from cebra.datasets import get_datapath from cebra.datasets import parametrize -from cebra.datasets import register from cebra.datasets.allen import NUM_NEURONS from cebra.datasets.allen import SEEDS diff --git a/cebra/datasets/allen/ca_movie_decoding.py b/cebra/datasets/allen/ca_movie_decoding.py index 12d6cc64..8bb164cc 100644 --- a/cebra/datasets/allen/ca_movie_decoding.py +++ b/cebra/datasets/allen/ca_movie_decoding.py @@ -29,11 +29,8 @@ """ -import glob -import hashlib import pathlib -import h5py import joblib import numpy as np import pandas as pd @@ -41,12 +38,10 @@ import torch from numpy.random import Generator from numpy.random import PCG64 -from sklearn.decomposition import PCA import cebra.data from cebra.datasets import get_datapath from cebra.datasets import parametrize -from cebra.datasets import register from cebra.datasets.allen import NUM_NEURONS from cebra.datasets.allen import SEEDS from cebra.datasets.allen import SEEDS_DISJOINT diff --git a/cebra/datasets/allen/combined.py b/cebra/datasets/allen/combined.py index bfaca9b3..a05eb17c 100644 --- a/cebra/datasets/allen/combined.py +++ b/cebra/datasets/allen/combined.py @@ -31,22 +31,8 @@ """ -import glob -import hashlib - -import h5py -import joblib -import numpy as np -import pandas as pd -import scipy.io -import torch -from numpy.random import Generator -from numpy.random import PCG64 -from sklearn.decomposition import PCA - import cebra.data from cebra.datasets import parametrize -from cebra.datasets import register from cebra.datasets.allen 
import ca_movie from cebra.datasets.allen import ca_movie_decoding from cebra.datasets.allen import neuropixel_movie @@ -80,7 +66,7 @@ def __init__(self, num_neurons=1000, seed=111, area="VISp"): ) def __repr__(self): - return f"CaNeuropixelDataset" + return "CaNeuropixelDataset" @parametrize( @@ -117,7 +103,7 @@ def __init__(self, ) def __repr__(self): - return f"CaNeuropixelMovieOneCorticesDataset" + return "CaNeuropixelMovieOneCorticesDataset" @parametrize( @@ -152,4 +138,4 @@ def __init__(self, group, num_neurons, seed, cortex, split_flag="train"): ) def __repr__(self): - return f"CaNeuropixelMovieOneCorticesDisjointDataset" + return "CaNeuropixelMovieOneCorticesDisjointDataset" diff --git a/cebra/datasets/allen/make_neuropixel.py b/cebra/datasets/allen/make_neuropixel.py index 5c0568b7..1eabfe9f 100644 --- a/cebra/datasets/allen/make_neuropixel.py +++ b/cebra/datasets/allen/make_neuropixel.py @@ -31,14 +31,12 @@ """ import argparse -import glob import pathlib import h5py import joblib as jl import numpy as np import numpy.typing as npt -import pandas as pd from cebra.datasets import get_datapath diff --git a/cebra/datasets/allen/neuropixel_movie.py b/cebra/datasets/allen/neuropixel_movie.py index 51011407..f9b9c3ea 100644 --- a/cebra/datasets/allen/neuropixel_movie.py +++ b/cebra/datasets/allen/neuropixel_movie.py @@ -26,24 +26,12 @@ *Siegle, Joshua H., et al. "Survey of spiking in the mouse visual system reveals functional hierarchy." Nature 592.7852 (2021): 86-92. """ -import glob -import hashlib import pathlib -import h5py import joblib -import numpy as np -import pandas as pd -import scipy.io -import torch -from numpy.random import Generator -from numpy.random import PCG64 -from sklearn.decomposition import PCA - -import cebra.data + from cebra.datasets import get_datapath from cebra.datasets import parametrize -from cebra.datasets import register from cebra.datasets.allen import ca_movie from cebra.datasets.allen import NUM_NEURONS from cebra.datasets.allen import SEEDS diff --git a/cebra/datasets/allen/neuropixel_movie_decoding.py b/cebra/datasets/allen/neuropixel_movie_decoding.py index a99f367d..4ff1ebc2 100644 --- a/cebra/datasets/allen/neuropixel_movie_decoding.py +++ b/cebra/datasets/allen/neuropixel_movie_decoding.py @@ -26,25 +26,17 @@ *Siegle, Joshua H., et al. "Survey of spiking in the mouse visual system reveals functional hierarchy." Nature 592.7852 (2021): 86-92. 
""" -import glob -import hashlib import pathlib -import h5py import joblib import numpy as np -import pandas as pd -import scipy.io import torch from numpy.random import Generator from numpy.random import PCG64 -from sklearn.decomposition import PCA import cebra.data -from cebra.datasets import allen from cebra.datasets import get_datapath from cebra.datasets import parametrize -from cebra.datasets import register from cebra.datasets.allen import ca_movie_decoding from cebra.datasets.allen import NUM_NEURONS from cebra.datasets.allen import SEEDS diff --git a/cebra/datasets/allen/single_session_ca.py b/cebra/datasets/allen/single_session_ca.py index f207a1bc..5a3eea4d 100644 --- a/cebra/datasets/allen/single_session_ca.py +++ b/cebra/datasets/allen/single_session_ca.py @@ -28,25 +28,17 @@ *http://observatory.brain-map.org/visualcoding """ -import glob -import hashlib import pathlib -import h5py -import joblib import numpy as np -import pandas as pd import scipy.io import torch -from numpy.random import Generator -from numpy.random import PCG64 from sklearn.decomposition import PCA import cebra.data from cebra.datasets import get_datapath from cebra.datasets import init from cebra.datasets import parametrize -from cebra.datasets import register _DEFAULT_DATADIR = get_datapath() diff --git a/cebra/datasets/gaussian_mixture.py b/cebra/datasets/gaussian_mixture.py index f5508838..05fd971d 100644 --- a/cebra/datasets/gaussian_mixture.py +++ b/cebra/datasets/gaussian_mixture.py @@ -20,17 +20,13 @@ # limitations under the License. # import pathlib -from typing import Tuple import joblib as jl -import literate_dataclasses as dataclasses import numpy as np -import sklearn import torch import cebra.data import cebra.io -from cebra.datasets import get_datapath from cebra.datasets import parametrize from cebra.datasets import register diff --git a/cebra/datasets/generate_synthetic_data.py b/cebra/datasets/generate_synthetic_data.py index 8a243d6d..0fc33963 100644 --- a/cebra/datasets/generate_synthetic_data.py +++ b/cebra/datasets/generate_synthetic_data.py @@ -26,7 +26,6 @@ """ import argparse import pathlib -import sys import joblib as jl import keras diff --git a/cebra/datasets/hippocampus.py b/cebra/datasets/hippocampus.py index a32209a3..92537b8e 100644 --- a/cebra/datasets/hippocampus.py +++ b/cebra/datasets/hippocampus.py @@ -31,12 +31,10 @@ """ -import hashlib import pathlib import joblib import numpy as np -import scipy.io import sklearn.model_selection import sklearn.neighbors import torch diff --git a/cebra/datasets/make_neuropixel.py b/cebra/datasets/make_neuropixel.py index 7c097f38..65029f94 100644 --- a/cebra/datasets/make_neuropixel.py +++ b/cebra/datasets/make_neuropixel.py @@ -36,7 +36,6 @@ import joblib as jl import numpy as np import numpy.typing as npt -import pandas as pd def _filter_units( diff --git a/cebra/datasets/monkey_reaching.py b/cebra/datasets/monkey_reaching.py index 23fc5a6c..a07e24fd 100644 --- a/cebra/datasets/monkey_reaching.py +++ b/cebra/datasets/monkey_reaching.py @@ -28,14 +28,11 @@ """ -import hashlib import pathlib -import pickle as pk from typing import Union import joblib as jl import numpy as np -import scipy.io import torch import cebra.data @@ -72,7 +69,7 @@ def _load_data( try: from nlb_tools.nwb_interface import NWBDataset - except ImportError as e: + except ImportError: raise ImportError( "Could not import the nlb_tools package required for data loading " "the raw reaching datasets in NWB format. 
" diff --git a/cebra/distributions/base.py b/cebra/distributions/base.py index 990d7e79..07ad9ae4 100644 --- a/cebra/distributions/base.py +++ b/cebra/distributions/base.py @@ -31,7 +31,6 @@ """ import abc -import functools import torch @@ -82,7 +81,7 @@ def to(self, device: str): self._generator = torch.Generator(device=device) try: self._generator.set_state(state.to(device)) - except (TypeError, RuntimeError) as e: + except (TypeError, RuntimeError): # TODO(https://discuss.pytorch.org/t/cuda-rng-state-does-not-change-when-re-seeding-why-is-that/47917/3) self._generator.manual_seed(self.seed) diff --git a/cebra/distributions/continuous.py b/cebra/distributions/continuous.py index c4235d48..ad95fdf6 100644 --- a/cebra/distributions/continuous.py +++ b/cebra/distributions/continuous.py @@ -23,7 +23,6 @@ from typing import Literal, Optional -import numpy as np import torch import cebra.data @@ -112,8 +111,8 @@ def __init__( abc_.HasGenerator.__init__(self, device=device, seed=seed) if continuous is None and num_samples is None: raise ValueError( - f"Supply either a continuous index (which will be used to infer the dataset size) " - f"or alternatively the number of datapoints using the num_samples argument." + "Supply either a continuous index (which will be used to infer the dataset size) " + "or alternatively the number of datapoints using the num_samples argument." ) if continuous is not None and num_samples is not None: if len(continuous) != num_samples: diff --git a/cebra/distributions/index.py b/cebra/distributions/index.py index 0ee0959a..724e86e4 100644 --- a/cebra/distributions/index.py +++ b/cebra/distributions/index.py @@ -30,7 +30,6 @@ discrete labels should be converted accordingly. """ -import numpy as np import torch import cebra.data @@ -188,9 +187,9 @@ def __init__(self, discrete, continuous): "of samples.") if len(discrete.shape) > 1: raise ValueError( - f"Discrete indexing information needs to be limited to a 1d " - f"array/tensor. Multi-dimensional discrete indices should be " - f"reformatted first.") + "Discrete indexing information needs to be limited to a 1d " + "array/tensor. Multi-dimensional discrete indices should be " + "reformatted first.") # TODO(stes): Once a helper function exists, the error message should # mention it. diff --git a/cebra/distributions/mixed.py b/cebra/distributions/mixed.py index 14fb8a61..7221fd99 100644 --- a/cebra/distributions/mixed.py +++ b/cebra/distributions/mixed.py @@ -27,7 +27,6 @@ """ from typing import Literal -import numpy as np import torch import cebra.io diff --git a/cebra/integrations/deeplabcut.py b/cebra/integrations/deeplabcut.py index c265b09a..4c5b292d 100644 --- a/cebra/integrations/deeplabcut.py +++ b/cebra/integrations/deeplabcut.py @@ -160,7 +160,7 @@ def load_data(self, pcutoff: float = 0.6) -> npt.NDArray: ) elif self.dlc_df.columns.nlevels == 4: raise NotImplementedError( - f"Multi-animals DLC files are not handled. Please provide a single-animal file." + "Multi-animals DLC files are not handled. Please provide a single-animal file." 
) dlc_df_coords = ( diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index bdae8ca7..97beaaaa 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -21,9 +21,7 @@ # """Define the CEBRA model.""" -import copy import itertools -import warnings from typing import (Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union) @@ -33,7 +31,6 @@ import sklearn.utils.validation as sklearn_utils_validation import torch from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin from sklearn.base import TransformerMixin from torch import nn @@ -274,8 +271,8 @@ def _require_arg(key): "Until then, please train using the PyTorch API.")) else: raise RuntimeError( - f"Index combination not covered. Please report this issue and add the following " - f"information to your bug report: \n" + error_message) + "Index combination not covered. Please report this issue and add the following " + "information to your bug report: \n" + error_message) def _check_type_checkpoint(checkpoint): @@ -776,18 +773,18 @@ def _configure_for_all( cebra.models.ConvolutionalModelMixin): if len(model[n].get_offset()) > 1: raise ValueError( - f"It is not yet supported to run non-convolutional models with " - f"receptive fields/offsets larger than 1 via the sklearn API. " - f"Please use a different model, or revert to the pytorch " - f"API for training.") + "It is not yet supported to run non-convolutional models with " + "receptive fields/offsets larger than 1 via the sklearn API. " + "Please use a different model, or revert to the pytorch " + "API for training.") else: if not isinstance(model, cebra.models.ConvolutionalModelMixin): if len(model.get_offset()) > 1: raise ValueError( - f"It is not yet supported to run non-convolutional models with " - f"receptive fields/offsets larger than 1 via the sklearn API. " - f"Please use a different model, or revert to the pytorch " - f"API for training.") + "It is not yet supported to run non-convolutional models with " + "receptive fields/offsets larger than 1 via the sklearn API. " + "Please use a different model, or revert to the pytorch " + "API for training.") dataset.configure_for(model) @@ -1466,12 +1463,12 @@ def load(cls, if isinstance(checkpoint, dict) and backend == "torch": raise RuntimeError( - f"Cannot use 'torch' backend with a dictionary-based checkpoint. " - f"Please try a different backend.") + "Cannot use 'torch' backend with a dictionary-based checkpoint. " + "Please try a different backend.") if not isinstance(checkpoint, dict) and backend == "sklearn": raise RuntimeError( - f"Cannot use 'sklearn' backend a non dictionary-based checkpoint. " - f"Please try a different backend.") + "Cannot use 'sklearn' backend a non dictionary-based checkpoint. 
" + "Please try a different backend.") if backend == "sklearn": cebra_ = _load_cebra_with_sklearn_backend(checkpoint) diff --git a/cebra/integrations/sklearn/helpers.py b/cebra/integrations/sklearn/helpers.py index 06095c1e..9127aaa2 100644 --- a/cebra/integrations/sklearn/helpers.py +++ b/cebra/integrations/sklearn/helpers.py @@ -42,7 +42,7 @@ def _get_min_max( for label in labels: if any(isinstance(l, str) for l in label): raise ValueError( - f"Invalid labels dtype, expect floats or integers, got string") + "Invalid labels dtype, expect floats or integers, got string") min = np.min(label) if min > np.min(label) else min max = np.max(label) if max < np.max(label) else max return min, max diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index 59a961b3..d07f9359 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -188,7 +188,7 @@ def _consistency_datasets( if labels is None: raise ValueError( "Missing labels, computing consistency between datasets requires labels, expect " - f"a set of labels for each embedding.") + "a set of labels for each embedding.") if len(embeddings) != len(labels): raise ValueError( "Invalid set of labels, computing consistency between datasets requires labels, " @@ -274,8 +274,8 @@ def _consistency_runs( if not all(embeddings[0].shape[0] == embeddings[i].shape[0] for i in range(1, len(embeddings))): raise ValueError( - f"Invalid embeddings, all embeddings should be the same shape to be compared in a between-runs way." - f"If your embeddings are coming from different models, you can use between-datasets" + "Invalid embeddings, all embeddings should be the same shape to be compared in a between-runs way." + "If your embeddings are coming from different models, you can use between-datasets" ) run_ids = np.arange(len(embeddings)) @@ -354,11 +354,11 @@ def consistency_score( if between == "runs": if labels is not None: raise ValueError( - f"No labels should be provided for between-runs consistency.") + "No labels should be provided for between-runs consistency.") if dataset_ids is not None: raise ValueError( - f"No dataset ID should be provided for between-runs consistency." - f"All embeddings should be computed on the same dataset.") + "No dataset ID should be provided for between-runs consistency." 
+ "All embeddings should be computed on the same dataset.") scores, pairs, ids = _consistency_runs(embeddings=embeddings,) elif between == "datasets": scores, pairs, ids = _consistency_datasets( diff --git a/cebra/models/criterions.py b/cebra/models/criterions.py index 8dbdc2b4..d2a5a04f 100644 --- a/cebra/models/criterions.py +++ b/cebra/models/criterions.py @@ -33,7 +33,7 @@ """ import math -from typing import Optional, Tuple, Union +from typing import Optional, Tuple import torch from torch import nn diff --git a/cebra/models/model.py b/cebra/models/model.py index f4a5d862..7631ba86 100644 --- a/cebra/models/model.py +++ b/cebra/models/model.py @@ -22,10 +22,8 @@ """Neural network models and criterions for training CEBRA models.""" import abc -import literate_dataclasses as dataclasses import torch import torch.nn.functional as F -import tqdm from torch import nn import cebra.data diff --git a/cebra/models/projector.py b/cebra/models/projector.py index 0c924296..dd7388bc 100644 --- a/cebra/models/projector.py +++ b/cebra/models/projector.py @@ -134,7 +134,7 @@ def features(self, inp, index): return self._features[index](inp) def forward(self, inp): - raise NotImplemented() + raise NotImplementedError() def get_offset(self) -> cebra.data.Offset: return cebra.data.Offset(5, 5) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index 7f0cbef1..b28f4848 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -32,15 +32,12 @@ import abc import os -from typing import (Callable, Dict, Iterable, List, Literal, Optional, Tuple, - Union) +from typing import Callable, Dict, List, Literal, Optional, Tuple, Union import literate_dataclasses as dataclasses -import numpy as np import numpy.typing as npt import torch import torch.nn.functional as F -import tqdm from torch.utils.data import DataLoader from torch.utils.data import Dataset @@ -48,7 +45,6 @@ import cebra.data import cebra.io import cebra.models -import cebra.solver.util as cebra_solver_util from cebra.solver.util import Meter from cebra.solver.util import ProgressBar @@ -56,9 +52,9 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int, offset: cebra.data.Offset, num_samples: int): """Check that indexes in a batch are in a correct range. - - First and last index must be positive integers, smaller than the total length of inputs - in the dataset, the first index must be smaller than the last and the batch size cannot + + First and last index must be positive integers, smaller than the total length of inputs + in the dataset, the first index must be smaller than the last and the batch size cannot be smaller than the offset of the model. Args: @@ -101,7 +97,7 @@ def _add_batched_zero_padding(batched_data: torch.Tensor, offset: Offset of the model to consider when padding. batch_start_idx: Index of the first sample in the batch. batch_end_idx: Index of the first sample in the batch. - num_samples (int): Total number of samples in the data. + num_samples (int): Total number of samples in the data. Returns: The padded batch. @@ -136,7 +132,7 @@ def _get_batch(inputs: torch.Tensor, offset: Optional[cebra.data.Offset], The batch. 
""" if offset is None: - raise ValueError(f"offset cannot be null.") + raise ValueError("offset cannot be null.") if batch_start_idx == 0: # First batch indices = batch_start_idx, (batch_end_idx + offset.right - 1) @@ -427,7 +423,7 @@ def fit( validation_loss = self.validation(valid_loader) if self.best_loss is None or validation_loss < self.best_loss: self.best_loss = validation_loss - self.save(logdir, f"checkpoint_best.pth") + self.save(logdir, "checkpoint_best.pth") if save_model: if decode: self.decode_history.append( @@ -511,11 +507,11 @@ def decoding(self, train_loader, valid_loader): @abc.abstractmethod def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): """Check that the inputs can be inferred using the selected model. - + Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to @@ -526,8 +522,8 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): @abc.abstractmethod def _check_is_session_id_valid(self, session_id: Optional[int] = None): """Check that the session ID provided is valid for the solver instance. - - Args: + + Args: session_id: The session ID to check. """ raise NotImplementedError @@ -539,14 +535,14 @@ def _select_model( ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], cebra.data.datatypes.Offset]: """ Select the model based on the input dimension and session ID. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. - Returns: + Returns: The model (first returns) and the offset of the model (second returns). """ raise NotImplementedError diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py index b4be2125..2c2153c2 100644 --- a/cebra/solver/multi_session.py +++ b/cebra/solver/multi_session.py @@ -21,11 +21,8 @@ # """Solver implementations for multi-session datasetes.""" -import abc -from collections.abc import Iterable from typing import List, Optional -import literate_dataclasses as dataclasses import torch import cebra @@ -126,10 +123,10 @@ def _inference(self, batches: List[cebra.data.Batch]) -> cebra.data.Batch: def _set_fitted_params(self, loader: cebra.data.Loader): """Set parameters once the solver is fitted. - + In multi session solver, the number of session is set to the number of sessions in the dataset of the loader and the number of - features is set as a list corresponding to the number of neurons in + features is set as a list corresponding to the number of neurons in each dataset. Args: @@ -145,11 +142,11 @@ def _set_fitted_params(self, loader: cebra.data.Loader): def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: Optional[int]): """Check that the inputs can be inferred using the selected model. - + Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. - - Args: + + Args: inputs: Data to infer using the selected model. 
session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to @@ -163,10 +160,10 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor, def _check_is_session_id_valid(self, session_id: Optional[int]): """Check that the session ID provided is valid for the solver instance. - + The session ID must be non-null and between 0 and the number session in the dataset. - - Args: + + Args: session_id: The session ID to check. """ @@ -181,14 +178,14 @@ def _check_is_session_id_valid(self, session_id: Optional[int]): def _select_model(self, inputs: torch.Tensor, session_id: Optional[int]): """ Select the model based on the input dimension and session ID. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. - Returns: + Returns: The model (first returns) and the offset of the model (second returns). """ self._check_is_session_id_valid(session_id=session_id) diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py index e0927a21..62570a57 100644 --- a/cebra/solver/single_session.py +++ b/cebra/solver/single_session.py @@ -56,7 +56,7 @@ def parameters(self, session_id: Optional[int] = None): def _set_fitted_params(self, loader: cebra.data.Loader): """Set parameters once the solver is fitted. - + In single session solver, the number of session is set to None and the number of features is set to the number of neurons in the dataset. @@ -68,11 +68,11 @@ def _set_fitted_params(self, loader: cebra.data.Loader): def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): """Check that the inputs can be inferred using the selected model. - + Note: This method checks that the number of neurons in the input is similar to the input dimension to the selected model. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to @@ -86,10 +86,10 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor, session_id: int): def _check_is_session_id_valid(self, session_id: Optional[int] = None): """Check that the session ID provided is valid for the solver instance. - + The session ID must be null or equal to 0. - - Args: + + Args: session_id: The session ID to check. """ @@ -104,14 +104,14 @@ def _select_model( ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], cebra.data.datatypes.Offset]: """ Select the model based on the input dimension and session ID. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. - Returns: + Returns: The model (first returns) and the offset of the model (second returns). """ self._check_is_inputs_valid(inputs, session_id=session_id) @@ -229,14 +229,14 @@ def _select_model( ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module], cebra.data.datatypes.Offset]: """ Select the model based on the input dimension and session ID. - - Args: + + Args: inputs: Data to infer using the selected model. session_id: The session ID, an :py:class:`int` between 0 and the number of sessions -1 for multisession, and set to ``None`` for single session. - Returns: + Returns: The model (first returns) and the offset of the model (second returns). 
""" self._check_is_inputs_valid(inputs, session_id=session_id) diff --git a/cebra/solver/supervised.py b/cebra/solver/supervised.py index f69308e6..f4e4f95c 100644 --- a/cebra/solver/supervised.py +++ b/cebra/solver/supervised.py @@ -25,17 +25,9 @@ It is inclear whether these will be kept. Consider the implementation as experimental/outdated, and the API for this particular package unstable. """ -import abc -from collections.abc import Iterable -from typing import List -import literate_dataclasses as dataclasses import torch -import tqdm -import cebra -import cebra.data -import cebra.models import cebra.solver.base as abc_ diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 98885d07..c9f9fb2f 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -68,7 +68,6 @@ def test_demo(): @pytest.mark.requires_dataset def test_hippocampus(): - from cebra.datasets import hippocampus pytest.skip("Outdated") dataset = cebra.datasets.init("rat-hippocampus-single") @@ -99,7 +98,6 @@ def test_hippocampus(): @pytest.mark.requires_dataset def test_monkey(): - from cebra.datasets import monkey_reaching dataset = cebra.datasets.init( "area2-bump-pos-active-passive", @@ -111,7 +109,6 @@ def test_monkey(): @pytest.mark.requires_dataset def test_allen(): - from cebra.datasets import allen pytest.skip("Test takes too long") @@ -153,8 +150,9 @@ def test_allen(): @pytest.mark.requires_dataset -@pytest.mark.parametrize( - "options", cebra.datasets.get_options("*", expand_parametrized=False)) +@pytest.mark.parametrize("options", + cebra.datasets.get_options("*", + expand_parametrized=False)) def test_options(options): assert len(options) > 0 assert len(multisubject_options) > 0 diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 0644aef7..e1e09e5d 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -1145,7 +1145,7 @@ def test_move_cpu_to_cuda_device(device): def test_move_cpu_to_mps_device(device): if not cebra.helper._is_mps_availabe(torch): - pytest.skip(f"MPS device is not available") + pytest.skip("MPS device is not available") X = np.random.uniform(0, 1, (10, 5)) cebra_model = cebra_sklearn_cebra.CEBRA(model_architecture="offset1-model", @@ -1360,7 +1360,7 @@ def test_check_device(): ) def test_new_transform(model_architecture, device): """ - This is a test that the original sklearn transform returns the same output as + This is a test that the original sklearn transform returns the same output as the new sklearn transform that uses the pytorch solver transform. """ output_dimension = 4 diff --git a/tests/test_solver.py b/tests/test_solver.py index c27a9e41..68e2a43e 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -20,7 +20,6 @@ # limitations under the License. 
# import copy -import itertools import tempfile import numpy as np @@ -506,8 +505,8 @@ def create_model(model_name, input_dimension): @pytest.mark.parametrize( "data_name, model_name ,session_id, loader_initfunc, solver_initfunc", - single_session_tests_select_model + - single_session_hybrid_tests_select_model) + single_session_tests_select_model + single_session_hybrid_tests_select_model +) def test_select_model_single_session(data_name, model_name, session_id, loader_initfunc, solver_initfunc): dataset = cebra.datasets.init(data_name) From 81b964cd7ec2746bd38712e9f01d0cc422365f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Tue, 21 Jan 2025 23:47:49 +0100 Subject: [PATCH 046/100] Concatenate last batches for batched inference (#200) * Concatenate last to batches for batched inference * Add test case --- cebra/solver/base.py | 14 +++++++++++++- tests/test_sklearn.py | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index b28f4848..f1eab6ed 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -231,7 +231,19 @@ def __getitem__(self, idx): index_dataloader = DataLoader(index_dataset, batch_size=batch_size) output = [] - for index_batch in index_dataloader: + for batch_idx, index_batch in enumerate(index_dataloader): + # NOTE(celia): This is to prevent that adding the offset to the + # penultimate batch for larger offset make the batch_end_idx larger + # than the input length, while we also don't want to drop the last + # samples that do not fit in a complete batch. + if batch_idx == (len(index_dataloader) - 2): + # penultimate batch, last complete batch + last_batch = index_batch + continue + if batch_idx == (len(index_dataloader) - 1): + # last batch, incomplete + index_batch = torch.cat((last_batch, index_batch), dim=0) + batch_start_idx, batch_end_idx = index_batch[0], index_batch[-1] + 1 batched_data = _get_batch(inputs=inputs, offset=offset, diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index e1e09e5d..33df3caf 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -1506,3 +1506,20 @@ def test_new_transform(model_architecture, device): embedding2 = cebra_model.transform_deprecated(X, session_id=2) assert np.allclose(embedding1, embedding2, rtol=1e-5, atol=1e-8), "Arrays are not close enough" + + +def test_last_incomplete_batch_smaller_than_offset(): + """ + When offset of the model is larger than the remaining samples in the + last batch, an error could happen. We merge the penultimate + and last batches together to avoid this. 
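The NOTE(celia) comment in the hunk above and this test docstring describe the same fix: when the final index batch would be shorter than the model's offset, it is concatenated onto the penultimate batch before the inputs are sliced. A minimal, self-contained sketch of that index bookkeeping (using a plain index tensor rather than the solver's internal index dataset; sizes borrowed from the test below) could look like this:

    import torch
    from torch.utils.data import DataLoader

    num_samples, batch_size = 20111, 300
    index_loader = DataLoader(torch.arange(num_samples), batch_size=batch_size)

    ranges, last_batch = [], None
    for batch_idx, index_batch in enumerate(index_loader):
        if batch_idx == len(index_loader) - 2:       # penultimate batch: hold it back
            last_batch = index_batch
            continue
        if batch_idx == len(index_loader) - 1 and last_batch is not None:
            # merge the incomplete final batch with the held-back penultimate one
            index_batch = torch.cat((last_batch, index_batch), dim=0)
        ranges.append((int(index_batch[0]), int(index_batch[-1]) + 1))

    print(ranges[-1])   # (19800, 20111): the trailing 11 samples never form a tiny batch on their own

Holding back the penultimate batch guarantees that the final slice is always at least one full batch long, so it can never be shorter than the model's receptive field.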
+ """ + train = cebra.data.TensorDataset(neural=np.random.rand(20111, 100), + continuous=np.random.rand(20111, 2)) + + model = cebra.CEBRA(max_iterations=2, + model_architecture="offset36-model-more-dropout", + device="cpu") + model.fit(train.neural, train.continuous) + + _ = model.transform(train.neural, batch_size=300) \ No newline at end of file From a09d123b493852e5bfc7e51be6cc87d278297342 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 27 Oct 2024 19:23:19 +0100 Subject: [PATCH 047/100] Fix linting errors in tests (#188) * apply auto-fixes * Fix linting errors in tests/ * Fix version check --- tests/test_api.py | 1 - tests/test_cli.py | 3 --- tests/test_criterions.py | 3 +-- tests/test_datasets.py | 3 --- tests/test_demo.py | 1 - tests/test_distributions.py | 6 +++--- tests/test_grid_search.py | 1 - tests/test_integration_train.py | 1 - tests/test_load.py | 8 ++------ tests/test_models.py | 4 ++-- tests/test_plot.py | 4 +--- tests/test_registry.py | 6 +++--- tests/test_sklearn.py | 5 +---- tests/test_solver.py | 13 ++++++------- tests/test_usecases.py | 1 - 15 files changed, 19 insertions(+), 41 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index bc279cbd..4e514429 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,6 +21,5 @@ # def test_api(): import cebra.distributions - from cebra.distributions import TimedeltaDistribution cebra.distributions.TimedeltaDistribution diff --git a/tests/test_cli.py b/tests/test_cli.py index 41e67f42..8e49cc35 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,6 +19,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import argparse - -import pytest diff --git a/tests/test_criterions.py b/tests/test_criterions.py index 93a3b846..0d6f8ff2 100644 --- a/tests/test_criterions.py +++ b/tests/test_criterions.py @@ -19,7 +19,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import numpy as np import pytest import torch from torch import nn @@ -294,7 +293,7 @@ def _sample_dist_matrices(seed): @pytest.mark.parametrize("seed", [42, 4242, 424242]) -def test_infonce(seed): +def test_infonce_check_output_parts(seed): pos_dist, neg_dist = _sample_dist_matrices(seed) ref_loss, ref_align, ref_uniform = _reference_infonce(pos_dist, neg_dist) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 6a7f9319..a91a9370 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -70,7 +70,6 @@ def test_demo(): def test_hippocampus(): pytest.skip("Outdated") - dataset = cebra.datasets.init("rat-hippocampus-single") loader = cebra.data.ContinuousDataLoader( dataset=dataset, @@ -99,7 +98,6 @@ def test_hippocampus(): @pytest.mark.requires_dataset def test_monkey(): - dataset = cebra.datasets.init( "area2-bump-pos-active-passive", path=pathlib.Path(_DEFAULT_DATADIR) / "monkey_reaching_preload_smth_40", @@ -110,7 +108,6 @@ def test_monkey(): @pytest.mark.requires_dataset def test_allen(): - pytest.skip("Test takes too long") ca_dataset = cebra.datasets.init("allen-movie-one-ca-VISp-100-train-10-111") diff --git a/tests/test_demo.py b/tests/test_demo.py index 4f0f146c..ce555db3 100644 --- a/tests/test_demo.py +++ b/tests/test_demo.py @@ -21,7 +21,6 @@ # import glob import re -import sys import pytest diff --git a/tests/test_distributions.py b/tests/test_distributions.py index d7151fd1..2b704391 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -43,7 +43,7 @@ def prepare(N=1000, n=128, d=5, probs=[0.3, 0.1, 0.6], device="cpu"): continuous = torch.randn(N, d).to(device) rand = torch.from_numpy(np.random.randint(0, N, (n,))).to(device) - qidx = discrete[rand].to(device) + _ = discrete[rand].to(device) query = continuous[rand] + 0.1 * torch.randn(n, d).to(device) query = query.to(device) @@ -173,7 +173,7 @@ def test_mixed(): discrete, continuous) reference_idx = distribution.sample_prior(10) - positive_idx = distribution.sample_conditional(reference_idx) + _ = distribution.sample_conditional(reference_idx) # The conditional distribution p(· | disc, cont) should yield # samples where the label exactly matches the reference sample. @@ -193,7 +193,7 @@ def test_continuous(benchmark): def _test_distribution(dist): distribution = dist(continuous) reference_idx = distribution.sample_prior(10) - positive_idx = distribution.sample_conditional(reference_idx) + _ = distribution.sample_conditional(reference_idx) return distribution distribution = _test_distribution( diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index 3f88ba12..c774ea02 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -20,7 +20,6 @@ # limitations under the License. # import numpy as np -import pytest import cebra import cebra.grid_search diff --git a/tests/test_integration_train.py b/tests/test_integration_train.py index 06e6da40..238bbea7 100644 --- a/tests/test_integration_train.py +++ b/tests/test_integration_train.py @@ -20,7 +20,6 @@ # limitations under the License. 
# import itertools -from typing import List import pytest import torch diff --git a/tests/test_load.py b/tests/test_load.py index 6f62dc92..2a9ef3b5 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -22,10 +22,7 @@ import itertools import pathlib import pickle -import platform import tempfile -import unittest -from unittest.mock import patch import h5py import hdf5storage @@ -125,7 +122,7 @@ def generate_numpy_confounder(filename, dtype): @register("npz") -def generate_numpy_path(filename, dtype): +def generate_numpy_path_2(filename, dtype): A = np.arange(1000, dtype=dtype).reshape(10, 100) np.savez(filename, array=A, other_data="test") loaded_A = cebra_load.load(pathlib.Path(filename)) @@ -418,7 +415,7 @@ def generate_csv_path(filename, dtype): @register_error("csv") def generate_csv_empty_file(filename, dtype): - with open(filename, "w") as creating_new_csv_file: + with open(filename, "w") as _: pass _ = cebra_load.load(filename) @@ -619,7 +616,6 @@ def generate_pickle_invalid_key(filename, dtype): @register_error("pkl", "p") def generate_pickle_no_array(filename, dtype): - A = np.arange(1000, dtype=dtype).reshape(10, 100) with open(filename, "wb") as f: pickle.dump({"A": "test_1", "B": "test_2"}, f) _ = cebra_load.load(filename) diff --git a/tests/test_models.py b/tests/test_models.py index 2a6e4812..d41dc7ab 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -155,8 +155,8 @@ def test_version_check(version, raises): cebra.models.model._check_torch_version(raise_error=True) -def test_version_check(): - raises = not cebra.models.model._check_torch_version(raise_error=False) +def test_version_check_dropout_available(): + raises = cebra.models.model._check_torch_version(raise_error=False) if raises: assert len(cebra.models.get_options("*dropout*")) == 0 else: diff --git a/tests/test_plot.py b/tests/test_plot.py index 3f44d887..1d94d310 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -72,8 +72,6 @@ def test_plot_imports(): def test_colormaps(): import matplotlib - import cebra - cmap = matplotlib.colormaps["cebra"] assert cmap is not None plt.scatter([1], [2], c=[2], cmap="cebra") @@ -241,7 +239,7 @@ def test_compare_models(): _ = cebra_plot.compare_models(models, labels=long_labels, ax=ax) with pytest.raises(ValueError, match="Invalid.*labels"): invalid_labels = copy.deepcopy(labels) - ele = invalid_labels.pop() + _ = invalid_labels.pop() invalid_labels.append(["a"]) _ = cebra_plot.compare_models(models, labels=invalid_labels, ax=ax) diff --git a/tests/test_registry.py b/tests/test_registry.py index 69e04f38..cd27344c 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -117,7 +117,7 @@ def test_override(): _Foo1 = test_module.register("foo")(Foo) assert _Foo1 == Foo assert _Foo1 != Bar - assert f"foo" in test_module.get_options() + assert "foo" in test_module.get_options() # Check that the class was actually added to the module assert ( @@ -137,7 +137,7 @@ def test_override(): _Foo2 = test_module.register("foo", override=True)(Bar) assert _Foo2 != Foo assert _Foo2 == Bar - assert f"foo" in test_module.get_options() + assert "foo" in test_module.get_options() def test_depreciation(): @@ -145,7 +145,7 @@ def test_depreciation(): Foo = _make_class() _Foo1 = test_module.register("foo")(Foo) assert _Foo1 == Foo - assert f"foo" in test_module.get_options() + assert "foo" in test_module.get_options() # Registering the same class under different names # also raises and error diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 
33df3caf..f340548c 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -276,7 +276,6 @@ def test_api(estimator, check): pytest.skip(f"Model architecture {estimator.model_architecture} " f"requires longer input sizes than 20 samples.") - success = True exception = None num_successful = 0 total_runs = 0 @@ -334,7 +333,6 @@ def test_sklearn(model_architecture, device): y_c1 = np.random.uniform(0, 1, (1000, 5)) y_c1_s2 = np.random.uniform(0, 1, (800, 5)) y_c2 = np.random.uniform(0, 1, (1000, 2)) - y_c2_s2 = np.random.uniform(0, 1, (800, 2)) y_d = np.random.randint(0, 10, (1000,)) y_d_s2 = np.random.randint(0, 10, (800,)) @@ -863,7 +861,6 @@ def test_sklearn_full(model_architecture, device, pad_before_transform): X = np.random.uniform(0, 1, (1000, 50)) y_c1 = np.random.uniform(0, 1, (1000, 5)) y_c2 = np.random.uniform(0, 1, (1000, 2)) - y_d = np.random.randint(0, 10, (1000,)) # time contrastive cebra_model.fit(X) @@ -931,7 +928,7 @@ def test_sklearn_resampling_model_not_yet_supported(model_architecture, device): with pytest.raises(ValueError): cebra_model.fit(X, y_c1) - output = cebra_model.transform(X) + _ = cebra_model.transform(X) def _iterate_actions(): diff --git a/tests/test_solver.py b/tests/test_solver.py index 68e2a43e..e93b87fc 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -194,16 +194,15 @@ def test_single_session(data_name, loader_initfunc, model_architecture, _assert_equal(fitted_solver, solver) -@pytest.mark.parametrize( - "data_name, loader_initfunc, model_architecture, solver_initfunc", - single_session_tests) -def test_single_session_auxvar(data_name, loader_initfunc, model_architecture, - solver_initfunc): - return # TODO +@pytest.mark.parametrize("data_name, loader_initfunc, model_architecture, solver_initfunc", + single_session_tests) +def test_single_session_auxvar(data_name, loader_initfunc, model_architecture, solver_initfunc): + + pytest.skip("Not yet supported") loader = _get_loader(data_name, loader_initfunc) model = _make_model(loader.dataset) - behavior_model = _make_behavior_model(loader.dataset) + behavior_model = _make_behavior_model(loader.dataset) # noqa: F841 criterion = cebra.models.InfoNCE() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) diff --git a/tests/test_usecases.py b/tests/test_usecases.py index 22195bd8..f0cc308a 100644 --- a/tests/test_usecases.py +++ b/tests/test_usecases.py @@ -29,7 +29,6 @@ """ import itertools -import pickle import numpy as np import pytest From 521f00384d1a99774840c1cff0daa9d77d4ee43a Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Fri, 8 Nov 2024 07:33:23 +0000 Subject: [PATCH 048/100] Fix `scikit-learn` reference in conda environment files (#195) --- conda/cebra_paper.yml | 2 +- conda/cebra_paper_m1.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/cebra_paper.yml b/conda/cebra_paper.yml index e7537756..4b9e2b6e 100644 --- a/conda/cebra_paper.yml +++ b/conda/cebra_paper.yml @@ -39,7 +39,7 @@ dependencies: - "cebra[dev,integrations,datasets,demos]" - joblib - literate-dataclasses - - sklearn + - scikit-learn - scipy - torch - keras==2.3.1 diff --git a/conda/cebra_paper_m1.yml b/conda/cebra_paper_m1.yml index 32256758..3d8cd7b9 100644 --- a/conda/cebra_paper_m1.yml +++ b/conda/cebra_paper_m1.yml @@ -48,7 +48,7 @@ dependencies: - tensorflow-metal - joblib - literate-dataclasses - - sklearn + - scikit-learn - scipy - torch - umap-learn From 46610e341d137f3a1e0d38c16dc63e63d40d8372 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Mon, 16 Dec 2024 
20:32:47 +0100 Subject: [PATCH 049/100] Add support for new __sklearn_tags__ (#205) * Add support for new __sklearn_tags__ * fix inheritance order * Add more tests * fix added test --- .github/workflows/build.yml | 13 ++++++++++++- cebra/integrations/sklearn/cebra.py | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a231258f..ef9e1777 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,10 +19,16 @@ jobs: # as well as selected previous versions on # https://pytorch.org/get-started/previous-versions/ torch-version: ["2.2.2", "2.4.0"] + sklearn-version: ["latest"] include: - os: windows-latest torch-version: 2.4.0 python-version: "3.10" + sklearn-version: "latest" + - os: ubuntu-latest + torch-version: 2.4.0 + python-version: "3.10" + sklearn-version: "legacy" runs-on: ${{ matrix.os }} @@ -32,7 +38,7 @@ jobs: uses: actions/cache@v3 with: path: ~/.cache/pip - key: pip-os_${{ runner.os }}-python_${{ matrix.python-version }}-torch_${{ matrix.torch-version }} + key: pip-os_${{ runner.os }}-python_${{ matrix.python-version }}-torch_${{ matrix.torch-version }}-sklearn_${{ matrix.sklearn-version }} - name: Checkout code uses: actions/checkout@v2 @@ -48,6 +54,11 @@ jobs: python -m pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/cpu pip install '.[dev,datasets,integrations]' + - name: Check sklearn legacy version + if: matrix.sklearn-version == 'legacy' + run: | + pip install scikit-learn==1.4.2 '.[dev,datasets,integrations]' + - name: Run the formatter run: | make format diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index a340a392..3c834fa9 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -30,8 +30,10 @@ import pkg_resources import sklearn.utils.validation as sklearn_utils_validation import torch +import sklearn from sklearn.base import BaseEstimator from sklearn.base import TransformerMixin +from sklearn.utils.metaestimators import available_if from torch import nn import cebra.data @@ -41,6 +43,11 @@ import cebra.models import cebra.solver +def check_version(estimator): + # NOTE(stes): required as a check for the old way of specifying tags + # https://github.com/scikit-learn/scikit-learn/pull/29677#issuecomment-2334229165 + from packaging import version + return version.parse(sklearn.__version__) < version.parse("1.6.dev") def _init_loader( is_cont: bool, @@ -364,7 +371,7 @@ def _load_cebra_with_sklearn_backend(cebra_info: Dict) -> "CEBRA": return cebra_ -class CEBRA(BaseEstimator, TransformerMixin): +class CEBRA(TransformerMixin, BaseEstimator): """CEBRA model defined as part of a ``scikit-learn``-like API. Attributes: @@ -1317,6 +1324,15 @@ def fit_transform( callback_frequency=callback_frequency) return self.transform(X) + def __sklearn_tags__(self): + # NOTE(stes): from 1.6.dev, this is the new way to specify tags + # https://scikit-learn.org/dev/developers/develop.html + # https://github.com/scikit-learn/scikit-learn/pull/29677#issuecomment-2334229165 + tags = super().__sklearn_tags__() + tags.non_deterministic = True + return tags + + @available_if(check_version) def _more_tags(self): # NOTE(stes): This tag is needed as seeding is not fully implemented in the # current version of CEBRA. 
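The hunk above keeps a single estimator compatible with both tag APIs: scikit-learn >= 1.6 consults `__sklearn_tags__`, while `available_if(check_version)` hides the legacy `_more_tags` hook on those newer versions so only older releases ever see it. A minimal, self-contained sketch of the same pattern (the toy estimator and helper names below are illustrative, not CEBRA code):

import packaging.version
import sklearn
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.metaestimators import available_if


def _is_legacy_sklearn(estimator):
    # Only expose the old-style tags on scikit-learn < 1.6.
    return packaging.version.parse(
        sklearn.__version__) < packaging.version.parse("1.6.dev")


class ToyTransformer(TransformerMixin, BaseEstimator):

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X

    def __sklearn_tags__(self):
        # New-style tags dataclass, queried by scikit-learn >= 1.6.
        tags = super().__sklearn_tags__()
        tags.non_deterministic = True
        return tags

    @available_if(_is_legacy_sklearn)
    def _more_tags(self):
        # Old-style tag dict; attribute access raises AttributeError on
        # scikit-learn >= 1.6, so newer versions never take this path.
        return {"non_deterministic": True}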
From e8004ba98a5fa9a6f8cccf941fc54690bec9c827 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Wed, 22 Jan 2025 00:11:39 +0100 Subject: [PATCH 050/100] Update workflows to actions/setup-python@v5, actions/cache@v4 (#212) --- .github/workflows/build.yml | 8 ++++---- .github/workflows/doc-coverage.yml | 6 +++--- .github/workflows/docs.yml | 12 ++++++------ .github/workflows/release-pypi.yml | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ef9e1777..3c4f68dd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: torch-version: 2.4.0 python-version: "3.10" sklearn-version: "latest" - - os: ubuntu-latest + - os: ubuntu-latest torch-version: 2.4.0 python-version: "3.10" sklearn-version: "legacy" @@ -35,7 +35,7 @@ jobs: steps: - name: Cache dependencies id: pip-cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: pip-os_${{ runner.os }}-python_${{ matrix.python-version }}-torch_${{ matrix.torch-version }}-sklearn_${{ matrix.sklearn-version }} @@ -44,7 +44,7 @@ jobs: uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -54,7 +54,7 @@ jobs: python -m pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/cpu pip install '.[dev,datasets,integrations]' - - name: Check sklearn legacy version + - name: Check sklearn legacy version if: matrix.sklearn-version == 'legacy' run: | pip install scikit-learn==1.4.2 '.[dev,datasets,integrations]' diff --git a/.github/workflows/doc-coverage.yml b/.github/workflows/doc-coverage.yml index 268cbee0..8d7f0522 100644 --- a/.github/workflows/doc-coverage.yml +++ b/.github/workflows/doc-coverage.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8'] + python-version: ['3.9'] steps: # NOTE(stes) currently not used, we check @@ -31,14 +31,14 @@ jobs: # with: # ref: main - uses: actions/checkout@v3 - - uses: actions/cache@v1 + - uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip restore-keys: | ${{ runner.os }}-pip - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install package diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83c9d829..47b5862d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Cache dependencies id: pip-cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip @@ -52,7 +52,7 @@ jobs: ref: main - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -60,12 +60,12 @@ jobs: run: | python -m pip install --upgrade pip setuptools wheel # NOTE(stes) Pandoc version must be at least (2.14.2) but less than (4.0.0). - # as of 29/10/23. Ubuntu 22.04 which is used for ubuntu-latest only has an + # as of 29/10/23. Ubuntu 22.04 which is used for ubuntu-latest only has an # old pandoc version (2.9.). We will hence install the latest version manually. 
# previou: sudo apt-get install -y pandoc - wget https://github.com/jgm/pandoc/releases/download/3.1.9/pandoc-3.1.9-1-amd64.deb - sudo dpkg -i pandoc-3.1.9-1-amd64.deb - rm pandoc-3.1.9-1-amd64.deb + wget https://github.com/jgm/pandoc/releases/download/3.1.9/pandoc-3.1.9-1-amd64.deb + sudo dpkg -i pandoc-3.1.9-1-amd64.deb + rm pandoc-3.1.9-1-amd64.deb pip install torch --extra-index-url https://download.pytorch.org/whl/cpu pip install '.[docs]' diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index d6950119..fc6d5c8e 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Cache dependencies id: pip-cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip From ddc00f40dc692e16d8785ee01d8773ba8ec6d6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dcaro?= Date: Wed, 22 Jan 2025 07:52:19 +0100 Subject: [PATCH 051/100] Fix deprecation warning force_all_finite -> ensure_all_finite for sklearn>=1.6 (#206) --- cebra/integrations/sklearn/utils.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cebra/integrations/sklearn/utils.py b/cebra/integrations/sklearn/utils.py index 0ec01aa1..80013d00 100644 --- a/cebra/integrations/sklearn/utils.py +++ b/cebra/integrations/sklearn/utils.py @@ -22,12 +22,26 @@ import warnings import numpy.typing as npt +import packaging +import sklearn import sklearn.utils.validation as sklearn_utils_validation import torch import cebra.helper +def _sklearn_check_array(array, **kwargs): + # NOTE(stes): See discussion in https://github.com/AdaptiveMotorControlLab/CEBRA/pull/206 + # https://scikit-learn.org/1.6/modules/generated/sklearn.utils.check_array.html + # force_all_finite was renamed to ensure_all_finite and will be removed in 1.8. + if packaging.version.parse( + sklearn.__version__) < packaging.version.parse("1.6"): + if "ensure_all_finite" in kwargs: + kwargs["force_all_finite"] = kwargs["ensure_all_finite"] + del kwargs["ensure_all_finite"] + return sklearn_utils_validation.check_array(array, **kwargs) + + def update_old_param(old: dict, new: dict, kwargs: dict, default) -> tuple: """Handle deprecated arguments of a function until they are replaced. @@ -74,7 +88,7 @@ def check_input_array(X: npt.NDArray, *, min_samples: int) -> npt.NDArray: Returns: The converted and validated array. """ - return sklearn_utils_validation.check_array( + return _sklearn_check_array( X, accept_sparse=False, accept_large_sparse=False, @@ -82,8 +96,8 @@ def check_input_array(X: npt.NDArray, *, min_samples: int) -> npt.NDArray: dtype=("float32", "float64"), order=None, copy=False, - force_all_finite=True, ensure_2d=True, + ensure_all_finite=True, allow_nd=False, ensure_min_samples=min_samples, ensure_min_features=1, @@ -106,15 +120,15 @@ def check_label_array(y: npt.NDArray, *, min_samples: int): Returns: The converted and validated labels. 
""" - return sklearn_utils_validation.check_array( + return _sklearn_check_array( y, accept_sparse=False, accept_large_sparse=False, dtype="numeric", order=None, copy=False, - force_all_finite=True, ensure_2d=False, + ensure_all_finite=True, allow_nd=False, ensure_min_samples=min_samples, ) From 7dc9f81809d3a6e45b3843fce765e63a57ce9923 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Wed, 29 Jan 2025 14:27:15 -0500 Subject: [PATCH 052/100] Add tests to check legacy model loading (#214) --- tests/_build_legacy_model/.gitignore | 1 + tests/_build_legacy_model/Dockerfile | 39 +++++++++++++++++++++ tests/_build_legacy_model/README.md | 13 +++++++ tests/_build_legacy_model/create_model.py | 15 +++++++++ tests/_build_legacy_model/generate.sh | 3 ++ tests/test_sklearn_legacy.py | 41 +++++++++++++++++++++++ 6 files changed, 112 insertions(+) create mode 100644 tests/_build_legacy_model/.gitignore create mode 100644 tests/_build_legacy_model/Dockerfile create mode 100644 tests/_build_legacy_model/README.md create mode 100644 tests/_build_legacy_model/create_model.py create mode 100755 tests/_build_legacy_model/generate.sh create mode 100644 tests/test_sklearn_legacy.py diff --git a/tests/_build_legacy_model/.gitignore b/tests/_build_legacy_model/.gitignore new file mode 100644 index 00000000..4b6ebe5f --- /dev/null +++ b/tests/_build_legacy_model/.gitignore @@ -0,0 +1 @@ +*.pt diff --git a/tests/_build_legacy_model/Dockerfile b/tests/_build_legacy_model/Dockerfile new file mode 100644 index 00000000..ddbb0e61 --- /dev/null +++ b/tests/_build_legacy_model/Dockerfile @@ -0,0 +1,39 @@ +FROM python:3.12-slim AS base +RUN pip install torch --index-url https://download.pytorch.org/whl/cpu +RUN apt-get update && \ + apt-get install -y --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* + +FROM base AS cebra-0.4.0-scikit-learn-1.4 +RUN pip install cebra==0.4.0 "scikit-learn<1.5" +WORKDIR /app +COPY create_model.py . +RUN python create_model.py + +FROM base AS cebra-0.4.0-scikit-learn-1.6 +RUN pip install cebra==0.4.0 "scikit-learn>=1.6" +WORKDIR /app +COPY create_model.py . +RUN python create_model.py + +FROM base AS cebra-rc-scikit-learn-1.4 +# NOTE(stes): Commit where new scikit-learn tag logic was added to the CEBRA class. +# https://github.com/AdaptiveMotorControlLab/CEBRA/commit/5f46c3257952a08dfa9f9e1b149a85f7f12c1053 +RUN pip install git+https://github.com/AdaptiveMotorControlLab/CEBRA.git@5f46c3257952a08dfa9f9e1b149a85f7f12c1053 "scikit-learn<1.5" +WORKDIR /app +COPY create_model.py . +RUN python create_model.py + +FROM base AS cebra-rc-scikit-learn-1.6 +# NOTE(stes): Commit where new scikit-learn tag logic was added to the CEBRA class. +# https://github.com/AdaptiveMotorControlLab/CEBRA/commit/5f46c3257952a08dfa9f9e1b149a85f7f12c1053 +RUN pip install git+https://github.com/AdaptiveMotorControlLab/CEBRA.git@5f46c3257952a08dfa9f9e1b149a85f7f12c1053 "scikit-learn>=1.6" +WORKDIR /app +COPY create_model.py . 
+RUN python create_model.py + +FROM scratch +COPY --from=cebra-0.4.0-scikit-learn-1.4 /app/cebra_model.pt /cebra_model_cebra-0.4.0-scikit-learn-1.4.pt +COPY --from=cebra-0.4.0-scikit-learn-1.6 /app/cebra_model.pt /cebra_model_cebra-0.4.0-scikit-learn-1.6.pt +COPY --from=cebra-rc-scikit-learn-1.4 /app/cebra_model.pt /cebra_model_cebra-rc-scikit-learn-1.4.pt +COPY --from=cebra-rc-scikit-learn-1.6 /app/cebra_model.pt /cebra_model_cebra-rc-scikit-learn-1.6.pt diff --git a/tests/_build_legacy_model/README.md b/tests/_build_legacy_model/README.md new file mode 100644 index 00000000..4bcffa2b --- /dev/null +++ b/tests/_build_legacy_model/README.md @@ -0,0 +1,13 @@ +# Helper script to build CEBRA checkpoints + +This script builds CEBRA checkpoints for different versions of scikit-learn and CEBRA. +To build all models, run: + +```bash +./generate.sh +``` + +The models are currently also stored in git directly due to their small size. + +Related issue: https://github.com/AdaptiveMotorControlLab/CEBRA/issues/207 +Related test: tests/test_sklearn_legacy.py diff --git a/tests/_build_legacy_model/create_model.py b/tests/_build_legacy_model/create_model.py new file mode 100644 index 00000000..f308d296 --- /dev/null +++ b/tests/_build_legacy_model/create_model.py @@ -0,0 +1,15 @@ +import numpy as np + +import cebra + +neural_data = np.random.normal(0, 1, (1000, 30)) # 1000 samples, 30 features +cebra_model = cebra.CEBRA(model_architecture="offset10-model", + batch_size=512, + learning_rate=1e-4, + max_iterations=10, + time_offsets=10, + num_hidden_units=16, + output_dimension=8, + verbose=True) +cebra_model.fit(neural_data) +cebra_model.save("cebra_model.pt") diff --git a/tests/_build_legacy_model/generate.sh b/tests/_build_legacy_model/generate.sh new file mode 100755 index 00000000..749a0d32 --- /dev/null +++ b/tests/_build_legacy_model/generate.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +DOCKER_BUILDKIT=1 docker build --output type=local,dest=. . 
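Because the final `FROM scratch` stage only copies the four checkpoints, the BuildKit build invoked by `generate.sh` with `--output type=local,dest=.` leaves exactly the four `cebra_model_*.pt` files next to the Dockerfile. A quick local smoke test of those exported files might look like the sketch below (illustrative only; it assumes the checkpoints sit in the working directory, and the new test file added next does the same check more thoroughly):

import pathlib

import numpy as np

from cebra.integrations.sklearn.cebra import CEBRA

# create_model.py trains on 30 features with output_dimension=8.
X = np.random.normal(0, 1, (100, 30))
for checkpoint in sorted(pathlib.Path(".").glob("cebra_model_*.pt")):
    model = CEBRA.load(checkpoint)
    print(checkpoint.name, model.transform(X).shape)  # expected: (100, 8)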
diff --git a/tests/test_sklearn_legacy.py b/tests/test_sklearn_legacy.py new file mode 100644 index 00000000..4d74515f --- /dev/null +++ b/tests/test_sklearn_legacy.py @@ -0,0 +1,41 @@ +import pathlib +import urllib.request + +import numpy as np +import pytest + +from cebra.integrations.sklearn.cebra import CEBRA + +MODEL_VARIANTS = [ + "cebra-0.4.0-scikit-learn-1.4", "cebra-0.4.0-scikit-learn-1.6", + "cebra-rc-scikit-learn-1.4", "cebra-rc-scikit-learn-1.6" +] + + +@pytest.mark.parametrize("model_variant", MODEL_VARIANTS) +def test_load_legacy_model(model_variant): + """Test loading a legacy CEBRA model.""" + + X = np.random.normal(0, 1, (1000, 30)) + + model_path = pathlib.Path( + __file__ + ).parent / "_build_legacy_model" / f"cebra_model_{model_variant}.pt" + + if not model_path.exists(): + url = f"https://cebra.fra1.digitaloceanspaces.com/cebra_model_{model_variant}.pt" + urllib.request.urlretrieve(url, model_path) + + loaded_model = CEBRA.load(model_path) + + assert loaded_model.model_architecture == "offset10-model" + assert loaded_model.output_dimension == 8 + assert loaded_model.num_hidden_units == 16 + assert loaded_model.time_offsets == 10 + + output = loaded_model.transform(X) + assert isinstance(output, np.ndarray) + assert output.shape[1] == loaded_model.output_dimension + + assert hasattr(loaded_model, "state_dict_") + assert hasattr(loaded_model, "n_features_") From a2a6c445a9235709a20b8fe085a3e91585cc8976 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 2 Feb 2025 11:59:12 -0500 Subject: [PATCH 053/100] Add improved goodness of fit implementation (#190) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Started implementing improved goodness of fit implementation * add tests and improve implementation * Fix examples * Fix docstring error * Handle batch size = None for goodness of fit computation * adapt GoF implementation * Fix docstring tests * Update docstring for goodness_of_fit_score Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> * add annotations to goodness_of_fit_history Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> * fix typo Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> * improve err message Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> * make numerical test less conversative * Add tests for exception handling * fix tests --------- Co-authored-by: Célia Benquet <32598028+CeliaBenquet@users.noreply.github.com> --- cebra/integrations/sklearn/metrics.py | 143 ++++++++++++++++++++++++++ tests/test_sklearn_metrics.py | 129 +++++++++++++++++++++++ 2 files changed, 272 insertions(+) diff --git a/cebra/integrations/sklearn/metrics.py b/cebra/integrations/sklearn/metrics.py index d07f9359..d8fd791d 100644 --- a/cebra/integrations/sklearn/metrics.py +++ b/cebra/integrations/sklearn/metrics.py @@ -109,6 +109,149 @@ def infonce_loss( return avg_loss +def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA, + X: Union[npt.NDArray, torch.Tensor], + *y, + session_id: Optional[int] = None, + num_batches: int = 500) -> float: + """Compute the goodness of fit score on a *single session* dataset on the model. + + This function uses the :func:`infonce_loss` function to compute the InfoNCE loss + for a given `cebra_model` and the :func:`infonce_to_goodness_of_fit` function + to derive the goodness of fit from the InfoNCE loss. 
+ + Args: + cebra_model: The model to use to compute the InfoNCE loss on the samples. + X: A 2D data matrix, corresponding to a *single session* recording. + y: An arbitrary amount of continuous indices passed as 2D matrices, and up to one + discrete index passed as a 1D array. Each index has to match the length of ``X``. + session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`cebra.CEBRA.num_sessions` + for multisession, set to ``None`` for single session. + num_batches: The number of iterations to consider to evaluate the model on the new data. + Higher values will give a more accurate estimate. Set it to at least 500 iterations. + + Returns: + The average GoF score estimated over ``num_batches`` batches from the data distribution. + + Related: + :func:`infonce_to_goodness_of_fit` + + Example: + + >>> import cebra + >>> import numpy as np + >>> neural_data = np.random.uniform(0, 1, (1000, 20)) + >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) + >>> cebra_model.fit(neural_data) + CEBRA(batch_size=512, max_iterations=10) + >>> gof = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, neural_data) + """ + loss = infonce_loss(cebra_model, + X, + *y, + session_id=session_id, + num_batches=num_batches, + correct_by_batchsize=False) + return infonce_to_goodness_of_fit(loss, cebra_model) + + +def goodness_of_fit_history(model: cebra_sklearn_cebra.CEBRA) -> np.ndarray: + """Return the history of the goodness of fit score. + + Args: + model: A trained CEBRA model. + + Returns: + A numpy array containing the goodness of fit values, measured in bits. + + Related: + :func:`infonce_to_goodness_of_fit` + + Example: + + >>> import cebra + >>> import numpy as np + >>> neural_data = np.random.uniform(0, 1, (1000, 20)) + >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512) + >>> cebra_model.fit(neural_data) + CEBRA(batch_size=512, max_iterations=10) + >>> gof_history = cebra.sklearn.metrics.goodness_of_fit_history(cebra_model) + """ + infonce = np.array(model.state_dict_["log"]["total"]) + return infonce_to_goodness_of_fit(infonce, model) + + +def infonce_to_goodness_of_fit( + infonce: Union[float, np.ndarray], + model: Optional[cebra_sklearn_cebra.CEBRA] = None, + batch_size: Optional[int] = None, + num_sessions: Optional[int] = None) -> Union[float, np.ndarray]: + """Given a trained CEBRA model, return goodness of fit metric. + + The goodness of fit ranges from 0 (lowest meaningful value) + to a positive number with the unit "bits", the higher the + better. + + Values lower than 0 bits are possible, but these only occur + due to numerical effects. A perfectly collapsed embedding + (e.g., because the data cannot be fit with the provided + auxiliary variables) will have a goodness of fit of 0. + + The conversion between the generalized InfoNCE metric that + CEBRA is trained with and the goodness of fit computed with this + function is + + .. math:: + + S = \\log N - \\text{InfoNCE} + + To use this function, either provide a trained CEBRA model or the + batch size and number of sessions. + + Args: + infonce: The InfoNCE loss, either a single value or an iterable of values. + model: The trained CEBRA model. + batch_size: The batch size used to train the model. + num_sessions: The number of sessions used to train the model. + + Returns: + Numpy array containing the goodness of fit values, measured in bits + + Raises: + RuntimeError: If the provided model is not fit to data. + ValueError: If both ``model`` and ``(batch_size, num_sessions)`` are provided. 
+ """ + if model is not None: + if batch_size is not None or num_sessions is not None: + raise ValueError( + "batch_size and num_sessions should not be provided if model is provided." + ) + if not hasattr(model, "state_dict_"): + raise RuntimeError("Fit the CEBRA model first.") + if model.batch_size is None: + raise ValueError( + "Computing the goodness of fit is not yet supported for " + "models trained on the full dataset (batchsize = None). ") + batch_size = model.batch_size + num_sessions = model.num_sessions_ + if num_sessions is None: + num_sessions = 1 + + if model.batch_size is None: + raise ValueError( + "Computing the goodness of fit is not yet supported for " + "models trained on the full dataset (batchsize = None). ") + else: + if batch_size is None or num_sessions is None: + raise ValueError( + f"batch_size ({batch_size}) and num_sessions ({num_sessions})" + f"should be provided if model is not provided.") + + nats_to_bits = np.log2(np.e) + chance_level = np.log(batch_size * num_sessions) + return (chance_level - infonce) * nats_to_bits + + def _consistency_scores( embeddings: List[Union[npt.NDArray, torch.Tensor]], datasets: List[Union[int, str]], diff --git a/tests/test_sklearn_metrics.py b/tests/test_sklearn_metrics.py index 58e12010..4e765ba7 100644 --- a/tests/test_sklearn_metrics.py +++ b/tests/test_sklearn_metrics.py @@ -383,3 +383,132 @@ def test_sklearn_runs_consistency(): with pytest.raises(ValueError, match="Invalid.*embeddings"): _, _, _ = cebra_sklearn_metrics.consistency_score( invalid_embeddings_runs, between="runs") + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_goodness_of_fit_score(seed): + """ + Ensure that the GoF score is close to 0 for a model fit on random data. + """ + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset1-model", + max_iterations=5, + batch_size=512, + ) + generator = torch.Generator().manual_seed(seed) + X = torch.rand(5000, 50, dtype=torch.float32, generator=generator) + y = torch.rand(5000, 5, dtype=torch.float32, generator=generator) + cebra_model.fit(X, y) + score = cebra_sklearn_metrics.goodness_of_fit_score(cebra_model, + X, + y, + session_id=0, + num_batches=500) + assert isinstance(score, float) + assert np.isclose(score, 0, atol=0.01) + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_goodness_of_fit_history(seed): + """ + Ensure that the GoF score is higher for a model fit on data with underlying + structure than for a model fit on random data. + """ + + # Generate data + generator = torch.Generator().manual_seed(seed) + X = torch.rand(1000, 50, dtype=torch.float32, generator=generator) + y_random = torch.rand(len(X), 5, dtype=torch.float32, generator=generator) + linear_map = torch.randn(50, 5, dtype=torch.float32, generator=generator) + y_linear = X @ linear_map + + def _fit_and_get_history(X, y): + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset1-model", + max_iterations=150, + batch_size=512, + device="cpu") + cebra_model.fit(X, y) + history = cebra_sklearn_metrics.goodness_of_fit_history(cebra_model) + # NOTE(stes): Ignore the first 5 iterations, they can have nonsensical values + # due to numerical issues. + return history[5:] + + history_random = _fit_and_get_history(X, y_random) + history_linear = _fit_and_get_history(X, y_linear) + + assert isinstance(history_random, np.ndarray) + assert history_random.shape[0] > 0 + # NOTE(stes): Ignore the first 5 iterations, they can have nonsensical values + # due to numerical issues. 
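# A rough numeric check of the conversion implemented by
# infonce_to_goodness_of_fit (the loss value here is made up for
# illustration): with batch_size=512 and a single session, the chance
# level is ln(512) ≈ 6.24 nats, so an InfoNCE loss of 5.24 corresponds to
# about (6.24 - 5.24) * log2(e) ≈ 1.44 bits.
import numpy as np

import cebra.integrations.sklearn.metrics as cebra_sklearn_metrics

gof = cebra_sklearn_metrics.infonce_to_goodness_of_fit(5.24,
                                                       batch_size=512,
                                                       num_sessions=1)
np.testing.assert_allclose(gof, 1.44, atol=0.01)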
+ history_random_non_negative = history_random[history_random >= 0] + np.testing.assert_allclose(history_random_non_negative, 0, atol=0.075) + + assert isinstance(history_linear, np.ndarray) + assert history_linear.shape[0] > 0 + + assert np.all(history_linear[-20:] > history_random[-20:]) + + +@pytest.mark.parametrize("seed", [42, 24, 10]) +def test_infonce_to_goodness_of_fit(seed): + """Test the conversion from InfoNCE loss to goodness of fit metric.""" + # Test with model + cebra_model = cebra_sklearn_cebra.CEBRA( + model_architecture="offset10-model", + max_iterations=5, + batch_size=128, + ) + generator = torch.Generator().manual_seed(seed) + X = torch.rand(1000, 50, dtype=torch.float32, generator=generator) + cebra_model.fit(X) + + # Test single value + gof = cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model) + assert isinstance(gof, float) + + # Test array of values + infonce_values = np.array([1.0, 2.0, 3.0]) + gof_array = cebra_sklearn_metrics.infonce_to_goodness_of_fit( + infonce_values, model=cebra_model) + assert isinstance(gof_array, np.ndarray) + assert gof_array.shape == infonce_values.shape + + # Test with explicit batch_size and num_sessions + gof = cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + batch_size=128, + num_sessions=1) + assert isinstance(gof, float) + + # Test error cases + with pytest.raises(ValueError, match="batch_size.*should not be provided"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model, + batch_size=128) + + with pytest.raises(ValueError, match="batch_size.*should not be provided"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=cebra_model, + num_sessions=1) + + # Test with unfitted model + unfitted_model = cebra_sklearn_cebra.CEBRA(max_iterations=5) + with pytest.raises(RuntimeError, match="Fit the CEBRA model first"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=unfitted_model) + + # Test with model having batch_size=None + none_batch_model = cebra_sklearn_cebra.CEBRA(batch_size=None, + max_iterations=5) + none_batch_model.fit(X) + with pytest.raises(ValueError, match="Computing the goodness of fit"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, + model=none_batch_model) + + # Test missing batch_size or num_sessions when model is None + with pytest.raises(ValueError, match="batch_size.*and num_sessions"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, batch_size=128) + + with pytest.raises(ValueError, match="batch_size.*and num_sessions"): + cebra_sklearn_metrics.infonce_to_goodness_of_fit(1.0, num_sessions=1) From a3b143f03f7bc8d6b299f68ac8ff3bee32bd83ad Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 2 Feb 2025 18:41:55 -0500 Subject: [PATCH 054/100] Support numpy 2, upgrade tests to support torch 2.6 (#221) * Drop numpy constraint * Implement workaround for pytables * better error message * pin numpy only for python 3.9 * update dependencies * Upgrade torch version * Fix based on python version * Add support for torch.load with weights_only=True * Implement safe loading for torch models starting in torch 2.6 * Fix windows specs * fix docstring * Revert changes to loading logic --- .github/workflows/build.yml | 2 +- cebra/data/load.py | 26 +++++++++++++--- cebra/integrations/sklearn/cebra.py | 48 ++++++++++++++++++++++++----- setup.cfg | 6 ++-- tests/test_dlc.py | 7 ++--- tests/test_load.py | 22 ++++++------- 6 files changed, 80 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index 3c4f68dd..5fed4c79 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,7 @@ jobs: # We aim to support the versions on pytorch.org # as well as selected previous versions on # https://pytorch.org/get-started/previous-versions/ - torch-version: ["2.2.2", "2.4.0"] + torch-version: ["2.4.0", "2.6.0"] sklearn-version: ["latest"] include: - os: windows-latest diff --git a/cebra/data/load.py b/cebra/data/load.py index 6f1b86e5..02714ad0 100644 --- a/cebra/data/load.py +++ b/cebra/data/load.py @@ -275,11 +275,11 @@ def _is_dlc_df(h5_file: IO[bytes], df_keys: List[str]) -> bool: """ try: if ["_i_table", "table"] in df_keys: - df = pd.read_hdf(h5_file, key="table") + df = read_hdf(h5_file, key="table") else: - df = pd.read_hdf(h5_file, key=df_keys[0]) + df = read_hdf(h5_file, key=df_keys[0]) except KeyError: - df = pd.read_hdf(h5_file) + df = read_hdf(h5_file) return all(value in df.columns.names for value in ["scorer", "bodyparts", "coords"]) @@ -348,7 +348,7 @@ def load_from_h5(file: Union[pathlib.Path, str], key: str, Returns: A :py:func:`numpy.array` containing the data of interest extracted from the :py:class:`pandas.DataFrame`. """ - df = pd.read_hdf(file, key=key) + df = read_hdf(file, key=key) if columns is None: loaded_array = df.values elif isinstance(columns, list) and df.columns.nlevels == 1: @@ -716,3 +716,21 @@ def _get_loader(file_ending: str) -> _BaseLoader: if file_ending not in __loaders.keys() or file_ending == "": raise OSError(f"File ending {file_ending} not supported.") return __loaders[file_ending] + + +def read_hdf(filename, key=None): + """Read HDF5 file using pandas, with fallback to h5py if pandas fails. + + Args: + filename: Path to HDF5 file + key: Optional key to read from HDF5 file. If None, tries "df_with_missing" + then falls back to first available key. + + Returns: + pandas.DataFrame: The loaded data + + Raises: + RuntimeError: If both pandas and h5py fail to load the file + """ + + return pd.read_hdf(filename, key=key) diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 3c834fa9..6dc1e0d0 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -27,10 +27,11 @@ import numpy as np import numpy.typing as npt +import packaging.version import pkg_resources +import sklearn import sklearn.utils.validation as sklearn_utils_validation import torch -import sklearn from sklearn.base import BaseEstimator from sklearn.base import TransformerMixin from sklearn.utils.metaestimators import available_if @@ -43,11 +44,38 @@ import cebra.models import cebra.solver +# NOTE(stes): From torch 2.6 onwards, we need to specify the following list +# when loading CEBRA models to allow weights_only = True. 
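# For context: torch.serialization.safe_globals(...) is a context manager
# that temporarily allowlists these classes for the restricted unpickler
# used by torch.load(..., weights_only=True); any global that is not
# allowlisted raises an UnpicklingError instead of being loaded.
# torch.serialization.add_safe_globals(...) is the permanent, process-wide
# equivalent.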
+CEBRA_LOAD_SAFE_GLOBALS = [ + cebra.data.Offset, torch.torch_version.TorchVersion, np.dtype, + np.dtypes.Float64DType, np.dtypes.Int64DType +] + + def check_version(estimator): # NOTE(stes): required as a check for the old way of specifying tags # https://github.com/scikit-learn/scikit-learn/pull/29677#issuecomment-2334229165 - from packaging import version - return version.parse(sklearn.__version__) < version.parse("1.6.dev") + return packaging.version.parse( + sklearn.__version__) < packaging.version.parse("1.6.dev") + + +def _safe_torch_load(filename, weights_only, **kwargs): + if weights_only is None: + if packaging.version.parse( + torch.__version__) >= packaging.version.parse("2.6.0"): + weights_only = True + else: + weights_only = False + + if not weights_only: + checkpoint = torch.load(filename, weights_only=False, **kwargs) + else: + # NOTE(stes): This is only supported for torch 2.6+ + with torch.serialization.safe_globals(CEBRA_LOAD_SAFE_GLOBALS): + checkpoint = torch.load(filename, weights_only=True, **kwargs) + + return checkpoint + def _init_loader( is_cont: bool, @@ -1432,15 +1460,22 @@ def save(self, def load(cls, filename: str, backend: Literal["auto", "sklearn", "torch"] = "auto", + weights_only: bool = None, **kwargs) -> "CEBRA": """Load a model from disk. Args: filename: The path to the file in which to save the trained model. backend: A string identifying the used backend. + weights_only: Indicates whether unpickler should be restricted to loading only tensors, primitive types, + dictionaries and any types added via :py:func:`torch.serialization.add_safe_globals`. + See :py:func:`torch.load` with ``weights_only=True`` for more details. It it recommended to leave this + at the default value of ``None``, which sets the argument to ``False`` for torch<2.6, and ``True`` for + higher versions of torch. If you experience issues with loading custom models (specified outside + of the CEBRA package), you can try to set this to ``False`` if you trust the source of the model. kwargs: Optional keyword arguments passed directly to the loader. - Return: + Returns: The model to load. Note: @@ -1450,7 +1485,6 @@ def load(cls, For information about the file format please refer to :py:meth:`cebra.CEBRA.save`. Example: - >>> import cebra >>> import numpy as np >>> import tempfile @@ -1464,16 +1498,14 @@ def load(cls, >>> loaded_model = cebra.CEBRA.load(tmp_file) >>> embedding = loaded_model.transform(dataset) >>> tmp_file.unlink() - """ - supported_backends = ["auto", "sklearn", "torch"] if backend not in supported_backends: raise NotImplementedError( f"Unsupported backend: '{backend}'. 
Supported backends are: {', '.join(supported_backends)}" ) - checkpoint = torch.load(filename, **kwargs) + checkpoint = _safe_torch_load(filename, weights_only, **kwargs) if backend == "auto": backend = "sklearn" if isinstance(checkpoint, dict) else "torch" diff --git a/setup.cfg b/setup.cfg index 68263d73..2addd5d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,11 +31,13 @@ where = python_requires = >=3.9 install_requires = joblib - numpy<2.0.0 + numpy<2.0;platform_system=="Windows" + numpy<2.0;platform_system!="Windows" and python_version<"3.10" + numpy;platform_system!="Windows" and python_version>="3.10" literate-dataclasses scikit-learn scipy - torch + torch>=2.4.0 tqdm matplotlib requests diff --git a/tests/test_dlc.py b/tests/test_dlc.py index a19fe593..8ab29abd 100644 --- a/tests/test_dlc.py +++ b/tests/test_dlc.py @@ -29,6 +29,7 @@ import cebra.integrations.deeplabcut as cebra_dlc from cebra import CEBRA from cebra import load_data +from cebra.data.load import read_hdf # NOTE(stes): The original data URL is # https://github.com/DeepLabCut/DeepLabCut/blob/main/examples @@ -54,11 +55,7 @@ def test_imports(): def _load_dlc_dataframe(filename): - try: - df = pd.read_hdf(filename, "df_with_missing") - except KeyError: - df = pd.read_hdf(filename) - return df + return read_hdf(filename) def _get_annotated_data(url, keypoints): diff --git a/tests/test_load.py b/tests/test_load.py index 2a9ef3b5..4524b29c 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -248,7 +248,7 @@ def generate_h5_no_array(filename, dtype): def generate_h5_dataframe(filename, dtype): A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") loaded_A = cebra_load.load(filename, key="df_A") return A, loaded_A @@ -258,7 +258,7 @@ def generate_h5_dataframe_columns(filename, dtype): A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) A_col = A[:, :2] df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") loaded_A = cebra_load.load(filename, key="df_A", columns=["a", "b"]) return A_col, loaded_A @@ -269,8 +269,8 @@ def generate_h5_multi_dataframe(filename, dtype): B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) df_B = pd.DataFrame(np.array(B), columns=["c", "d", "e"]) - df_A.to_hdf(filename, "df_A") - df_B.to_hdf(filename, "df_B") + df_A.to_hdf(filename, key="df_A") + df_B.to_hdf(filename, key="df_B") loaded_A = cebra_load.load(filename, key="df_A") return A, loaded_A @@ -279,7 +279,7 @@ def generate_h5_multi_dataframe(filename, dtype): def generate_h5_single_dataframe_no_key(filename, dtype): A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(dtype) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") loaded_A = cebra_load.load(filename) return A, loaded_A @@ -290,8 +290,8 @@ def generate_h5_multi_dataframe_no_key(filename, dtype): B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(dtype) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) df_B = pd.DataFrame(np.array(B), columns=["c", "d", "e"]) - df_A.to_hdf(filename, "df_A") - df_B.to_hdf(filename, "df_B") + df_A.to_hdf(filename, key="df_A") + df_B.to_hdf(filename, key="df_B") _ = cebra_load.load(filename) @@ -304,7 +304,7 @@ def generate_h5_multicol_dataframe(filename, dtype): df_A = pd.DataFrame(A, 
columns=pd.MultiIndex.from_product([animals, keypoints])) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") loaded_A = cebra_load.load(filename, key="df_A") return A, loaded_A @@ -313,7 +313,7 @@ def generate_h5_multicol_dataframe(filename, dtype): def generate_h5_dataframe_invalid_key(filename, dtype): A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(dtype) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") _ = cebra_load.load(filename, key="df_B") @@ -321,7 +321,7 @@ def generate_h5_dataframe_invalid_key(filename, dtype): def generate_h5_dataframe_invalid_column(filename, dtype): A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(dtype) df_A = pd.DataFrame(np.array(A), columns=["a", "b", "c"]) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") _ = cebra_load.load(filename, key="df_A", columns=["d", "b"]) @@ -334,7 +334,7 @@ def generate_h5_multicol_dataframe_columns(filename, dtype): df_A = pd.DataFrame(A, columns=pd.MultiIndex.from_product([animals, keypoints])) - df_A.to_hdf(filename, "df_A") + df_A.to_hdf(filename, key="df_A") _ = cebra_load.load(filename, key="df_A", columns=["a", "b"]) From 0d5d82ab9a518533041fd118c1ddd0df7334f433 Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 2 Feb 2025 18:55:59 -0500 Subject: [PATCH 055/100] Release 0.5.0rc1 (#189) * Make bump_version script runnable on MacOS * Bump version to 0.5.0rc1 * fix minor formatting issues * remove commented code --------- Co-authored-by: Mackenzie Mathis --- Dockerfile | 2 +- Makefile | 2 +- PKGBUILD | 2 +- cebra/__init__.py | 2 +- cebra/integrations/sklearn/cebra.py | 2 +- docs/source/conf.py | 19 ++++++--------- reinstall.sh | 2 +- tools/bump_version.sh | 36 +++++++++++++++++++---------- 8 files changed, 37 insertions(+), 30 deletions(-) diff --git a/Dockerfile b/Dockerfile index d734ee6f..e8ac14a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ RUN make dist FROM cebra-base # install the cebra wheel -ENV WHEEL=cebra-0.4.0-py2.py3-none-any.whl +ENV WHEEL=cebra-0.5.0rc1-py2.py3-none-any.whl WORKDIR /build COPY --from=wheel /build/dist/${WHEEL} . 
RUN pip install --no-cache-dir ${WHEEL}'[dev,integrations,datasets]' diff --git a/Makefile b/Makefile index ca8c5480..a1e8d3b2 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CEBRA_VERSION := 0.4.0 +CEBRA_VERSION := 0.5.0rc1 dist: python3 -m pip install virtualenv diff --git a/PKGBUILD b/PKGBUILD index 07fa3a1d..91ba4a4e 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Steffen Schneider pkgname=python-cebra _pkgname=cebra -pkgver=0.4.0 +pkgver=0.5.0rc1 pkgrel=1 pkgdesc="Consistent Embeddings of high-dimensional Recordings using Auxiliary variables" url="https://cebra.ai" diff --git a/cebra/__init__.py b/cebra/__init__.py index 204cd2a2..edf1b5ee 100644 --- a/cebra/__init__.py +++ b/cebra/__init__.py @@ -66,7 +66,7 @@ import cebra.integrations.sklearn as sklearn -__version__ = "0.4.0" +__version__ = "0.5.0rc1" __all__ = ["CEBRA"] __allow_lazy_imports = False __lazy_imports = {} diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py index 6dc1e0d0..fe53c8e9 100644 --- a/cebra/integrations/sklearn/cebra.py +++ b/cebra/integrations/sklearn/cebra.py @@ -51,7 +51,6 @@ np.dtypes.Float64DType, np.dtypes.Int64DType ] - def check_version(estimator): # NOTE(stes): required as a check for the old way of specifying tags # https://github.com/scikit-learn/scikit-learn/pull/29677#issuecomment-2334229165 @@ -77,6 +76,7 @@ def _safe_torch_load(filename, weights_only, **kwargs): return checkpoint + def _init_loader( is_cont: bool, is_disc: bool, diff --git a/docs/source/conf.py b/docs/source/conf.py index 025a988b..c5e12b5a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,18 +28,13 @@ # -- Path setup -------------------------------------------------------------- -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# +import datetime import os import sys sys.path.insert(0, os.path.abspath(".")) -import datetime - -import cebra +import cebra # noqa: E402 def get_years(start_year=2021): @@ -156,11 +151,6 @@ def get_years(start_year=2021): "url": "https://twitter.com/cebraAI", "icon": "fab fa-twitter", }, - # { - # "name": "DockerHub", - # "url": "https://hub.docker.com/r/stffsc/cebra", - # "icon": "fab fa-docker", - # }, { "name": "PyPI", "url": "https://pypi.org/project/cebra/", @@ -247,6 +237,9 @@ def get_years(start_year=2021): # Download link for the notebook, see # https://nbsphinx.readthedocs.io/en/0.3.0/prolog-and-epilog.html + +# fmt: off +# flake8: noqa: E501 nbsphinx_prolog = r""" .. only:: html @@ -269,3 +262,5 @@ def get_years(start_year=2021): ---- """ +# fmt: on +# flake8: enable=E501 diff --git a/reinstall.sh b/reinstall.sh index 778f98eb..549982a1 100755 --- a/reinstall.sh +++ b/reinstall.sh @@ -15,7 +15,7 @@ pip uninstall -y cebra # Get version info after uninstalling --- this will automatically get the # most recent version based on the source code in the current directory. # $(tools/get_cebra_version.sh) -VERSION=0.4.0 +VERSION=0.5.0rc1 echo "Upgrading to CEBRA v${VERSION}" # Upgrade the build system (PEP517/518 compatible) diff --git a/tools/bump_version.sh b/tools/bump_version.sh index fbc161b1..fb89f413 100755 --- a/tools/bump_version.sh +++ b/tools/bump_version.sh @@ -1,7 +1,7 @@ #!/bin/bash # Bump the CEBRA version to the specified value. # Edits all relevant files at once. 
-# +# # Usage: # tools/bump_version.sh 0.3.1rc1 @@ -10,24 +10,36 @@ if [ -z ${version} ]; then >&1 echo "Specify a version number." >&1 echo "Usage:" >&1 echo "tools/bump_version.sh " + exit 1 +fi + +# Determine the correct sed command based on the OS +# On macOS, the `sed` command requires an empty string argument after `-i` for in-place editing. +# On Linux and other Unix-like systems, the `sed` command only requires `-i` for in-place editing. +if [[ "$OSTYPE" == "darwin"* ]]; then + # macOS + SED_CMD="sed -i .bkp -e" +else + # Linux and other Unix-like systems + SED_CMD="sed -i -e" fi # python cebra version -sed -i "s/__version__ = .*/__version__ = \"${version}\"/" \ - cebra/__init__.py +$SED_CMD "s/__version__ = .*/__version__ = \"${version}\"/" cebra/__init__.py # reinstall script in root -sed -i "s/VERSION=.*/VERSION=${version}/" \ - reinstall.sh +$SED_CMD "s/VERSION=.*/VERSION=${version}/" reinstall.sh # Makefile -sed -i "s/CEBRA_VERSION := .*/CEBRA_VERSION := ${version}/" \ - Makefile +$SED_CMD "s/CEBRA_VERSION := .*/CEBRA_VERSION := ${version}/" Makefile -# Arch linux PKGBUILD -sed -i "s/pkgver=.*/pkgver=${version}/" \ - PKGBUILD +# Arch linux PKGBUILD +$SED_CMD "s/pkgver=.*/pkgver=${version}/" PKGBUILD # Dockerfile -sed -i "s/ENV WHEEL=cebra-.*\.whl/ENV WHEEL=cebra-${version}-py2.py3-none-any.whl/" \ - Dockerfile +$SED_CMD "s/ENV WHEEL=cebra-.*\.whl/ENV WHEEL=cebra-${version}-py2.py3-none-any.whl/" Dockerfile + +# Remove backup files +if [[ "$OSTYPE" == "darwin"* ]]; then + rm cebra/__init__.py.bkp reinstall.sh.bkp Makefile.bkp PKGBUILD.bkp Dockerfile.bkp +fi From 92fd9bc90b614ed9828e22ea7842610e510b6ffb Mon Sep 17 00:00:00 2001 From: Steffen Schneider Date: Sun, 2 Feb 2025 19:46:43 -0500 Subject: [PATCH 056/100] Fix pypi action (#222) * force packaging upgrade to 24.2 for twine * Bump version to 0.5.0rc2 * remove universal compatibility option * revert tag * adapt files to new wheel name due to py3 --- .github/workflows/release-pypi.yml | 7 +++++++ Dockerfile | 2 +- PKGBUILD | 2 +- docs/source/contributing.rst | 4 ++-- pyproject.toml | 3 ++- reinstall.sh | 2 +- setup.cfg | 3 --- tools/build_docs.sh | 4 ++-- tools/bump_version.sh | 8 ++++++-- 9 files changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index fc6d5c8e..ac078fd9 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -28,6 +28,13 @@ jobs: path: ~/.cache/pip key: ${{ runner.os }}-pip + - name: Install dependencies + run: | + pip install --upgrade pip + pip install wheel + # NOTE(stes) see https://github.com/pypa/twine/issues/1216#issuecomment-2629069669 + pip install "packaging>=24.2" + - name: Checkout code uses: actions/checkout@v3 diff --git a/Dockerfile b/Dockerfile index e8ac14a0..7cd326d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ RUN make dist FROM cebra-base # install the cebra wheel -ENV WHEEL=cebra-0.5.0rc1-py2.py3-none-any.whl +ENV WHEEL=cebra-0.5.0rc1-py3-none-any.whl WORKDIR /build COPY --from=wheel /build/dist/${WHEEL} . 
RUN pip install --no-cache-dir ${WHEEL}'[dev,integrations,datasets]' diff --git a/PKGBUILD b/PKGBUILD index 91ba4a4e..1f8b3db5 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -40,7 +40,7 @@ build() { package() { cd $srcdir/${_pkgname}-${pkgver} - pip install --ignore-installed --no-deps --root="${pkgdir}" dist/${_pkgname}-${pkgver}-py2.py3-none-any.whl + pip install --ignore-installed --no-deps --root="${pkgdir}" dist/${_pkgname}-${pkgver}-py3-none-any.whl find ${pkgdir} -iname __pycache__ -exec rm -r {} \; 2>/dev/null || echo install -Dm 644 LICENSE.md $pkgdir/usr/share/licenses/${pkgname}/LICENSE } diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index cc7ae0a8..7fcd16a1 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -155,13 +155,13 @@ Enter the build environment and build the package: host $ make interact docker $ make build # ... outputs ... - Successfully built cebra-X.X.XaX-py2.py3-none-any.whl + Successfully built cebra-X.X.XaX-py3-none-any.whl The built package can be found in ``dist/`` and can be installed locally with .. code:: bash - pip install dist/cebra-X.X.XaX-py2.py3-none-any.whl + pip install dist/cebra-X.X.XaX-py3-none-any.whl **Please do not distribute this package prior to the public release of the CEBRA repository, because it also contains parts of the source code.** diff --git a/pyproject.toml b/pyproject.toml index 4a927c6c..b64475e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,8 @@ [build-system] requires = [ "setuptools>=43", - "wheel" + "wheel", + "packaging>=24.2" ] build-backend = "setuptools.build_meta" diff --git a/reinstall.sh b/reinstall.sh index 549982a1..ece080b8 100755 --- a/reinstall.sh +++ b/reinstall.sh @@ -24,4 +24,4 @@ python3 -m pip install --upgrade build python3 -m build --sdist --wheel . # Reinstall the package with most recent version -pip install --upgrade --no-cache-dir "dist/cebra-${VERSION}-py2.py3-none-any.whl[datasets,integrations]" +pip install --upgrade --no-cache-dir "dist/cebra-${VERSION}-py3-none-any.whl[datasets,integrations]" diff --git a/setup.cfg b/setup.cfg index 2addd5d7..9da156ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -112,6 +112,3 @@ dev = # docformatter[tomli] codespell cffconvert - -[bdist_wheel] -universal=1 diff --git a/tools/build_docs.sh b/tools/build_docs.sh index 3f5f36cd..38a7982e 100755 --- a/tools/build_docs.sh +++ b/tools/build_docs.sh @@ -62,8 +62,8 @@ FROM python:3.9 RUN python -m pip install --upgrade pip setuptools wheel \ && apt-get update -y && apt-get install -y pandoc git RUN pip install torch --extra-index-url=https://download.pytorch.org/whl/cpu -COPY dist/cebra-0.4.0-py2.py3-none-any.whl . -RUN pip install 'cebra-0.4.0-py2.py3-none-any.whl[docs]' +COPY dist/cebra-0.5.0rc1-py3-none-any.whl . 
+RUN pip install 'cebra-0.5.0rc1-py3-none-any.whl[docs]' EOF checkout_cebra_figures diff --git a/tools/bump_version.sh b/tools/bump_version.sh index fb89f413..17142f7e 100755 --- a/tools/bump_version.sh +++ b/tools/bump_version.sh @@ -37,9 +37,13 @@ $SED_CMD "s/CEBRA_VERSION := .*/CEBRA_VERSION := ${version}/" Makefile $SED_CMD "s/pkgver=.*/pkgver=${version}/" PKGBUILD # Dockerfile -$SED_CMD "s/ENV WHEEL=cebra-.*\.whl/ENV WHEEL=cebra-${version}-py2.py3-none-any.whl/" Dockerfile +$SED_CMD "s/ENV WHEEL=cebra-.*\.whl/ENV WHEEL=cebra-${version}-py3-none-any.whl/" Dockerfile + +# build_docs.sh +$SED_CMD "s/COPY dist\/cebra-.*-py3-none-any\.whl/COPY dist\/cebra-${version}-py3-none-any.whl/" tools/build_docs.sh +$SED_CMD "s/RUN pip install 'cebra-.*-py3-none-any\.whl/RUN pip install 'cebra-${version}-py3-none-any.whl/" tools/build_docs.sh # Remove backup files if [[ "$OSTYPE" == "darwin"* ]]; then - rm cebra/__init__.py.bkp reinstall.sh.bkp Makefile.bkp PKGBUILD.bkp Dockerfile.bkp + rm cebra/__init__.py.bkp reinstall.sh.bkp Makefile.bkp PKGBUILD.bkp Dockerfile.bkp tools/build_docs.sh.bkp fi From 69d91ef2db025fa70d7ff791bad7b11e0089ceda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dcaro?= Date: Tue, 18 Feb 2025 10:49:06 +0100 Subject: [PATCH 057/100] Update base.py (#224) This is a lazy solution to #223 --- cebra/solver/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cebra/solver/base.py b/cebra/solver/base.py index f1eab6ed..ea87a4ad 100644 --- a/cebra/solver/base.py +++ b/cebra/solver/base.py @@ -442,7 +442,8 @@ def fit( self.decoding(loader, valid_loader)) if save_hook is not None: save_hook(num_steps, self) - self.save(logdir, f"checkpoint_{num_steps:#07d}.pth") + if logdir is not None: + self.save(logdir, f"checkpoint_{num_steps:#07d}.pth") def step(self, batch: cebra.data.Batch) -> dict: """Perform a single gradient update. 
From 782b63a459f06bc547199f5feda118173d101a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lia=20Benquet?= <32598028+CeliaBenquet@users.noreply.github.com> Date: Sat, 1 Mar 2025 15:41:58 +0100 Subject: [PATCH 058/100] Change max consistency value to 100 instead of 99 (#227) * Change text consistency max from 99 to 100 * Update cebra/integrations/matplotlib.py --------- Co-authored-by: Mackenzie Mathis Co-authored-by: Steffen Schneider --- cebra/integrations/matplotlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/integrations/matplotlib.py b/cebra/integrations/matplotlib.py index 30af7fd4..c2696d4a 100644 --- a/cebra/integrations/matplotlib.py +++ b/cebra/integrations/matplotlib.py @@ -684,7 +684,7 @@ def _to_heatmap_format( else: heatmap_values[i, j] = score_dict[label_i, label_j] - return np.minimum(heatmap_values * 100, 99) + return heatmap_values * 100 def _create_text(self): """Create the text to add in the confusion matrix grid and the title.""" From d72b055a234ee96dcba26e481dfd98c3ad19c319 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Sat, 1 Mar 2025 18:23:50 +0100 Subject: [PATCH 059/100] Update assets.py --> force check for parent dir (#230) Update assets.py - mkdir was failing in 0.5.0rc1; attempt to fix --- cebra/data/assets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cebra/data/assets.py b/cebra/data/assets.py index 86695482..adea8413 100644 --- a/cebra/data/assets.py +++ b/cebra/data/assets.py @@ -93,7 +93,7 @@ def download_file_with_progress_bar(url: str, ) # Create the directory and any necessary parent directories - location_path.mkdir(exist_ok=True) + location_path.mkdir(parents=True, exist_ok=True) filename = filename_match.group(1) file_path = location_path / filename From 9fd91c36eb78a0af5b24cd2ad09b7075b7e0e3f5 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Sat, 1 Mar 2025 22:59:39 +0100 Subject: [PATCH 060/100] User docs minor edit (#229) * user note added to usage.rst - link added * Update usage.rst - more detailed note on the effect of temp. * Update usage.rst - add in temp to demo model - testout put thanks @stes * Update docs/source/usage.rst Co-authored-by: Steffen Schneider * Update docs/source/usage.rst Co-authored-by: Steffen Schneider * Update docs/source/usage.rst Co-authored-by: Steffen Schneider --------- Co-authored-by: Steffen Schneider --- docs/source/usage.rst | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 334f1bbc..53821e36 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1,7 +1,7 @@ Using CEBRA =========== -This page covers a standard CEBRA usage. We recommend checking out the :py:doc:`demos` for in-depth CEBRA usage examples as well. Here we present a quick overview on how to use CEBRA on various datasets. Note that we provide two ways to interact with the code: +This page covers a standard CEBRA usage. We recommend checking out the :py:doc:`demos` for CEBRA usage examples as well. Here we present a quick overview on how to use CEBRA on various datasets. Note that we provide two ways to interact with the code: * For regular usage, we recommend leveraging the **high-level interface**, adhering to ``scikit-learn`` formatting. * Upon specific needs, advanced users might consider diving into the **low-level interface** that adheres to ``PyTorch`` formatting. @@ -12,7 +12,7 @@ Firstly, why use CEBRA? 
CEBRA is primarily designed for producing robust, consistent extractions of latent factors from time-series data. It supports three modes, and is a self-supervised representation learning algorithm that uses our modified contrastive learning approach designed for multi-modal time-series data. In short, it is a type of non-linear dimensionality reduction, like `tSNE `_ and `UMAP `_. We show in our original paper that it outperforms tSNE and UMAP at producing closer-to-ground-truth latents and is more consistent. -That being said, CEBRA can be used on non-time-series data and it does not strictly require multi-modal data. In general, we recommend considering using CEBRA for measuring changes in consistency across conditions (brain areas, cells, animals), for hypothesis-guided decoding, and for topological exploration of the resulting embedding spaces. It can also be used for visualization and considering dynamics within the embedding space. For examples of how CEBRA can be used to map space, decode natural movies, and make hypotheses for neural coding of sensorimotor systems, see our paper (Schneider, Lee, Mathis, 2023). +That being said, CEBRA can be used on non-time-series data and it does not strictly require multi-modal data. In general, we recommend considering using CEBRA for measuring changes in consistency across conditions (brain areas, cells, animals), for hypothesis-guided decoding, and for topological exploration of the resulting embedding spaces. It can also be used for visualization and considering dynamics within the embedding space. For examples of how CEBRA can be used to map space, decode natural movies, and make hypotheses for neural coding of sensorimotor systems, see `Schneider, Lee, Mathis. Nature 2023 `_. The CEBRA workflow ------------------ @@ -22,7 +22,7 @@ We recommend to start with running CEBRA-Time (unsupervised) and look both at th (1) Use CEBRA-Time for unsupervised data exploration. (2) Consider running a hyperparameter sweep on the inputs to the model, such as :py:attr:`cebra.CEBRA.model_architecture`, :py:attr:`cebra.CEBRA.time_offsets`, :py:attr:`cebra.CEBRA.output_dimension`, and set :py:attr:`cebra.CEBRA.batch_size` to be as high as your GPU allows. You want to see clear structure in the 3D plot (the first 3 latents are shown by default). -(3) Use CEBRA-Behavior with many different labels and combinations, then look at the InfoNCE loss - the lower the loss value, the better the fit (see :py:doc:`cebra-figures/figures/ExtendedDataFigure5`), and visualize the embeddings. The goal is to understand which labels are contributing to the structure you see in CEBRA-Time, and improve this structure. Again, you should consider a hyperparameter sweep. +(3) Use CEBRA-Behavior with many different labels and combinations, then look at the InfoNCE loss - the lower the loss value, the better the fit (see :py:doc:`cebra-figures/figures/ExtendedDataFigure5`), and visualize the embeddings. The goal is to understand which labels are contributing to the structure you see in CEBRA-Time, and improve this structure. Again, you should consider a hyperparameter sweep (and avoid overfitting by performing the proper train/validation split (see Step 3 in our quick start guide below). (4) Interpretability: now you can use these latents in downstream tasks, such as measuring consistency, decoding, and determining the dimensionality of your data with topological data analysis. All the steps to do this are described below. Enjoy using CEBRA! 
🔥🦓 @@ -179,7 +179,7 @@ We provide a set of pre-defined models. You can access (and search) a list of av Then, you can choose the one that fits best with your needs and provide it to the CEBRA model as the :py:attr:`~.CEBRA.model_architecture` parameter. -As an indication the table below presents the model architecture we used to train CEBRA on the datasets presented in our paper (Schneider, Lee, Mathis, 2022). +As an indication the table below presents the model architecture we used to train CEBRA on the datasets presented in our paper (Schneider, Lee, Mathis. Nature 2023). .. list-table:: :widths: 25 25 20 30 @@ -265,9 +265,8 @@ For standard usage we recommend the default values (i.e., ``InfoNCE`` and ``cosi .. rubric:: Temperature :py:attr:`~.CEBRA.temperature` -:py:attr:`~.CEBRA.temperature` has the largest effect on visualization of the embedding (see :py:doc:`cebra-figures/figures/ExtendedDataFigure2`). Hence, it is important that it is fitted to your specific data. +:py:attr:`~.CEBRA.temperature` has the largest effect on *visualization* of the embedding (see :py:doc:`cebra-figures/figures/ExtendedDataFigure2`). Hence, it is important that it is fitted to your specific data. Lower temperatures (e.g. around 0.1) will result in a more dispersed embedding, higher temperatures (larger than 1) will concentrate the embedding. -The simplest way to handle it is to use a *learnable temperature*. For that, set :py:attr:`~.CEBRA.temperature_mode` to ``auto``. :py:attr:`~.CEBRA.temperature` will be trained alongside the model. 🚀 For advance usage, you might need to find the optimal :py:attr:`~.CEBRA.temperature`. For that we recommend to perform a grid-search. @@ -307,7 +306,6 @@ Here is an example of a CEBRA model initialization: cebra_model = CEBRA( model_architecture = "offset10-model", batch_size = 1024, - temperature_mode="auto", learning_rate = 0.001, max_iterations = 10, time_offsets = 10, @@ -321,8 +319,7 @@ Here is an example of a CEBRA model initialization: .. testoutput:: CEBRA(batch_size=1024, learning_rate=0.001, max_iterations=10, - model_architecture='offset10-model', temperature_mode='auto', - time_offsets=10) + model_architecture='offset10-model', time_offsets=10) .. admonition:: See API docs :class: dropdown @@ -568,7 +565,8 @@ We provide a simple hyperparameters sweep to compare CEBRA models with different learning_rate = [0.001], time_offsets = 5, max_iterations = 5, - temperature_mode = "auto", + temperature_mode='constant', + temperature = 0.1, verbose = False) # 2. Define the datasets to iterate over @@ -820,7 +818,7 @@ It takes a CEBRA model and returns a 2D plot of the loss against the number of i Displaying the temperature """""""""""""""""""""""""" -:py:attr:`~.CEBRA.temperature` has the largest effect on the visualization of the embedding. Hence it might be interesting to check its evolution when ``temperature_mode=auto``. +:py:attr:`~.CEBRA.temperature` has the largest effect on the visualization of the embedding. Hence it might be interesting to check its evolution when ``temperature_mode=auto``. We recommend only using `auto` if you have first explored the `constant` setting. If you use the ``auto`` mode, please always check the time evolution of the temperature over time alongside the loss curve. To that extend, you can use the function :py:func:`~.plot_temperature`. @@ -1186,9 +1184,10 @@ Improve model performance 🧐 Below is a (non-exhaustive) list of actions you can try if your embedding looks different from what you were expecting. #. 
Assess that your model `converged `_. For that, observe if the training loss stabilizes itself around the end of the training or still seems to be decreasing. Refer to `Visualize the training loss`_ for more details on how to display the training loss. -#. Increase the number of iterations. It should be at least 10,000. +#. Increase the number of iterations. It typically should be at least 10,000. On small datasets, it can make sense to stop training earlier to avoid overfitting effects. #. Make sure the batch size is big enough. It should be at least 512. #. Fine-tune the model's hyperparameters, namely ``learning_rate``, ``output_dimension``, ``num_hidden_units`` and eventually ``temperature`` (by setting ``temperature_mode`` back to ``constant``). Refer to `Grid search`_ for more details on performing hyperparameters tuning. +#. To note, you should still be mindful of performing train/validation splits and shuffle controls to avoid `overfitting `_. @@ -1202,14 +1201,19 @@ Putting all previous snippet examples together, we obtain the following pipeline import cebra from numpy.random import uniform, randint from sklearn.model_selection import train_test_split + import os + import tempfile + from pathlib import Path # 1. Define a CEBRA model cebra_model = cebra.CEBRA( model_architecture = "offset10-model", batch_size = 512, learning_rate = 1e-4, - max_iterations = 10, # TODO(user): to change to at least 10'000 - max_adapt_iterations = 10, # TODO(user): to change to ~100-500 + temperature_mode='constant', + temperature = 0.1, + max_iterations = 10, # TODO(user): to change to ~500-10000 depending on dataset size + #max_adapt_iterations = 10, # TODO(user): use and to change to ~100-500 if adapting time_offsets = 10, output_dimension = 8, verbose = False @@ -1243,7 +1247,7 @@ Putting all previous snippet examples together, we obtain the following pipeline # time contrastive learning cebra_model.fit(train_data) # discrete behavior contrastive learning - cebra_model.fit(train_data, train_discrete_label,) + cebra_model.fit(train_data, train_discrete_label) # continuous behavior contrastive learning cebra_model.fit(train_data, train_continuous_label) # mixed behavior contrastive learning @@ -1257,10 +1261,10 @@ Putting all previous snippet examples together, we obtain the following pipeline cebra_model = cebra.CEBRA.load(tmp_file) train_embedding = cebra_model.transform(train_data) valid_embedding = cebra_model.transform(valid_data) - assert train_embedding.shape == (70, 8) - assert valid_embedding.shape == (30, 8) + assert train_embedding.shape == (70, 8) # TODO(user): change to split ratio & output dim + assert valid_embedding.shape == (30, 8) # TODO(user): change to split ratio & output dim - # 7. Evaluate the model performances + # 7. 
Evaluate the model performance (you can also check the train_data) goodness_of_fit = cebra.sklearn.metrics.infonce_loss(cebra_model, valid_data, valid_discrete_label, From 8d636e96d6bfeaa3d7fbadbcb7691898fcc153eb Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Mon, 3 Mar 2025 14:25:26 +0100 Subject: [PATCH 061/100] General Doc refresher (#232) * Update installation.rst - python 3.9+ * Update index.rst * Update figures.rst * Update index.rst -typo fix * Update usage.rst - update suggestion on data split * Update docs/source/usage.rst Co-authored-by: Steffen Schneider * Update usage.rst - indent error fixed * Update usage.rst - changed infoNCE to new GoF * Update usage.rst - finx numpy() doctest * Update usage.rst - small typo fix (label) * Update usage.rst --------- Co-authored-by: Steffen Schneider --- docs/source/figures.rst | 4 +- docs/source/index.rst | 39 +++++++++-------- docs/source/installation.rst | 6 +-- docs/source/usage.rst | 82 ++++++++++++++++++++---------------- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/docs/source/figures.rst b/docs/source/figures.rst index 24b1987e..a4101f4a 100644 --- a/docs/source/figures.rst +++ b/docs/source/figures.rst @@ -1,7 +1,7 @@ Figures ======= -CEBRA was introduced in `Schneider, Lee and Mathis (2022)`_ and applied to various datasets across +CEBRA was introduced in `Schneider, Lee and Mathis (2023)`_ and applied to various datasets across animals and recording modalities. In this section, we provide reference code for reproducing the figures and experiments. Since especially @@ -56,4 +56,4 @@ differ in minor typographic details. -.. _Schneider, Lee and Mathis (2022): https://arxiv.org/abs/2204.00673 +.. _Schneider, Lee and Mathis (2023): https://www.nature.com/articles/s41586-023-06031-6 diff --git a/docs/source/index.rst b/docs/source/index.rst index c8231746..1a6ce4d2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,27 +34,18 @@ Please support the development of CEBRA by starring and/or watching the project Installation and Setup ---------------------- -Please see the dedicated :doc:`Installation Guide ` for information on installation options using ``conda``, ``pip`` and ``docker``. - -Have fun! 😁 +Please see the dedicated :doc:`Installation Guide ` for information on installation options using ``conda``, ``pip`` and ``docker``. Have fun! 😁 Usage ----- Please head over to the :doc:`Usage ` tab to find step-by-step instructions to use CEBRA on your data. For example use cases, see the :doc:`Demos ` tab. -Integrations ------------- - -CEBRA can be directly integrated with existing libraries commonly used in data analysis. The ``cebra.integrations`` module -is getting actively extended. Right now, we offer integrations for ``scikit-learn``-like usage of CEBRA, a package making use of ``matplotlib`` to plot the CEBRA model results, as well as the -possibility to compute CEBRA embeddings on DeepLabCut_ outputs directly. - Licensing --------- - -Since version 0.4.0, CEBRA is open source software under an Apache 2.0 license. +The ideas presented in our package are currently patent pending (Patent No. WO2023143843). +Since version 0.4.0, CEBRA's source is licenced under an Apache 2.0 license. Prior versions 0.1.0 to 0.3.1 were released for academic use only. Please see the full license file on Github_ for further information. @@ -65,13 +56,19 @@ Contributing Please refer to the :doc:`Contributing ` tab to find our guidelines on contributions. 
-Code contributors +Code Contributors ----------------- -The CEBRA code was originally developed by Steffen Schneider, Jin H. Lee, and Mackenzie Mathis (up to internal version 0.0.2). As of March 2023, it is being actively extended and maintained by `Steffen Schneider`_, `Célia Benquet`_, and `Mackenzie Mathis`_. +The CEBRA code was originally developed by Steffen Schneider, Jin H. Lee, and Mackenzie Mathis (up to internal version 0.0.2). Please see our AUTHORS file for more information. -References ----------- +Integrations +------------ + +CEBRA can be directly integrated with existing libraries commonly used in data analysis. Namely, we provide a ``scikit-learn`` style interface to use CEBRA. Additionally, we offer integrations with our ``scikit-learn``-style of using CEBRA, a package making use of ``matplotlib`` and ``plotly`` to plot the CEBRA model results, as well as the possibility to compute CEBRA embeddings on DeepLabCut_ outputs directly. If you have another suggestion, please head over to Discussions_ on GitHub_! + + +Key References +-------------- .. code:: @article{schneider2023cebra, @@ -82,14 +79,22 @@ References year = {2023}, } + @article{xCEBRA2025, + author={Steffen Schneider and Rodrigo Gonz{\'a}lez Laiz and Anastasiia Filippova and Markus Frey and Mackenzie W Mathis}, + title = {Time-series attribution maps with regularized contrastive learning}, + journal = {AISTATS}, + url = {https://openreview.net/forum?id=aGrCXoTB4P}, + year = {2025}, + } + This documentation is based on the `PyData Theme`_. .. _`Twitter`: https://twitter.com/cebraAI .. _`PyData Theme`: https://github.com/pydata/pydata-sphinx-theme .. _`DeepLabCut`: https://deeplabcut.org +.. _`Discussions`: https://github.com/AdaptiveMotorControlLab/CEBRA/discussions .. _`Github`: https://github.com/AdaptiveMotorControlLab/cebra .. _`email`: mailto:mackenzie.mathis@epfl.ch .. _`Steffen Schneider`: https://github.com/stes -.. _`Célia Benquet`: https://github.com/CeliaBenquet .. _`Mackenzie Mathis`: https://github.com/MMathisLab diff --git a/docs/source/installation.rst b/docs/source/installation.rst index a9650452..c5823fa7 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -4,7 +4,7 @@ Installation Guide System Requirements ------------------- -CEBRA is written in Python (3.8+) and PyTorch. CEBRA is most effective when used with a GPU, but CPU-only support is provided. We provide instructions to run CEBRA on your system directly. The instructions below were tested on different compute setups with Ubuntu 18.04 or 20.04, using Nvidia GTX 2080, A4000, and V100 cards. Other setups are possible (including Windows), as long as CUDA 10.2+ support is guaranteed. +CEBRA is written in Python (3.9+) and PyTorch. CEBRA is most effective when used with a GPU, but CPU-only support is provided. We provide instructions to run CEBRA on your system directly. The instructions below were tested on different compute setups with Ubuntu 18.04 or 20.04, using Nvidia GTX 2080, A4000, and V100 cards. Other setups are possible (including Windows), as long as CUDA 10.2+ support is guaranteed. - Software dependencies and operating systems: - Linux or MacOS @@ -93,11 +93,11 @@ we outline different options below. * 🚀 For more advanced users, CEBRA has different extra install options that you can select based on your usecase: - * ``[integrations]``: This will install (experimental) support for our streamlit and jupyter integrations. 
+ * ``[integrations]``: This will install (experimental) support for integrations, such as plotly. * ``[docs]``: This will install additional dependencies for building the package documentation. * ``[dev]``: This will install additional dependencies for development, unit and integration testing, code formatting, etc. Install this extension if you want to work on a pull request. - * ``[demos]``: This will install additional dependencies for running our demo notebooks. + * ``[demos]``: This will install additional dependencies for running our demo notebooks in Jupyter. * ``[datasets]``: This extension will install additional dependencies to use the pre-installed datasets in ``cebra.datasets``. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 53821e36..8b60aa69 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1207,42 +1207,47 @@ Putting all previous snippet examples together, we obtain the following pipeline # 1. Define a CEBRA model cebra_model = cebra.CEBRA( - model_architecture = "offset10-model", - batch_size = 512, - learning_rate = 1e-4, - temperature_mode='constant', - temperature = 0.1, - max_iterations = 10, # TODO(user): to change to ~500-10000 depending on dataset size - #max_adapt_iterations = 10, # TODO(user): use and to change to ~100-500 if adapting - time_offsets = 10, - output_dimension = 8, - verbose = False + model_architecture = "offset10-model", + batch_size = 512, + learning_rate = 1e-4, + temperature_mode='constant', + temperature = 0.1, + max_iterations = 10, # TODO(user): to change to ~500-10000 depending on dataset size + #max_adapt_iterations = 10, # TODO(user): use and to change to ~100-500 if adapting + time_offsets = 10, + output_dimension = 8, + verbose = False ) - + # 2. Load example data neural_data = cebra.load_data(file="neural_data.npz", key="neural") new_neural_data = cebra.load_data(file="neural_data.npz", key="new_neural") continuous_label = cebra.load_data(file="auxiliary_behavior_data.h5", key="auxiliary_variables", columns=["continuous1", "continuous2", "continuous3"]) discrete_label = cebra.load_data(file="auxiliary_behavior_data.h5", key="auxiliary_variables", columns=["discrete"]).flatten() - + + assert neural_data.shape == (100, 3) assert new_neural_data.shape == (100, 4) assert discrete_label.shape == (100, ) assert continuous_label.shape == (100, 3) - - # 3. Split data and labels - ( - train_data, - valid_data, - train_discrete_label, - valid_discrete_label, - train_continuous_label, - valid_continuous_label, - ) = train_test_split(neural_data, - discrete_label, - continuous_label, - test_size=0.3) - + + # 3. Split data and labels into train/validation + from sklearn.model_selection import train_test_split + + split_idx = int(0.8 * len(neural_data)) + # suggestion: 5%-20% depending on your dataset size; note that this splits the + # into an early and late part, which might not be ideal for your data/experiment! + # As a more involved alternative, consider e.g. a nested time-series split. + + train_data = neural_data[:split_idx] + valid_data = neural_data[split_idx:] + + train_continuous_label = continuous_label[:split_idx] + valid_continuous_label = continuous_label[split_idx:] + + train_discrete_label = discrete_label[:split_idx] + valid_discrete_label = discrete_label[split_idx:] + # 4. 
Fit the model # time contrastive learning cebra_model.fit(train_data) @@ -1252,33 +1257,36 @@ Putting all previous snippet examples together, we obtain the following pipeline cebra_model.fit(train_data, train_continuous_label) # mixed behavior contrastive learning cebra_model.fit(train_data, train_discrete_label, train_continuous_label) - + + # 5. Save the model tmp_file = Path(tempfile.gettempdir(), 'cebra.pt') cebra_model.save(tmp_file) - + # 6. Load the model and compute an embedding cebra_model = cebra.CEBRA.load(tmp_file) train_embedding = cebra_model.transform(train_data) valid_embedding = cebra_model.transform(valid_data) - assert train_embedding.shape == (70, 8) # TODO(user): change to split ratio & output dim - assert valid_embedding.shape == (30, 8) # TODO(user): change to split ratio & output dim - + + assert train_embedding.shape == (80, 8) # TODO(user): change to split ratio & output dim + assert valid_embedding.shape == (20, 8) # TODO(user): change to split ratio & output dim + # 7. Evaluate the model performance (you can also check the train_data) - goodness_of_fit = cebra.sklearn.metrics.infonce_loss(cebra_model, + goodness_of_fit = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, valid_data, valid_discrete_label, - valid_continuous_label, - num_batches=5) - + valid_continuous_label) + # 8. Adapt the model to a new session cebra_model.fit(new_neural_data, adapt = True) - + # 9. Decode discrete labels behavior from the embedding decoder = cebra.KNNDecoder() decoder.fit(train_embedding, train_discrete_label) prediction = decoder.predict(valid_embedding) - assert prediction.shape == (30,) + assert prediction.shape == (20,) + + 👉 For further guidance on different/customized applications of CEBRA on your own data, refer to the ``examples/`` folder or to the full documentation folder ``docs/``. 
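The pipeline above uses a single early/late split and mentions a nested time-series split as a more involved alternative; a sketch of that alternative with ``sklearn.model_selection.TimeSeriesSplit`` (toy arrays stand in for the real recordings, and the fold handling would be adapted to the experiment) could look like this:

    import numpy as np
    from sklearn.model_selection import TimeSeriesSplit

    # Toy stand-ins for the arrays used in the pipeline above.
    neural_data = np.random.uniform(0, 1, (100, 3))
    continuous_label = np.random.uniform(0, 1, (100, 3))

    tscv = TimeSeriesSplit(n_splits=5)
    for fold, (train_idx, valid_idx) in enumerate(tscv.split(neural_data)):
        # Each fold trains on an expanding window of past samples and validates
        # on the block that immediately follows, so no future samples leak into training.
        train_data, valid_data = neural_data[train_idx], neural_data[valid_idx]
        train_label, valid_label = continuous_label[train_idx], continuous_label[valid_idx]
        print(fold, train_data.shape, valid_data.shape)
        # A fresh CEBRA model would be fit on (train_data, train_label) here and
        # scored on (valid_data, valid_label), e.g. with goodness_of_fit_score.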
From 36370beccdf806f825d732e6571f0498c004f877 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Tue, 4 Mar 2025 22:58:59 +0100 Subject: [PATCH 062/100] render plotly in our docs, show code/doc version (#231) --- .github/workflows/docs.yml | 7 +++++++ cebra/integrations/plotly.py | 5 +++-- docs/Makefile | 5 +++++ docs/source/_static/css/custom.js | 6 ++++++ docs/source/conf.py | 28 +++++++++++++++++++++++++--- 5 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 docs/source/_static/css/custom.js diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 47b5862d..826d9e91 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,6 +9,12 @@ on: - main - public - dev + paths: + - '**.py' + - '**.ipynb' + - '**.js' + - '**.rst' + - '**.md' jobs: build: @@ -69,6 +75,7 @@ jobs: pip install torch --extra-index-url https://download.pytorch.org/whl/cpu pip install '.[docs]' + - name: Build docs run: | ls docs/source/cebra-figures diff --git a/cebra/integrations/plotly.py b/cebra/integrations/plotly.py index bbaa1de6..8b0515e4 100644 --- a/cebra/integrations/plotly.py +++ b/cebra/integrations/plotly.py @@ -28,6 +28,7 @@ import numpy.typing as npt import plotly.graph_objects import torch +import plotly.graph_objects as go from cebra.integrations.matplotlib import _EmbeddingPlot @@ -154,7 +155,7 @@ def _plot_3d(self, **kwargs) -> plotly.graph_objects.Figure: def plot_embedding_interactive( embedding: Union[npt.NDArray, torch.Tensor], embedding_labels: Optional[Union[npt.NDArray, torch.Tensor, str]] = "grey", - axis: Optional[plotly.graph_objects.Figure] = None, + axis: Optional["go.Figure"] = None, markersize: float = 1, idx_order: Optional[Tuple[int]] = None, alpha: float = 0.4, @@ -163,7 +164,7 @@ def plot_embedding_interactive( figsize: Tuple[int] = (5, 5), dpi: int = 100, **kwargs, -) -> plotly.graph_objects.Figure: +) -> "go.Figure": """Plot embedding in a 3D dimensional space. This is supposing that the dimensions provided to ``idx_order`` are in the range of the number of diff --git a/docs/Makefile b/docs/Makefile index 741d165e..2739f4af 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -18,6 +18,11 @@ help: html: PYTHONPATH=.. $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +# Build multiple versions +html_versions: + for v in latest v0.2.0 v0.3.0 v0.4.0; do \ + PYTHONPATH=.. $(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)/$$v"; \ + done # Remove the current temp folder and page build clean: rm -rf build diff --git a/docs/source/_static/css/custom.js b/docs/source/_static/css/custom.js new file mode 100644 index 00000000..f9afa170 --- /dev/null +++ b/docs/source/_static/css/custom.js @@ -0,0 +1,6 @@ +requirejs.config({ + paths: { + base: '/static/base', + plotly: 'https://cdn.plot.ly/plotly-2.12.1.min.js?noext', + }, +}); diff --git a/docs/source/conf.py b/docs/source/conf.py index c5e12b5a..28cf2b14 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,8 +47,8 @@ def get_years(start_year=2021): # -- Project information ----------------------------------------------------- project = "cebra" -copyright = f"""{get_years(2021)}, Steffen Schneider, Jin H Lee, Mackenzie Mathis""" -author = "Steffen Schneider, Jin H Lee, Mackenzie Mathis" +copyright = f"""{get_years(2021)}""" +author = "See AUTHORS.md" # The full version, including alpha/beta/rc tags release = cebra.__version__ @@ -57,6 +57,13 @@ def get_years(start_year=2021): # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. + +#https://github.com/spatialaudio/nbsphinx/issues/128#issuecomment-1158712159 +html_js_files = [ + "require.min.js", # Add to your _static + "custom.js", +] + extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", @@ -68,13 +75,13 @@ def get_years(start_year=2021): "sphinx_tabs.tabs", "sphinx.ext.mathjax", "IPython.sphinxext.ipython_console_highlighting", - # "sphinx_panels", # Note: package to avoid: no longer maintained. "sphinx_design", "sphinx_togglebutton", "sphinx.ext.doctest", "sphinx_gallery.load_style", ] + coverage_show_missing_items = True panels_add_bootstrap_css = False @@ -137,6 +144,21 @@ def get_years(start_year=2021): # a list of builtin themes. html_theme = "pydata_sphinx_theme" +html_context = { + "default_mode": "light", + "switcher": { + "version_match": "latest", # Adjust this dynamically per version + "versions": [ + ("latest", "/latest/"), + ("v0.2.0", "/v0.2.0/"), + ("v0.3.0", "/v0.3.0/"), + ("v0.4.0", "/v0.4.0/"), + ("v0.5.0rc1", "/v0.5.0rc1/"), + ], + }, + "navbar_start": ["version-switcher", "navbar-logo"], # Place the dropdown above the logo +} + # More info on theme options: # https://pydata-sphinx-theme.readthedocs.io/en/latest/user_guide/configuring.html html_theme_options = { From f7f4d7fd1c584181dbdbe694e77eb0479026abb3 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Thu, 6 Mar 2025 18:00:29 +0100 Subject: [PATCH 063/100] Update layout.html (#233) --- docs/source/_templates/layout.html | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index 2994db97..0140a5cf 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -1,11 +1,15 @@ {% extends "pydata_sphinx_theme/layout.html" %} -{% block fonts %} +{% block extrahead %} + + + +{% endblock %} +{% block fonts %} - {% endblock %} {% block docs_sidebar %} From 798f7b298cce5964009e6085319ad11322c6a5bd Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Thu, 6 Mar 2025 19:22:55 +0100 Subject: [PATCH 064/100] Update conf.py (#234) - adding link to new notebook icon --- docs/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 28cf2b14..a58f24ec 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -219,6 +219,8 @@ def get_years(start_year=2021): ] nbsphinx_thumbnails = { + "demo_notebooks/CEBRA_best_practices": + "_static/thumbnails/cebra-best.png", "demo_notebooks/Demo_primate_reaching": "_static/thumbnails/ForelimbS1.png", "demo_notebooks/Demo_hippocampus": From 4a2996d1cb17a1b74a778883369c1d257d4b10ad Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Sat, 15 Mar 2025 13:59:38 +0100 Subject: [PATCH 065/100] Refactoring setup.cfg (#228) --- AUTHORS.md | 28 ++++++++++++++++++++++++++++ setup.cfg | 7 +++---- 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 AUTHORS.md diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 00000000..11415b12 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,28 @@ + + + + +CEBRA was initially developed by **Mackenzie Mathis** and **Steffen Schneider** (2021+), who are co-inventors on the patent application [WO2023143843](https://infoscience.epfl.ch/entities/patent/0d9debed-4d22-47b7-bad1-f211e7010323). 
+**Jin Hwa Lee** contributed significantly to our first paper: + +> **Schneider, S., Lee, J.H., & Mathis, M.W.** +> [*Learnable latent embeddings for joint behavioural and neural analysis.*](https://doi.org/10.1038/s41586-023-06031-6) +> Nature 617, 360–368 (2023) + +CEBRA is actively developed by [**Mackenzie Mathis**](https://www.mackenziemathislab.org/) and [**Steffen Schneider**](https://dynamical-inference.ai/) and their labs. + +It is a publicly available tool that has benefited from contributions and suggestions from many individuals: [CEBRA/graphs/contributors](https://github.com/AdaptiveMotorControlLab/CEBRA/graphs/contributors). + +## CEBRA Extensions + +### 2023 +- **Steffen Schneider, Rodrigo González Laiz, Markus Frey, Mackenzie W. Mathis** + [*Identifiable attribution maps using regularized contrastive learning.*](https://sslneurips23.github.io/paper_pdfs/paper_80.pdf) + NeurIPS 4th Workshop on Self-Supervised Learning: Theory and Practice (2023) + +### 2025 +- **Steffen Schneider, Rodrigo González Laiz, Anastasiia Filippova, Markus Frey, Mackenzie W. Mathis** + [*Time-series attribution maps with regularized contrastive learning.*](https://openreview.net/forum?id=aGrCXoTB4P) + AISTATS (2025) + + diff --git a/setup.cfg b/setup.cfg index 9da156ec..9a3c3a41 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,8 @@ [metadata] name = cebra version = attr: cebra.__version__ -author = Steffen Schneider, Jin H Lee, Mackenzie W Mathis -author_email = stes@hey.com +author = file: AUTHORS.md +author_email = stes@hey.com, mackenzie@post.harvard.edu description = Consistent Embeddings of high-dimensional Recordings using Auxiliary variables long_description = file: README.md long_description_content_type = text/markdown @@ -58,9 +58,9 @@ datasets = hdf5storage # for creating .mat files in new format openpyxl # for excel file format loading integrations = - jupyter pandas plotly + seaborn docs = sphinx==5.3 sphinx-gallery==0.10.1 @@ -83,7 +83,6 @@ demos = ipykernel jupyter nbconvert - seaborn # TODO(stes): Additional dependency for running # co-homology analysis # is ripser, which can be tricky to From 7abd1b02bc7a8765633e6ee4f42ebac51f90dd4e Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Sat, 15 Mar 2025 17:57:02 +0100 Subject: [PATCH 066/100] Home page landing update (#235) * website refresh --- docs/root/index.html | 266 +++++++++++++++++++++++++++---------------- 1 file changed, 170 insertions(+), 96 deletions(-) diff --git a/docs/root/index.html b/docs/root/index.html index 86015297..cee11753 100644 --- a/docs/root/index.html +++ b/docs/root/index.html @@ -7,21 +7,21 @@ - Learnable latent embeddings for joint behavioural and neural analysis - + CEBRA + - + - + @@ -36,7 +36,6 @@ CEBRA @@ -93,58 +116,26 @@
(docs/root/index.html diff, visible page text only:)

Removed header block:
    Learnable latent embeddings for joint behavioural and neural analysis
    Steffen Schneider* (EPFL & IMPRS-IS), Jin Hwa Lee* (EPFL), Mackenzie Mathis (EPFL)

Added header block:
    CEBRA: a self-supervised learning algorithm for obtaining interpretable, Consistent EmBeddings of high-dimensional Recordings using Auxiliary variables

Removed intro paragraph:
    CEBRA is a machine-learning method that can be used to compress time series in a way that reveals otherwise hidden structures in the variability of the data. It excels on behavioural and neural data recorded simultaneously, and it can decode activity from the visual cortex of the mouse brain to reconstruct a viewed video.

Demo Applications:
    Application of CEBRA-Behavior to rat hippocampus data (Grosmark and Buzsáki, 2016), showing position/neural activity (left), overlayed with decoding obtained by CEBRA. The current point in embedding space is highlighted (right). CEBRA obtains a median absolute error of 5cm (total track length: 160cm; see Schneider et al. 2023 for details). Video is played at 2x real-time speed. (The caption previously pointed to the pre-print.)

    Added: Interactive visualization of the CEBRA embedding for the rat hippocampus data. This 3D plot shows how neural activity is mapped to a lower-dimensional space that correlates with the animal's position and movement direction. Open in Colaboratory.

    CEBRA applied to mouse primary visual cortex, collected at the Allen Institute (de Vries et al. 2020, Siegle et al. 2021). 2-photon and Neuropixels recordings are embedded with CEBRA using DINO frame features as labels. The embedding is used to decode the video frames using a kNN decoder on the CEBRA-Behavior embedding from the test set.

    Added: CEBRA applied to M1 and S1 neural data, demonstrating how neural activity from primary motor and somatosensory cortices can be effectively embedded and analyzed. See DeWolf et al. 2024 for details.

Publications (new section):
    Learnable latent embeddings for joint behavioural and neural analysis.
    Steffen Schneider*, Jin Hwa Lee*, Mackenzie Weygandt Mathis. Nature 2023.
    A comprehensive introduction to CEBRA, demonstrating its capabilities in joint behavioral and neural analysis across various datasets and species. Links: Read Paper, Preprint.

    Time-series attribution maps with regularized contrastive learning.
    Steffen Schneider, Rodrigo González Laiz, Anastasiia Filippova, Markus Frey, Mackenzie Weygandt Mathis. AISTATS 2025.
    An extension of CEBRA that provides attribution maps for time-series data using regularized contrastive learning. Links: Read Paper, Preprint, NeurIPS-W 2023 Version.

Patent Information (new section):
    Patent Pending. Please note EPFL has filed a patent titled "Dimensionality reduction of time-series data, and systems and devices that use the resultant embeddings", so if this does not work for your non-academic use case, please contact the Tech Transfer Office at EPFL.

Section heading renamed from "Abstract" to "Overview".

Removed "Pre-Print" section:
    The pre-print is available on arxiv at arxiv.org/abs/2204.00673.

Software:
    You can find our official implementation of the CEBRA algorithm on GitHub: Watch and Star the repository to be notified of future updates and releases. You can also follow us on Twitter for updates on the project. (The invitation to subscribe to a mailing list was dropped.) If you are interested in collaborations, please contact us via email.

BibTeX (now "Please cite our papers as follows:"):
    @article{schneider2023cebra,
      author  = {Steffen Schneider and Jin Hwa Lee and Mackenzie Weygandt Mathis},
      title   = {Learnable latent embeddings for joint behavioural and neural analysis},
      journal = {Nature},
      year    = {2023},
    }

    @inproceedings{schneider2025timeseries,
      title     = {Time-series attribution maps with regularized contrastive learning},
      author    = {Steffen Schneider and Rodrigo Gonz{\'a}lez Laiz and Anastasiia Filippova and Markus Frey and Mackenzie Weygandt Mathis},
      booktitle = {The 28th International Conference on Artificial Intelligence and Statistics},
      year      = {2025},
      url       = {https://openreview.net/forum?id=aGrCXoTB4P}
    }

Impact & Citations (new section):
    CEBRA has been cited in numerous high-impact publications across neuroscience, machine learning, and related fields. Our work has influenced research in neural decoding, brain-computer interfaces, computational neuroscience, and machine learning methods for time-series analysis.
    Our research has been cited in proceedings and journals including Nature, Science, ICML, Nature Neuroscience, Neuron, NeurIPS, ICLR, and others.

Footer:
    MLAI Logo, © 2021 - present | EPFL Mathis Laboratory.
Webpage designed using Bootstrap 5 and Fontawesome 5. From 673019a18a07cbd1b5e39487bc6ff47aa574e1a4 Mon Sep 17 00:00:00 2001 From: Mackenzie Mathis Date: Thu, 17 Apr 2025 10:51:45 +0200 Subject: [PATCH 067/100] v0.5.0 (#238) --- AUTHORS.md | 28 +++++++++++++--------------- Dockerfile | 2 +- Makefile | 2 +- PKGBUILD | 2 +- cebra/__init__.py | 2 +- cebra/integrations/plotly.py | 25 +++++++++++++------------ docs/root/index.html | 30 +++++++++++++++--------------- docs/source/conf.py | 9 +++++---- docs/source/usage.rst | 32 ++++++++++++++++---------------- reinstall.sh | 2 +- setup.cfg | 4 +++- tools/build_docker.sh | 24 +++++++++++++++++++++++- tools/build_docs.sh | 4 ++-- 13 files changed, 95 insertions(+), 71 deletions(-) diff --git a/AUTHORS.md b/AUTHORS.md index 11415b12..17db8887 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,27 +2,25 @@ -CEBRA was initially developed by **Mackenzie Mathis** and **Steffen Schneider** (2021+), who are co-inventors on the patent application [WO2023143843](https://infoscience.epfl.ch/entities/patent/0d9debed-4d22-47b7-bad1-f211e7010323). -**Jin Hwa Lee** contributed significantly to our first paper: +CEBRA was initially developed by **Mackenzie Mathis** and **Steffen Schneider** (2021+), who are co-inventors on the patent application [WO2023143843](https://infoscience.epfl.ch/entities/patent/0d9debed-4d22-47b7-bad1-f211e7010323). +**Jin Hwa Lee** contributed significantly to our first paper: -> **Schneider, S., Lee, J.H., & Mathis, M.W.** -> [*Learnable latent embeddings for joint behavioural and neural analysis.*](https://doi.org/10.1038/s41586-023-06031-6) +> **Schneider, S., Lee, J.H., & Mathis, M.W.** +> [*Learnable latent embeddings for joint behavioural and neural analysis.*](https://doi.org/10.1038/s41586-023-06031-6) > Nature 617, 360–368 (2023) -CEBRA is actively developed by [**Mackenzie Mathis**](https://www.mackenziemathislab.org/) and [**Steffen Schneider**](https://dynamical-inference.ai/) and their labs. +CEBRA is actively developed by [**Mackenzie Mathis**](https://www.mackenziemathislab.org/) and [**Steffen Schneider**](https://dynamical-inference.ai/) and their labs. -It is a publicly available tool that has benefited from contributions and suggestions from many individuals: [CEBRA/graphs/contributors](https://github.com/AdaptiveMotorControlLab/CEBRA/graphs/contributors). +It is a publicly available tool that has benefited from contributions and suggestions from many individuals: [CEBRA/graphs/contributors](https://github.com/AdaptiveMotorControlLab/CEBRA/graphs/contributors). -## CEBRA Extensions +## CEBRA Extensions -### 2023 -- **Steffen Schneider, Rodrigo González Laiz, Markus Frey, Mackenzie W. Mathis** - [*Identifiable attribution maps using regularized contrastive learning.*](https://sslneurips23.github.io/paper_pdfs/paper_80.pdf) +### 2023 +- **Steffen Schneider, Rodrigo González Laiz, Markus Frey, Mackenzie W. Mathis** + [*Identifiable attribution maps using regularized contrastive learning.*](https://sslneurips23.github.io/paper_pdfs/paper_80.pdf) NeurIPS 4th Workshop on Self-Supervised Learning: Theory and Practice (2023) -### 2025 -- **Steffen Schneider, Rodrigo González Laiz, Anastasiia Filippova, Markus Frey, Mackenzie W. Mathis** - [*Time-series attribution maps with regularized contrastive learning.*](https://openreview.net/forum?id=aGrCXoTB4P) +### 2025 +- **Steffen Schneider, Rodrigo González Laiz, Anastasiia Filippova, Markus Frey, Mackenzie W. 
Mathis** + [*Time-series attribution maps with regularized contrastive learning.*](https://openreview.net/forum?id=aGrCXoTB4P) AISTATS (2025) - - diff --git a/Dockerfile b/Dockerfile index 7cd326d5..46c8a555 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ RUN make dist FROM cebra-base # install the cebra wheel -ENV WHEEL=cebra-0.5.0rc1-py3-none-any.whl +ENV WHEEL=cebra-0.5.0-py3-none-any.whl WORKDIR /build COPY --from=wheel /build/dist/${WHEEL} . RUN pip install --no-cache-dir ${WHEEL}'[dev,integrations,datasets]' diff --git a/Makefile b/Makefile index a1e8d3b2..5b8cb107 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CEBRA_VERSION := 0.5.0rc1 +CEBRA_VERSION := 0.5.0 dist: python3 -m pip install virtualenv diff --git a/PKGBUILD b/PKGBUILD index 1f8b3db5..7aa985a8 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Steffen Schneider pkgname=python-cebra _pkgname=cebra -pkgver=0.5.0rc1 +pkgver=0.5.0 pkgrel=1 pkgdesc="Consistent Embeddings of high-dimensional Recordings using Auxiliary variables" url="https://cebra.ai" diff --git a/cebra/__init__.py b/cebra/__init__.py index edf1b5ee..0eb1f645 100644 --- a/cebra/__init__.py +++ b/cebra/__init__.py @@ -66,7 +66,7 @@ import cebra.integrations.sklearn as sklearn -__version__ = "0.5.0rc1" +__version__ = "0.5.0" __all__ = ["CEBRA"] __allow_lazy_imports = False __lazy_imports = {} diff --git a/cebra/integrations/plotly.py b/cebra/integrations/plotly.py index 8b0515e4..2cfc5ec9 100644 --- a/cebra/integrations/plotly.py +++ b/cebra/integrations/plotly.py @@ -27,8 +27,8 @@ import numpy as np import numpy.typing as npt import plotly.graph_objects -import torch import plotly.graph_objects as go +import torch from cebra.integrations.matplotlib import _EmbeddingPlot @@ -153,17 +153,18 @@ def _plot_3d(self, **kwargs) -> plotly.graph_objects.Figure: def plot_embedding_interactive( - embedding: Union[npt.NDArray, torch.Tensor], - embedding_labels: Optional[Union[npt.NDArray, torch.Tensor, str]] = "grey", - axis: Optional["go.Figure"] = None, - markersize: float = 1, - idx_order: Optional[Tuple[int]] = None, - alpha: float = 0.4, - cmap: str = "cool", - title: str = "Embedding", - figsize: Tuple[int] = (5, 5), - dpi: int = 100, - **kwargs, + embedding: Union[npt.NDArray, torch.Tensor], + embedding_labels: Optional[Union[npt.NDArray, torch.Tensor, + str]] = "grey", + axis: Optional["go.Figure"] = None, + markersize: float = 1, + idx_order: Optional[Tuple[int]] = None, + alpha: float = 0.4, + cmap: str = "cool", + title: str = "Embedding", + figsize: Tuple[int] = (5, 5), + dpi: int = 100, + **kwargs, ) -> "go.Figure": """Plot embedding in a 3D dimensional space. diff --git a/docs/root/index.html b/docs/root/index.html index cee11753..aa740039 100644 --- a/docs/root/index.html +++ b/docs/root/index.html @@ -145,16 +145,16 @@

-

(docs/root/index.html diff, visible page text only:)

Updated intro paragraph:
    CEBRA is a machine-learning method that can be used to compress time series in a way that reveals otherwise hidden structures in the variability of the data. It excels on behavioural and neural data recorded simultaneously. We have shown it can be used to decode the activity from the visual cortex of the mouse brain to reconstruct a viewed video, to decode trajectories from the sensorimotor cortex of primates, and for decoding position during navigation. For these use cases and other demos see our Documentation.

Demo Applications captions touched by the remaining hunks:
    Interactive visualization of the CEBRA embedding for the rat hippocampus data. This 3D plot shows how neural activity is mapped to a lower-dimensional space that correlates with the animal's position and movement direction. Open in Colaboratory.

    CEBRA applied to mouse primary visual cortex, collected at the Allen Institute (de Vries et al. 2020, Siegle et al. 2021). 2-photon and Neuropixels recordings are embedded with CEBRA using DINO frame features as labels. The embedding is used to decode the video frames using a kNN decoder on the CEBRA-Behavior embedding from the test set.