-
Notifications
You must be signed in to change notification settings - Fork 89
Decouple batch size and number of negatives #263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
723bcfb
dbabb6e
540b006
07212f2
6c2d559
0dba5fc
e259e45
f2af3b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
"""Base classes for datasets and loaders.""" | ||
|
||
import abc | ||
from typing import Iterator | ||
|
||
import literate_dataclasses as dataclasses | ||
import torch | ||
|
@@ -239,6 +240,12 @@ class Loader(abc.ABC, cebra.io.HasDevice): | |
batch_size: int = dataclasses.field(default=None, | ||
doc="""The total batch size.""") | ||
|
||
num_negatives: int = dataclasses.field( | ||
default=None, | ||
doc=("The number of negative samples to draw for each reference. " | ||
"If not specified, the batch size is used."), | ||
) | ||
|
||
def __post_init__(self): | ||
if self.num_steps is None or self.num_steps <= 0: | ||
raise ValueError( | ||
|
@@ -248,28 +255,41 @@ def __post_init__(self): | |
raise ValueError( | ||
f"Batch size has to be None, or a positive value. Got {self.batch_size}." | ||
) | ||
if self.num_negatives is not None and self.num_negatives <= 0: | ||
raise ValueError( | ||
f"Number of negatives has to be None, or a positive value. Got {self.num_negatives}." | ||
) | ||
|
||
if self.num_negatives is None: | ||
self.num_negatives = self.batch_size | ||
|
||
def __len__(self): | ||
"""The number of batches returned when calling as an iterator.""" | ||
return self.num_steps | ||
|
||
def __iter__(self) -> Batch: | ||
def __iter__(self) -> Iterator[Batch]: | ||
for _ in range(len(self)): | ||
index = self.get_indices(num_samples=self.batch_size) | ||
index = self.get_indices() | ||
yield self.dataset.load_batch(index) | ||
|
||
@abc.abstractmethod | ||
def get_indices(self, num_samples: int): | ||
def get_indices(self, num_samples: int = None): | ||
"""Sample and return the specified number of indices. | ||
|
||
The elements of the returned `BatchIndex` will be used to index the | ||
`dataset` of this data loader. | ||
|
||
Args: | ||
num_samples: The size of each of the reference, positive and | ||
negative samples. | ||
num_samples: Deprecated. Use ``batch_size`` on the instance level | ||
instead. | ||
|
||
Returns: | ||
batch indices for the reference, positive and negative sample. | ||
|
||
Note: | ||
From version 0.7.0 onwards, specifying ``num_samples`` | ||
directly is deprecated and will be removed in version 0.8.0. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The method signature still accepts 'num_samples' parameter even though it's deprecated. Consider removing this parameter entirely or making it keyword-only to prevent accidental usage. Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||
Please set ``batch_size`` and ``num_negatives`` on the instance | ||
level instead. | ||
""" | ||
raise NotImplementedError() |
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -27,6 +27,7 @@ | |||||||||||||||||
|
||||||||||||||||||
import abc | ||||||||||||||||||
import warnings | ||||||||||||||||||
from typing import Iterator | ||||||||||||||||||
|
||||||||||||||||||
import literate_dataclasses as dataclasses | ||||||||||||||||||
import torch | ||||||||||||||||||
|
@@ -138,7 +139,7 @@ def _init_distribution(self): | |||||||||||||||||
f"Invalid choice of prior distribution. Got '{self.prior}', but " | ||||||||||||||||||
f"only accept 'uniform' or 'empirical' as potential values.") | ||||||||||||||||||
|
||||||||||||||||||
def get_indices(self, num_samples: int) -> BatchIndex: | ||||||||||||||||||
def get_indices(self) -> BatchIndex: | ||||||||||||||||||
"""Samples indices for reference, positive and negative examples. | ||||||||||||||||||
|
||||||||||||||||||
The reference samples will be sampled from the empirical or uniform prior | ||||||||||||||||||
|
@@ -154,13 +155,15 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
Args: | ||||||||||||||||||
num_samples: The number of samples (batch size) of the returned | ||||||||||||||||||
:py:class:`cebra.data.datatypes.BatchIndex`. | ||||||||||||||||||
num_negatives: The number of negative samples. If None, defaults to ``batch_size``. | ||||||||||||||||||
|
||||||||||||||||||
Returns: | ||||||||||||||||||
Indices for reference, positive and negatives samples. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The docstring mentions 'num_samples' but this parameter no longer exists in the function signature. The docstring should be updated to reflect that num_negatives is now an instance attribute.
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||||||||
""" | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(num_samples * 2) | ||||||||||||||||||
negative_idx = reference_idx[num_samples:] | ||||||||||||||||||
reference_idx = reference_idx[:num_samples] | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(self.batch_size + | ||||||||||||||||||
self.num_negatives) | ||||||||||||||||||
negative_idx = reference_idx[self.batch_size:] | ||||||||||||||||||
reference_idx = reference_idx[:self.batch_size] | ||||||||||||||||||
reference = self.index[reference_idx] | ||||||||||||||||||
positive_idx = self.distribution.sample_conditional(reference) | ||||||||||||||||||
return BatchIndex(reference=reference_idx, | ||||||||||||||||||
|
@@ -246,7 +249,7 @@ def _init_distribution(self): | |||||||||||||||||
else: | ||||||||||||||||||
raise ValueError(self.conditional) | ||||||||||||||||||
|
||||||||||||||||||
def get_indices(self, num_samples: int) -> BatchIndex: | ||||||||||||||||||
def get_indices(self) -> BatchIndex: | ||||||||||||||||||
"""Samples indices for reference, positive and negative examples. | ||||||||||||||||||
|
||||||||||||||||||
The reference and negative samples will be sampled uniformly from | ||||||||||||||||||
|
@@ -262,9 +265,10 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
Returns: | ||||||||||||||||||
Indices for reference, positive and negatives samples. | ||||||||||||||||||
""" | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(num_samples * 2) | ||||||||||||||||||
negative_idx = reference_idx[num_samples:] | ||||||||||||||||||
reference_idx = reference_idx[:num_samples] | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(self.batch_size + | ||||||||||||||||||
self.num_negatives) | ||||||||||||||||||
negative_idx = reference_idx[self.batch_size:] | ||||||||||||||||||
reference_idx = reference_idx[:self.batch_size] | ||||||||||||||||||
positive_idx = self.distribution.sample_conditional(reference_idx) | ||||||||||||||||||
return BatchIndex(reference=reference_idx, | ||||||||||||||||||
positive=positive_idx, | ||||||||||||||||||
|
@@ -305,7 +309,7 @@ def __post_init__(self): | |||||||||||||||||
continuous=self.cindex, | ||||||||||||||||||
time_delta=self.time_offset) | ||||||||||||||||||
|
||||||||||||||||||
def get_indices(self, num_samples: int) -> BatchIndex: | ||||||||||||||||||
def get_indices(self) -> BatchIndex: | ||||||||||||||||||
"""Samples indices for reference, positive and negative examples. | ||||||||||||||||||
|
||||||||||||||||||
The reference and negative samples will be sampled uniformly from | ||||||||||||||||||
|
@@ -319,6 +323,7 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
Args: | ||||||||||||||||||
num_samples: The number of samples (batch size) of the returned | ||||||||||||||||||
:py:class:`cebra.data.datatypes.BatchIndex`. | ||||||||||||||||||
num_negatives: The number of negative samples. If None, defaults to ``batch_size``. | ||||||||||||||||||
|
||||||||||||||||||
Returns: | ||||||||||||||||||
Indices for reference, positive and negatives samples. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to the previous comment, the docstring references 'num_samples' which no longer exists. This should be updated to reflect the current implementation.
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||||||||
|
@@ -328,10 +333,13 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
class. | ||||||||||||||||||
- Sample the negatives with matching discrete variable | ||||||||||||||||||
""" | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(num_samples) | ||||||||||||||||||
reference_idx = self.distribution.sample_prior(self.batch_size + | ||||||||||||||||||
self.num_negatives) | ||||||||||||||||||
negative_idx = reference_idx[self.batch_size:] | ||||||||||||||||||
reference_idx = reference_idx[:self.batch_size] | ||||||||||||||||||
return BatchIndex( | ||||||||||||||||||
reference=reference_idx, | ||||||||||||||||||
negative=self.distribution.sample_prior(num_samples), | ||||||||||||||||||
negative=negative_idx, | ||||||||||||||||||
positive=self.distribution.sample_conditional(reference_idx), | ||||||||||||||||||
) | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -421,11 +429,11 @@ def _init_time_distribution(self): | |||||||||||||||||
else: | ||||||||||||||||||
raise ValueError | ||||||||||||||||||
|
||||||||||||||||||
def get_indices(self, num_samples: int) -> BatchIndex: | ||||||||||||||||||
def get_indices(self) -> BatchIndex: | ||||||||||||||||||
"""Samples indices for reference, positive and negative examples. | ||||||||||||||||||
|
||||||||||||||||||
The reference and negative samples will be sampled uniformly from | ||||||||||||||||||
all available time steps, and a total of ``2*num_samples`` will be | ||||||||||||||||||
all available time steps, and a total of ``batch_size + num_negatives`` will be | ||||||||||||||||||
returned for both. | ||||||||||||||||||
|
||||||||||||||||||
For the positive samples, ``batch_size`` are sampled according to the | ||||||||||||||||||
|
@@ -436,6 +444,7 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
Args: | ||||||||||||||||||
num_samples: The number of samples (batch size) of the returned | ||||||||||||||||||
:py:class:`cebra.data.datatypes.BatchIndex`. | ||||||||||||||||||
num_negatives: The number of negative samples. If None, defaults to num_samples. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another instance where the docstring incorrectly references 'num_samples'. This should be updated to reflect that num_negatives is an instance attribute. Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||||||||
|
||||||||||||||||||
Returns: | ||||||||||||||||||
Indices for reference, positive and negatives samples. | ||||||||||||||||||
|
@@ -444,9 +453,10 @@ def get_indices(self, num_samples: int) -> BatchIndex: | |||||||||||||||||
Add the ``empirical`` vs. ``discrete`` sampling modes to this | ||||||||||||||||||
class. | ||||||||||||||||||
""" | ||||||||||||||||||
reference_idx = self.time_distribution.sample_prior(num_samples * 2) | ||||||||||||||||||
negative_idx = reference_idx[num_samples:] | ||||||||||||||||||
reference_idx = reference_idx[:num_samples] | ||||||||||||||||||
reference_idx = self.time_distribution.sample_prior(self.batch_size + | ||||||||||||||||||
self.num_negatives) | ||||||||||||||||||
negative_idx = reference_idx[self.batch_size:] | ||||||||||||||||||
reference_idx = reference_idx[:self.batch_size] | ||||||||||||||||||
behavior_positive_idx = self.behavior_distribution.sample_conditional( | ||||||||||||||||||
reference_idx) | ||||||||||||||||||
time_positive_idx = self.time_distribution.sample_conditional( | ||||||||||||||||||
|
@@ -464,13 +474,18 @@ class FullDataLoader(ContinuousDataLoader): | |||||||||||||||||
|
||||||||||||||||||
def __post_init__(self): | ||||||||||||||||||
super().__post_init__() | ||||||||||||||||||
self.batch_size = None | ||||||||||||||||||
|
||||||||||||||||||
if self.batch_size is not None: | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The condition check for Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||||||||
raise ValueError("Batch size cannot be set for FullDataLoader.") | ||||||||||||||||||
if self.num_negatives is not None: | ||||||||||||||||||
raise ValueError( | ||||||||||||||||||
"Number of negatives cannot be set for FullDataLoader.") | ||||||||||||||||||
|
||||||||||||||||||
@property | ||||||||||||||||||
def offset(self): | ||||||||||||||||||
return self.dataset.offset | ||||||||||||||||||
|
||||||||||||||||||
def get_indices(self, num_samples=None) -> BatchIndex: | ||||||||||||||||||
def get_indices(self) -> BatchIndex: | ||||||||||||||||||
"""Samples indices for reference, positive and negative examples. | ||||||||||||||||||
|
||||||||||||||||||
The reference indices are all available (valid, according to the | ||||||||||||||||||
|
@@ -490,7 +505,6 @@ def get_indices(self, num_samples=None) -> BatchIndex: | |||||||||||||||||
Add the ``empirical`` vs. ``discrete`` sampling modes to this | ||||||||||||||||||
class. | ||||||||||||||||||
""" | ||||||||||||||||||
assert num_samples is None | ||||||||||||||||||
|
||||||||||||||||||
reference_idx = torch.arange( | ||||||||||||||||||
self.offset.left, | ||||||||||||||||||
|
@@ -504,7 +518,6 @@ def get_indices(self, num_samples=None) -> BatchIndex: | |||||||||||||||||
positive=positive_idx, | ||||||||||||||||||
negative=negative_idx) | ||||||||||||||||||
|
||||||||||||||||||
def __iter__(self): | ||||||||||||||||||
def __iter__(self) -> Iterator[BatchIndex]: | ||||||||||||||||||
for _ in range(len(self)): | ||||||||||||||||||
index = self.get_indices(num_samples=self.batch_size) | ||||||||||||||||||
yield index | ||||||||||||||||||
yield self.get_indices() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `get_indices` method signature change introduces a potential breaking change by making `num_samples` optional with a default of `None`. While the deprecation is documented, the method should handle the case where `num_samples` is passed but shouldn't be used, potentially issuing a deprecation warning to guide users toward the new API. Copilot uses AI. Check for mistakes.