sensorium-competition · schewskone · Jan 26, 2026 · Jan 26, 2026 · reneburghardt · Jan 28, 2026
diff --git a/experanto/utils.py b/experanto/utils.py
@@ -121,6 +121,111 @@ def __iter__(self):
         for i in range(len(self)):
             yield next(self.iterator)
 
+    def benchmark(
+        self,
+        n_batches=100,
+        n_warmup=10,
+        verbose=False,
+    ):
+        """Benchmark the data loading time over a number of batches.
+
+        Iteration wraps around: when the current pass over the loader is
+        exhausted, a fresh iterator is created and fetching continues. For
+        timed batches, the cost of that restart is included in the batch's
+        measured time.
+
+        Args:
+            n_batches (int, optional): Number of batches to time. If ``None``,
+                iterate over ``len(self)`` batches. Defaults to ``100``.
+            n_warmup (int, optional): Number of initial batches to run without
+                timing, to allow for warmup. Defaults to ``10``.
+            verbose (bool, optional): If ``True``, print the time for each
+                individual batch. Defaults to ``False``.
+
+        Returns:
+            dict: A dictionary with the following keys:
+
+                - ``avg_time`` (float): Average time per batch in seconds,
+                  or ``nan`` if no batch was timed.
+                - ``std_time`` (float): Population standard deviation of
+                  per-batch times in seconds, or ``nan`` if no batch was
+                  timed.
+                - ``batch_times`` (List[float]): List of per-batch timings in
+                  seconds.
+
+        Example:
+            >>> stats = dataloader.benchmark(n_batches=50, n_warmup=5)
+            >>> print(stats["avg_time"], stats["std_time"])
+        """
+        # Imported locally: the module's import block is not part of this hunk.
+        import math
+        import warnings
+
+        batch_times = []
+        total_batches = len(self) if n_batches is None else n_batches
+
+        # Nothing to measure: report NaN statistics instead of raising
+        # StopIteration (empty loader) or ZeroDivisionError (no timings).
+        if len(self) == 0 or total_batches <= 0:
+            warnings.warn(
+                "MultiEpochsDataLoader.benchmark: no batches available for benchmarking.",
+                RuntimeWarning,
+            )
+            return {
+                "avg_time": math.nan,
+                "std_time": math.nan,
+                "batch_times": batch_times,
+            }
+
+        # Unique sentinel so that a batch which is itself ``None`` is not
+        # mistaken for an exhausted iterator.
+        exhausted = object()
+        it = iter(self)
+
+        def fetch_next():
+            """Advance by one batch, restarting the iterator once if needed."""
+            nonlocal it
+            if next(it, exhausted) is not exhausted:
+                return True
+            it = iter(self)
+            return next(it, exhausted) is not exhausted
+
+        # Warmup (untimed)
+        for _ in range(n_warmup):
+            if not fetch_next():
+                break
+
+        # Timed iteration: the clock starts before the fetch so it measures it.
+        for i in range(total_batches):
+            start = time.perf_counter()
+            if not fetch_next():
+                break
+            batch_times.append(time.perf_counter() - start)
+            if verbose:
+                print(f"Batch {i+1}: {batch_times[-1]:.4f}s")
+
+        # Avoid division by zero if no batch times were recorded
+        if not batch_times:
+            warnings.warn(
+                "MultiEpochsDataLoader.benchmark: no batch times recorded.",
+                RuntimeWarning,
+            )
+            return {
+                "avg_time": math.nan,
+                "std_time": math.nan,
+                "batch_times": batch_times,
+            }
+
+        avg_time = math.fsum(batch_times) / len(batch_times)
+        std_time = math.sqrt(
+            math.fsum((t - avg_time) ** 2 for t in batch_times) / len(batch_times)
+        )
+        return {
+            "avg_time": avg_time,
+            "std_time": std_time,
+            "batch_times": batch_times,
+        }
+
 
 # borrowed with <3 from
 # https://github.com/sinzlab/neuralpredictors/blob/main/neuralpredictors/training/cyclers.py