From 71f7eb682b6461d2b940b793b24203ee5bfe7ece Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Tue, 7 Apr 2026 02:03:33 +0200
Subject: [PATCH 1/6] protect against future incompatible change in hist
 (missing metadata attribute)

---
 wums/ioutils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/wums/ioutils.py b/wums/ioutils.py
index a47184e..3b2cce2 100644
--- a/wums/ioutils.py
+++ b/wums/ioutils.py
@@ -2,8 +2,9 @@
 import pickle
 import sys
 
-import wums
-sys.modules['narf.ioutils'] = sys.modules['wums.ioutils'] # backwards compatibility to use old files
+sys.modules["narf.ioutils"] = sys.modules[
+    "wums.ioutils"
+]  # backwards compatibility to use old files
 
 import boost_histogram as bh
 import h5py
@@ -156,9 +157,11 @@ def reduce_Hist(obj):
     view = get_histogram_view(obj)
     h5buf = H5Buffer(view)
 
+    metadata = obj.metadata if hasattr(obj, "metadata") else None
+
     return (
         make_Hist,
-        (axes, obj.storage_type(), obj.metadata, obj.label, obj.name, h5buf),
+        (axes, obj.storage_type(), metadata, obj.label, obj.name, h5buf),
     )
 
 

From c8336775bf01df0dd0c2a2dde7ce0c004dbd56be Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Tue, 7 Apr 2026 02:10:41 +0200
Subject: [PATCH 2/6] Add SparseHist wrapper combining a scipy sparse array
 with hist axes

The wrapper stores the dense N-D shape implied by a sequence of hist axes
in the with-flow layout (axis.extent per axis) and provides toarray and
to_flat_csr methods that can extract either the with-flow or no-flow
representation. Also supports dict-style slicing along axes by regular-bin
index for use cases such as multi-systematic dispatch in rabbit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 wums/sparse_hist.py | 251 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 251 insertions(+)
 create mode 100644 wums/sparse_hist.py

diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py
new file mode 100644
index 0000000..42d7507
--- /dev/null
+++ b/wums/sparse_hist.py
@@ -0,0 +1,251 @@
+"""Sparse histogram wrapper combining a scipy sparse array with hist axes.
+
+The :class:`SparseHist` class pairs a scipy sparse array with a sequence of
+hist axes describing its dense N-D shape. The dense layout is always the
+with-flow layout (each axis contributes ``axis.extent`` bins). Consumers can
+extract either the with-flow or no-flow representation via the ``flow``
+parameter on :meth:`SparseHist.toarray` and :meth:`SparseHist.to_flat_csr`.
+"""
+
+import numpy as np
+
+
+class _AxesTuple(tuple):
+    """Tuple of hist axes that supports lookup by name as well as by index."""
+
+    def __getitem__(self, key):
+        if isinstance(key, str):
+            for ax in self:
+                if ax.name == key:
+                    return ax
+            raise KeyError(f"axis '{key}' not found")
+        return tuple.__getitem__(self, key)
+
+
+class SparseHist:
+    """Wrapper combining a scipy sparse array with hist axes describing its dense shape.
+
+    The dense N-D layout is **always the with-flow layout**: each axis contributes
+    ``axis.extent`` bins, where (for axes with underflow) position 0 is the
+    underflow bin, regular bins follow at positions 1..size, and (for axes with
+    overflow) the overflow bin is at the last position. For axes that have
+    neither underflow nor overflow, ``extent == size`` and the layout matches the
+    no-flow layout exactly.
+
+    The user provides scipy sparse data whose row-major flattening matches the
+    row-major flattening of this with-flow dense shape. Consumers (such as the
+    rabbit ``TensorWriter``) can extract either the with-flow or no-flow layout
+    via the ``flow`` parameter on :meth:`toarray` and :meth:`to_flat_csr`.
+
+    Parameters
+    ----------
+    data : scipy.sparse array or matrix
+        Sparse storage. Total element count must equal the product of axis extents.
+    axes : sequence of hist axes
+        Axes describing the dense N-D shape. Each axis must have ``.name``.
+    """
+
+    @staticmethod
+    def _underflow_offset(ax):
+        """Return 1 if the axis has an underflow bin, 0 otherwise."""
+        traits = getattr(ax, "traits", None)
+        if traits is not None and getattr(traits, "underflow", False):
+            return 1
+        return 0
+
+    def __init__(self, data, axes):
+        self._axes = _AxesTuple(axes)
+        self._dense_shape = tuple(int(a.extent) for a in self._axes)
+        self._size = int(np.prod(self._dense_shape))
+
+        if not (hasattr(data, "toarray") and hasattr(data, "tocoo")):
+            raise TypeError(
+                f"data must be a scipy sparse array/matrix, got {type(data).__name__}"
+            )
+
+        if int(np.prod(data.shape)) != self._size:
+            raise ValueError(
+                f"Total elements in sparse data ({int(np.prod(data.shape))}) does "
+                f"not match product of axis extents {self._dense_shape} = {self._size}"
+            )
+
+        # Internally store as flat (indices, values) corresponding to row-major
+        # flatten of the with-flow dense shape.
+        coo = data.tocoo()
+        if coo.ndim == 2:
+            flat_idx = np.ravel_multi_index((coo.row, coo.col), data.shape)
+        elif coo.ndim == 1:
+            flat_idx = coo.coords[0]
+        else:
+            raise ValueError(f"Unsupported sparse ndim {coo.ndim}")
+
+        self._flat_indices = np.asarray(flat_idx, dtype=np.int64)
+        self._values = np.asarray(coo.data)
+
+    @classmethod
+    def _from_flat(cls, flat_indices, values, axes, size):
+        """Construct directly from flat indices and values, bypassing __init__ checks."""
+        obj = cls.__new__(cls)
+        obj._axes = _AxesTuple(axes)
+        obj._dense_shape = tuple(int(a.extent) for a in obj._axes)
+        obj._size = int(size)
+        obj._flat_indices = np.asarray(flat_indices, dtype=np.int64)
+        obj._values = np.asarray(values)
+        return obj
+
+    @property
+    def axes(self):
+        return self._axes
+
+    @property
+    def shape(self):
+        return self._dense_shape
+
+    @property
+    def dtype(self):
+        return self._values.dtype
+
+    @property
+    def nnz(self):
+        return len(self._flat_indices)
+
+    def toarray(self, flow=True):
+        """Return the dense N-D numpy array.
+
+        If ``flow=True`` (default), the result has the with-flow shape (extents).
+        If ``flow=False``, flow bins are dropped and the result has the no-flow
+        shape (sizes).
+        """
+        out = np.zeros(self._size, dtype=self._values.dtype)
+        out[self._flat_indices] = self._values
+        full = out.reshape(self._dense_shape)
+        if flow:
+            return full
+        slices = tuple(
+            slice(self._underflow_offset(ax), self._underflow_offset(ax) + len(ax))
+            for ax in self._axes
+        )
+        return full[slices]
+
+    def tocoo(self):
+        """Return a 2D scipy COO array of shape (1, size) in the with-flow layout."""
+        import scipy.sparse
+
+        return scipy.sparse.coo_array(
+            (
+                self._values,
+                (np.zeros(len(self._flat_indices), dtype=np.int64), self._flat_indices),
+            ),
+            shape=(1, self._size),
+        )
+
+    def to_flat_csr(self, dtype, flow=True):
+        """Return a flat CSR array of shape (1, size) with sorted indices.
+
+        If ``flow=True`` (default), returns the with-flow CSR (size = product of
+        extents). If ``flow=False``, drops entries that fall in flow bins and
+        returns a CSR in the no-flow layout (size = product of sizes), with
+        indices shifted to that layout.
+        """
+        import scipy.sparse
+
+        if flow:
+            sort_order = np.argsort(self._flat_indices)
+            sorted_idx = self._flat_indices[sort_order].astype(np.int32)
+            sorted_vals = self._values[sort_order].astype(dtype)
+            indptr = np.array([0, len(sorted_vals)], dtype=np.int32)
+            return scipy.sparse.csr_array(
+                (sorted_vals, sorted_idx, indptr), shape=(1, self._size)
+            )
+
+        # No-flow extraction: filter entries in flow bins, shift remaining to
+        # the no-flow layout.
+        no_flow_shape = tuple(int(len(ax)) for ax in self._axes)
+        no_flow_size = int(np.prod(no_flow_shape))
+
+        if len(self._flat_indices) == 0:
+            indptr = np.array([0, 0], dtype=np.int32)
+            return scipy.sparse.csr_array(
+                (
+                    np.zeros(0, dtype=dtype),
+                    np.zeros(0, dtype=np.int32),
+                    indptr,
+                ),
+                shape=(1, no_flow_size),
+            )
+
+        multi = np.unravel_index(self._flat_indices, self._dense_shape)
+        mask = np.ones(len(self._flat_indices), dtype=bool)
+        for i, ax in enumerate(self._axes):
+            u = self._underflow_offset(ax)
+            s = int(len(ax))
+            mask &= (multi[i] >= u) & (multi[i] < u + s)
+
+        shifted = tuple(
+            multi[i][mask] - self._underflow_offset(ax)
+            for i, ax in enumerate(self._axes)
+        )
+
+        if len(no_flow_shape) == 1:
+            new_flat = shifted[0]
+        else:
+            new_flat = np.ravel_multi_index(shifted, no_flow_shape)
+
+        new_values = self._values[mask]
+        sort_order = np.argsort(new_flat)
+        sorted_idx = new_flat[sort_order].astype(np.int32)
+        sorted_vals = new_values[sort_order].astype(dtype)
+        indptr = np.array([0, len(sorted_vals)], dtype=np.int32)
+        return scipy.sparse.csr_array(
+            (sorted_vals, sorted_idx, indptr), shape=(1, no_flow_size)
+        )
+
+    def __getitem__(self, slice_dict):
+        """Slice along one or more axes by integer index, returning a new SparseHist.
+
+        Slice indices are interpreted as regular-bin indices (0..axis.size-1),
+        matching hist's ``h[{"name": i}]`` convention. The underflow offset is
+        added internally so the slice maps to the correct position in the
+        with-flow dense layout.
+        """
+        if not isinstance(slice_dict, dict):
+            raise TypeError(
+                f"SparseHist supports only dict-style index slicing, got {type(slice_dict).__name__}"
+            )
+
+        slice_per_axis = {}
+        for ax_name, ax_idx in slice_dict.items():
+            try:
+                ax_pos = next(i for i, a in enumerate(self._axes) if a.name == ax_name)
+            except StopIteration as ex:
+                raise KeyError(
+                    f"Axis '{ax_name}' not found in SparseHist axes "
+                    f"{[a.name for a in self._axes]}"
+                ) from ex
+            ax = self._axes[ax_pos]
+            slice_per_axis[ax_pos] = int(ax_idx) + self._underflow_offset(ax)
+
+        keep_positions = [i for i in range(len(self._axes)) if i not in slice_per_axis]
+        if not keep_positions:
+            raise ValueError("Cannot slice all axes of a SparseHist")
+        axes_keep = [self._axes[i] for i in keep_positions]
+
+        # Convert flat indices back to multi-dim
+        multi = np.unravel_index(self._flat_indices, self._dense_shape)
+
+        # Filter entries that match the requested slice
+        mask = np.ones(len(self._flat_indices), dtype=bool)
+        for ax_pos, sl in slice_per_axis.items():
+            mask &= multi[ax_pos] == sl
+
+        new_dense_shape = tuple(int(a.extent) for a in axes_keep)
+        new_size = int(np.prod(new_dense_shape))
+
+        if len(keep_positions) == 1:
+            new_flat = multi[keep_positions[0]][mask]
+        else:
+            new_multi = tuple(multi[i][mask] for i in keep_positions)
+            new_flat = np.ravel_multi_index(new_multi, new_dense_shape)
+
+        new_values = self._values[mask]
+        return SparseHist._from_flat(new_flat, new_values, axes_keep, new_size)

From 256be1f58ab506edc96be7380b9d07b6d2ae5acc Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Tue, 7 Apr 2026 15:57:54 +0200
Subject: [PATCH 3/6] Use int64 indices in SparseHist.to_flat_csr for large
 flat sizes

The CSR returned by to_flat_csr always cast indices and indptr to int32,
which silently overflowed when the flat target size exceeded the int32
range. This affected SparseHist instances built from large multi-axis
inputs (e.g. an (eta, phi, pt, mass, corparms) hist with ~108k corparms,
where the with-flow flat size is ~6.3 billion bins). to_flat_csr now
switches to int64 whenever the target size does not fit in int32.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 wums/sparse_hist.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py
index 42d7507..f651552 100644
--- a/wums/sparse_hist.py
+++ b/wums/sparse_hist.py
@@ -149,29 +149,36 @@ def to_flat_csr(self, dtype, flow=True):
         """
         import scipy.sparse
 
+        if flow:
+            target_size = self._size
+        else:
+            no_flow_shape = tuple(int(len(ax)) for ax in self._axes)
+            target_size = int(np.prod(no_flow_shape))
+
+        # Use int64 indices when the flat size exceeds the int32 range, since
+        # casting the flat indices to int32 there would silently wrap around.
+        idx_dtype = np.int64 if target_size > np.iinfo(np.int32).max else np.int32
+
         if flow:
             sort_order = np.argsort(self._flat_indices)
-            sorted_idx = self._flat_indices[sort_order].astype(np.int32)
+            sorted_idx = self._flat_indices[sort_order].astype(idx_dtype)
             sorted_vals = self._values[sort_order].astype(dtype)
-            indptr = np.array([0, len(sorted_vals)], dtype=np.int32)
+            indptr = np.array([0, len(sorted_vals)], dtype=idx_dtype)
             return scipy.sparse.csr_array(
                 (sorted_vals, sorted_idx, indptr), shape=(1, self._size)
             )
 
         # No-flow extraction: filter entries in flow bins, shift remaining to
         # the no-flow layout.
-        no_flow_shape = tuple(int(len(ax)) for ax in self._axes)
-        no_flow_size = int(np.prod(no_flow_shape))
-
         if len(self._flat_indices) == 0:
-            indptr = np.array([0, 0], dtype=np.int32)
+            indptr = np.array([0, 0], dtype=idx_dtype)
             return scipy.sparse.csr_array(
                 (
                     np.zeros(0, dtype=dtype),
-                    np.zeros(0, dtype=np.int32),
+                    np.zeros(0, dtype=idx_dtype),
                     indptr,
                 ),
-                shape=(1, no_flow_size),
+                shape=(1, target_size),
             )
 
         multi = np.unravel_index(self._flat_indices, self._dense_shape)
@@ -193,11 +200,11 @@ def to_flat_csr(self, dtype, flow=True):
 
         new_values = self._values[mask]
         sort_order = np.argsort(new_flat)
-        sorted_idx = new_flat[sort_order].astype(np.int32)
+        sorted_idx = new_flat[sort_order].astype(idx_dtype)
         sorted_vals = new_values[sort_order].astype(dtype)
-        indptr = np.array([0, len(sorted_vals)], dtype=np.int32)
+        indptr = np.array([0, len(sorted_vals)], dtype=idx_dtype)
         return scipy.sparse.csr_array(
-            (sorted_vals, sorted_idx, indptr), shape=(1, no_flow_size)
+            (sorted_vals, sorted_idx, indptr), shape=(1, target_size)
         )
 
     def __getitem__(self, slice_dict):

From a98b9070b369ac2d84edbcf31c60c782ea6cf500 Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Sat, 11 Apr 2026 12:49:07 +0200
Subject: [PATCH 4/6] SparseHist: add scalar multiplication operators

Implement __mul__, __rmul__ and __imul__ for multiplication by a scalar
(Python or numpy int/float). __mul__ / __rmul__ return a new SparseHist
with scaled values; __imul__ scales the underlying _values array in place
(self._values[...] *= other). Add a test covering all three operators
plus numpy scalar dispatch.
---
 tests/test_sparse_hist_mul.py | 56 +++++++++++++++++++++++++++++++++++
 wums/sparse_hist.py           | 18 +++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 tests/test_sparse_hist_mul.py

diff --git a/tests/test_sparse_hist_mul.py b/tests/test_sparse_hist_mul.py
new file mode 100644
index 0000000..64af0e8
--- /dev/null
+++ b/tests/test_sparse_hist_mul.py
@@ -0,0 +1,56 @@
+"""Tests for scalar multiplication operators on SparseHist."""
+
+import hist
+import numpy as np
+import scipy.sparse
+
+from wums.sparse_hist import SparseHist
+
+
+def _make_sh():
+    ax0 = hist.axis.Regular(3, 0, 3, underflow=False, overflow=False, name="x")
+    ax1 = hist.axis.Regular(2, 0, 2, underflow=False, overflow=False, name="y")
+    vals = np.array([2.0, 4.0, -1.5])
+    coo = scipy.sparse.coo_array(
+        (vals, (np.array([0, 0, 0]), np.array([0, 3, 5]))), shape=(1, 6)
+    )
+    return SparseHist(coo, [ax0, ax1]), vals.copy()
+
+
+def test_mul_returns_new_sparsehist():
+    sh, ref = _make_sh()
+    sh2 = sh * 2.5
+    assert isinstance(sh2, SparseHist)
+    assert np.allclose(sh2._values, ref * 2.5)
+    assert np.array_equal(sh2._flat_indices, sh._flat_indices)
+    assert np.allclose(sh._values, ref), "mul must not modify the original"
+
+
+def test_rmul():
+    sh, ref = _make_sh()
+    sh3 = 3 * sh
+    assert isinstance(sh3, SparseHist)
+    assert np.allclose(sh3._values, ref * 3)
+    assert np.allclose(sh._values, ref), "rmul must not modify the original"
+
+
+def test_imul_in_place():
+    sh, ref = _make_sh()
+    buf = sh._values
+    sh *= 2.0
+    assert np.allclose(sh._values, ref * 2)
+    assert sh._values is buf, "imul must modify the underlying values buffer"
+
+
+def test_mul_numpy_scalar():
+    sh, ref = _make_sh()
+    sh4 = sh * np.float64(1.5)
+    assert np.allclose(sh4._values, ref * 1.5)
+
+
+if __name__ == "__main__":
+    test_mul_returns_new_sparsehist()
+    test_rmul()
+    test_imul_in_place()
+    test_mul_numpy_scalar()
+    print("All SparseHist scalar-mul tests passed")
diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py
index f651552..bf0d4fb 100644
--- a/wums/sparse_hist.py
+++ b/wums/sparse_hist.py
@@ -207,6 +207,24 @@ def to_flat_csr(self, dtype, flow=True):
             (sorted_vals, sorted_idx, indptr), shape=(1, target_size)
         )
 
+    def __mul__(self, other):
+        """Multiply all stored values by a scalar, returning a new SparseHist."""
+        if not isinstance(other, (int, float, np.integer, np.floating)):
+            return NotImplemented
+        return SparseHist._from_flat(
+            self._flat_indices, self._values * other, self._axes, self._size
+        )
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __imul__(self, other):
+        """In-place scalar multiplication."""
+        if not isinstance(other, (int, float, np.integer, np.floating)):
+            return NotImplemented
+        self._values[...] *= other
+        return self
+
     def __getitem__(self, slice_dict):
         """Slice along one or more axes by integer index, returning a new SparseHist.
 

From 1e38bb8a05f3b9645e9f6f24691fc8f4100011d2 Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Sat, 11 Apr 2026 12:57:54 +0200
Subject: [PATCH 5/6] SparseHist: add optional metadata attribute

Accept a keyword-only metadata argument in __init__ (defaulting to None,
matching the hist.Hist interface), expose it as a settable attribute, and
propagate it through _from_flat, scalar multiplication, and dict-style
slicing. In-place scalar multiplication preserves the attribute trivially
since it returns self. Extend the existing tests with metadata coverage.
---
 tests/test_sparse_hist_mul.py | 45 +++++++++++++++++++++++++++++++----
 wums/sparse_hist.py           | 20 ++++++++++++----
 2 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/tests/test_sparse_hist_mul.py b/tests/test_sparse_hist_mul.py
index 64af0e8..2d3fd57 100644
--- a/tests/test_sparse_hist_mul.py
+++ b/tests/test_sparse_hist_mul.py
@@ -1,4 +1,4 @@
-"""Tests for scalar multiplication operators on SparseHist."""
+"""Tests for scalar multiplication operators and metadata on SparseHist."""
 
 import hist
 import numpy as np
@@ -7,14 +7,14 @@
 from wums.sparse_hist import SparseHist
 
 
-def _make_sh():
+def _make_sh(metadata=None):
     ax0 = hist.axis.Regular(3, 0, 3, underflow=False, overflow=False, name="x")
     ax1 = hist.axis.Regular(2, 0, 2, underflow=False, overflow=False, name="y")
     vals = np.array([2.0, 4.0, -1.5])
     coo = scipy.sparse.coo_array(
         (vals, (np.array([0, 0, 0]), np.array([0, 3, 5]))), shape=(1, 6)
     )
-    return SparseHist(coo, [ax0, ax1]), vals.copy()
+    return SparseHist(coo, [ax0, ax1], metadata=metadata), vals.copy()
 
 
 def test_mul_returns_new_sparsehist():
@@ -48,9 +48,46 @@ def test_mul_numpy_scalar():
     assert np.allclose(sh4._values, ref * 1.5)
 
 
+def test_metadata_default_none():
+    sh, _ = _make_sh()
+    assert sh.metadata is None
+
+
+def test_metadata_ctor_and_setter():
+    sh, _ = _make_sh(metadata={"label": "jpsi", "nevents": 123})
+    assert sh.metadata == {"label": "jpsi", "nevents": 123}
+    sh.metadata = {"new": "meta"}
+    assert sh.metadata == {"new": "meta"}
+
+
+def test_metadata_propagates_through_mul():
+    sh, _ = _make_sh(metadata={"label": "jpsi"})
+    sh2 = sh * 2.0
+    assert sh2.metadata == sh.metadata
+    sh3 = 3 * sh
+    assert sh3.metadata == sh.metadata
+
+
+def test_metadata_propagates_through_getitem():
+    sh, _ = _make_sh(metadata="tag")
+    sub = sh[{"y": 0}]
+    assert sub.metadata == "tag"
+
+
+def test_metadata_preserved_by_imul():
+    sh, _ = _make_sh(metadata="tag")
+    sh *= 3
+    assert sh.metadata == "tag"
+
+
 if __name__ == "__main__":
     test_mul_returns_new_sparsehist()
     test_rmul()
     test_imul_in_place()
     test_mul_numpy_scalar()
-    print("All SparseHist scalar-mul tests passed")
+    test_metadata_default_none()
+    test_metadata_ctor_and_setter()
+    test_metadata_propagates_through_mul()
+    test_metadata_propagates_through_getitem()
+    test_metadata_preserved_by_imul()
+    print("All SparseHist scalar-mul and metadata tests passed")
diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py
index bf0d4fb..00311f1 100644
--- a/wums/sparse_hist.py
+++ b/wums/sparse_hist.py
@@ -43,6 +43,10 @@ class SparseHist:
         Sparse storage. Total element count must equal the product of axis extents.
     axes : sequence of hist axes
         Axes describing the dense N-D shape. Each axis must have ``.name``.
+    metadata : optional
+        Arbitrary user metadata, accessible (and assignable) via the
+        ``.metadata`` attribute. Defaults to ``None``, matching the
+        ``hist.Hist`` interface.
     """
 
     @staticmethod
@@ -53,10 +57,11 @@ def _underflow_offset(ax):
             return 1
         return 0
 
-    def __init__(self, data, axes):
+    def __init__(self, data, axes, *, metadata=None):
         self._axes = _AxesTuple(axes)
         self._dense_shape = tuple(int(a.extent) for a in self._axes)
         self._size = int(np.prod(self._dense_shape))
+        self.metadata = metadata
 
         if not (hasattr(data, "toarray") and hasattr(data, "tocoo")):
             raise TypeError(
@@ -83,7 +88,7 @@ def __init__(self, data, axes):
         self._values = np.asarray(coo.data)
 
     @classmethod
-    def _from_flat(cls, flat_indices, values, axes, size):
+    def _from_flat(cls, flat_indices, values, axes, size, metadata=None):
         """Construct directly from flat indices and values, bypassing __init__ checks."""
         obj = cls.__new__(cls)
         obj._axes = _AxesTuple(axes)
@@ -91,6 +96,7 @@ def _from_flat(cls, flat_indices, values, axes, size):
         obj._size = int(size)
         obj._flat_indices = np.asarray(flat_indices, dtype=np.int64)
         obj._values = np.asarray(values)
+        obj.metadata = metadata
         return obj
 
     @property
@@ -212,7 +218,11 @@ def __mul__(self, other):
         if not isinstance(other, (int, float, np.integer, np.floating)):
             return NotImplemented
         return SparseHist._from_flat(
-            self._flat_indices, self._values * other, self._axes, self._size
+            self._flat_indices,
+            self._values * other,
+            self._axes,
+            self._size,
+            metadata=self.metadata,
         )
 
     def __rmul__(self, other):
@@ -273,4 +283,6 @@ def __getitem__(self, slice_dict):
             new_flat = np.ravel_multi_index(new_multi, new_dense_shape)
 
         new_values = self._values[mask]
-        return SparseHist._from_flat(new_flat, new_values, axes_keep, new_size)
+        return SparseHist._from_flat(
+            new_flat, new_values, axes_keep, new_size, metadata=self.metadata
+        )

From 3ef1cfc58e29758d310cb2ae6fe589943a0dc982 Mon Sep 17 00:00:00 2001
From: Josh Bendavid <Josh.Bendavid@cern.ch>
Date: Tue, 21 Apr 2026 18:07:01 +0200
Subject: [PATCH 6/6] SparseHist: align shape with hist.Hist (no-flow); add
 axes.size/extent

SparseHist.shape now returns the no-flow dense shape
(tuple(len(ax) for ax in axes)), matching the hist.Hist.shape
convention. The internal flat-index layout remains with-flow.

_AxesTuple gains size and extent properties mirroring
hist.NamedAxesTuple, so h.axes.size == h.shape (no-flow per-axis)
and h.axes.extent gives the with-flow per-axis tuple needed for
operations that work with the internal flat indices.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 wums/sparse_hist.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py
index 00311f1..0614eb5 100644
--- a/wums/sparse_hist.py
+++ b/wums/sparse_hist.py
@@ -11,7 +11,8 @@
 
 
 class _AxesTuple(tuple):
-    """Tuple of hist axes that supports lookup by name as well as by index."""
+    """Tuple of hist axes supporting lookup by name and the
+    ``size``/``extent`` attributes from ``hist.NamedAxesTuple``."""
 
     def __getitem__(self, key):
         if isinstance(key, str):
@@ -21,6 +22,17 @@ def __getitem__(self, key):
             raise KeyError(f"axis '{key}' not found")
         return tuple.__getitem__(self, key)
 
+    @property
+    def size(self):
+        """Per-axis size (no-flow), matching ``hist.NamedAxesTuple.size``."""
+        return tuple(int(len(ax)) for ax in self)
+
+    @property
+    def extent(self):
+        """Per-axis extent (with-flow), matching
+        ``hist.NamedAxesTuple.extent``."""
+        return tuple(int(ax.extent) for ax in self)
+
 
 class SparseHist:
     """Wrapper combining a scipy sparse array with hist axes describing its dense shape.
@@ -37,6 +49,10 @@ class SparseHist:
     rabbit ``TensorWriter``) can extract either the with-flow or no-flow layout
     via the ``flow`` parameter on :meth:`toarray` and :meth:`to_flat_csr`.
 
+    The public :attr:`shape` property returns the *no-flow* shape to match
+    ``hist.Hist.shape``. The with-flow dense shape is exposed via
+    ``h.axes.extent``, matching ``hist.NamedAxesTuple.extent``.
+
     Parameters
     ----------
     data : scipy.sparse array or matrix
@@ -105,7 +121,14 @@ def axes(self):
 
     @property
     def shape(self):
-        return self._dense_shape
+        """Dense no-flow shape ``(len(axis) for axis in axes)``.
+
+        Matches the ``hist.Hist.shape`` convention of excluding flow
+        bins. The internal flat-index layout is still with-flow; use
+        ``h.axes.extent`` (same API as ``hist.NamedAxesTuple.extent``)
+        to get the with-flow dense shape when needed.
+        """
+        return self._axes.size
 
     @property
     def dtype(self):