From 71f7eb682b6461d2b940b793b24203ee5bfe7ece Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Tue, 7 Apr 2026 02:03:33 +0200 Subject: [PATCH 1/6] protect against future incompatible change in hist --- wums/ioutils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wums/ioutils.py b/wums/ioutils.py index a47184e..3b2cce2 100644 --- a/wums/ioutils.py +++ b/wums/ioutils.py @@ -2,8 +2,9 @@ import pickle import sys -import wums -sys.modules['narf.ioutils'] = sys.modules['wums.ioutils'] # backwards compatibility to use old files +sys.modules["narf.ioutils"] = sys.modules[ + "wums.ioutils" +] # backwards compatibility to use old files import boost_histogram as bh import h5py @@ -156,9 +157,11 @@ def reduce_Hist(obj): view = get_histogram_view(obj) h5buf = H5Buffer(view) + metadata = obj.metadata if hasattr(obj, "metadata") else None + return ( make_Hist, - (axes, obj.storage_type(), obj.metadata, obj.label, obj.name, h5buf), + (axes, obj.storage_type(), metadata, obj.label, obj.name, h5buf), ) From c8336775bf01df0dd0c2a2dde7ce0c004dbd56be Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Tue, 7 Apr 2026 02:10:41 +0200 Subject: [PATCH 2/6] Add SparseHist wrapper combining a scipy sparse array with hist axes The wrapper stores the dense N-D shape implied by a sequence of hist axes in the with-flow layout (axis.extent per axis) and provides toarray and to_flat_csr methods that can extract either the with-flow or no-flow representation. Also supports dict-style slicing along axes by regular-bin index for use cases such as multi-systematic dispatch in rabbit. Co-Authored-By: Claude Opus 4.6 (1M context) --- wums/sparse_hist.py | 251 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 wums/sparse_hist.py diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py new file mode 100644 index 0000000..42d7507 --- /dev/null +++ b/wums/sparse_hist.py @@ -0,0 +1,251 @@ +"""Sparse histogram wrapper combining a scipy sparse array with hist axes. + +The :class:`SparseHist` class pairs a scipy sparse array with a sequence of +hist axes describing its dense N-D shape. The dense layout is always the +with-flow layout (each axis contributes ``axis.extent`` bins). Consumers can +extract either the with-flow or no-flow representation via the ``flow`` +parameter on :meth:`SparseHist.toarray` and :meth:`SparseHist.to_flat_csr`. +""" + +import numpy as np + + +class _AxesTuple(tuple): + """Tuple of hist axes that supports lookup by name as well as by index.""" + + def __getitem__(self, key): + if isinstance(key, str): + for ax in self: + if ax.name == key: + return ax + raise KeyError(f"axis '{key}' not found") + return tuple.__getitem__(self, key) + + +class SparseHist: + """Wrapper combining a scipy sparse array with hist axes describing its dense shape. + + The dense N-D layout is **always the with-flow layout**: each axis contributes + ``axis.extent`` bins, where (for axes with underflow) position 0 is the + underflow bin, regular bins follow at positions 1..size, and (for axes with + overflow) the overflow bin is at the last position. For axes that have + neither underflow nor overflow, ``extent == size`` and the layout matches the + no-flow layout exactly. + + The user provides scipy sparse data whose row-major flattening matches the + row-major flattening of this with-flow dense shape. Consumers (such as the + rabbit ``TensorWriter``) can extract either the with-flow or no-flow layout + via the ``flow`` parameter on :meth:`toarray` and :meth:`to_flat_csr`. + + Parameters + ---------- + data : scipy.sparse array or matrix + Sparse storage. Total element count must equal the product of axis extents. + axes : sequence of hist axes + Axes describing the dense N-D shape. Each axis must have ``.name``. + """ + + @staticmethod + def _underflow_offset(ax): + """Return 1 if the axis has an underflow bin, 0 otherwise.""" + traits = getattr(ax, "traits", None) + if traits is not None and getattr(traits, "underflow", False): + return 1 + return 0 + + def __init__(self, data, axes): + self._axes = _AxesTuple(axes) + self._dense_shape = tuple(int(a.extent) for a in self._axes) + self._size = int(np.prod(self._dense_shape)) + + if not (hasattr(data, "toarray") and hasattr(data, "tocoo")): + raise TypeError( + f"data must be a scipy sparse array/matrix, got {type(data).__name__}" + ) + + if int(np.prod(data.shape)) != self._size: + raise ValueError( + f"Total elements in sparse data ({int(np.prod(data.shape))}) does " + f"not match product of axis extents {self._dense_shape} = {self._size}" + ) + + # Internally store as flat (indices, values) corresponding to row-major + # flatten of the with-flow dense shape. + coo = data.tocoo() + if coo.ndim == 2: + flat_idx = np.ravel_multi_index((coo.row, coo.col), data.shape) + elif coo.ndim == 1: + flat_idx = coo.coords[0] + else: + raise ValueError(f"Unsupported sparse ndim {coo.ndim}") + + self._flat_indices = np.asarray(flat_idx, dtype=np.int64) + self._values = np.asarray(coo.data) + + @classmethod + def _from_flat(cls, flat_indices, values, axes, size): + """Construct directly from flat indices and values, bypassing __init__ checks.""" + obj = cls.__new__(cls) + obj._axes = _AxesTuple(axes) + obj._dense_shape = tuple(int(a.extent) for a in obj._axes) + obj._size = int(size) + obj._flat_indices = np.asarray(flat_indices, dtype=np.int64) + obj._values = np.asarray(values) + return obj + + @property + def axes(self): + return self._axes + + @property + def shape(self): + return self._dense_shape + + @property + def dtype(self): + return self._values.dtype + + @property + def nnz(self): + return len(self._flat_indices) + + def toarray(self, flow=True): + """Return the dense N-D numpy array. + + If ``flow=True`` (default), the result has the with-flow shape (extents). + If ``flow=False``, flow bins are dropped and the result has the no-flow + shape (sizes). + """ + out = np.zeros(self._size, dtype=self._values.dtype) + out[self._flat_indices] = self._values + full = out.reshape(self._dense_shape) + if flow: + return full + slices = tuple( + slice(self._underflow_offset(ax), self._underflow_offset(ax) + len(ax)) + for ax in self._axes + ) + return full[slices] + + def tocoo(self): + """Return a 2D scipy COO array of shape (1, size) in the with-flow layout.""" + import scipy.sparse + + return scipy.sparse.coo_array( + ( + self._values, + (np.zeros(len(self._flat_indices), dtype=np.int64), self._flat_indices), + ), + shape=(1, self._size), + ) + + def to_flat_csr(self, dtype, flow=True): + """Return a flat CSR array of shape (1, size) with sorted indices. + + If ``flow=True`` (default), returns the with-flow CSR (size = product of + extents). If ``flow=False``, drops entries that fall in flow bins and + returns a CSR in the no-flow layout (size = product of sizes), with + indices shifted to that layout. + """ + import scipy.sparse + + if flow: + sort_order = np.argsort(self._flat_indices) + sorted_idx = self._flat_indices[sort_order].astype(np.int32) + sorted_vals = self._values[sort_order].astype(dtype) + indptr = np.array([0, len(sorted_vals)], dtype=np.int32) + return scipy.sparse.csr_array( + (sorted_vals, sorted_idx, indptr), shape=(1, self._size) + ) + + # No-flow extraction: filter entries in flow bins, shift remaining to + # the no-flow layout. + no_flow_shape = tuple(int(len(ax)) for ax in self._axes) + no_flow_size = int(np.prod(no_flow_shape)) + + if len(self._flat_indices) == 0: + indptr = np.array([0, 0], dtype=np.int32) + return scipy.sparse.csr_array( + ( + np.zeros(0, dtype=dtype), + np.zeros(0, dtype=np.int32), + indptr, + ), + shape=(1, no_flow_size), + ) + + multi = np.unravel_index(self._flat_indices, self._dense_shape) + mask = np.ones(len(self._flat_indices), dtype=bool) + for i, ax in enumerate(self._axes): + u = self._underflow_offset(ax) + s = int(len(ax)) + mask &= (multi[i] >= u) & (multi[i] < u + s) + + shifted = tuple( + multi[i][mask] - self._underflow_offset(ax) + for i, ax in enumerate(self._axes) + ) + + if len(no_flow_shape) == 1: + new_flat = shifted[0] + else: + new_flat = np.ravel_multi_index(shifted, no_flow_shape) + + new_values = self._values[mask] + sort_order = np.argsort(new_flat) + sorted_idx = new_flat[sort_order].astype(np.int32) + sorted_vals = new_values[sort_order].astype(dtype) + indptr = np.array([0, len(sorted_vals)], dtype=np.int32) + return scipy.sparse.csr_array( + (sorted_vals, sorted_idx, indptr), shape=(1, no_flow_size) + ) + + def __getitem__(self, slice_dict): + """Slice along one or more axes by integer index, returning a new SparseHist. + + Slice indices are interpreted as regular-bin indices (0..axis.size-1), + matching hist's ``h[{"name": i}]`` convention. The underflow offset is + added internally so the slice maps to the correct position in the + with-flow dense layout. + """ + if not isinstance(slice_dict, dict): + raise TypeError( + f"SparseHist supports only dict-style index slicing, got {type(slice_dict).__name__}" + ) + + slice_per_axis = {} + for ax_name, ax_idx in slice_dict.items(): + try: + ax_pos = next(i for i, a in enumerate(self._axes) if a.name == ax_name) + except StopIteration as ex: + raise KeyError( + f"Axis '{ax_name}' not found in SparseHist axes " + f"{[a.name for a in self._axes]}" + ) from ex + ax = self._axes[ax_pos] + slice_per_axis[ax_pos] = int(ax_idx) + self._underflow_offset(ax) + + keep_positions = [i for i in range(len(self._axes)) if i not in slice_per_axis] + if not keep_positions: + raise ValueError("Cannot slice all axes of a SparseHist") + axes_keep = [self._axes[i] for i in keep_positions] + + # Convert flat indices back to multi-dim + multi = np.unravel_index(self._flat_indices, self._dense_shape) + + # Filter entries that match the requested slice + mask = np.ones(len(self._flat_indices), dtype=bool) + for ax_pos, sl in slice_per_axis.items(): + mask &= multi[ax_pos] == sl + + new_dense_shape = tuple(int(a.extent) for a in axes_keep) + new_size = int(np.prod(new_dense_shape)) + + if len(keep_positions) == 1: + new_flat = multi[keep_positions[0]][mask] + else: + new_multi = tuple(multi[i][mask] for i in keep_positions) + new_flat = np.ravel_multi_index(new_multi, new_dense_shape) + + new_values = self._values[mask] + return SparseHist._from_flat(new_flat, new_values, axes_keep, new_size) From 256be1f58ab506edc96be7380b9d07b6d2ae5acc Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Tue, 7 Apr 2026 15:57:54 +0200 Subject: [PATCH 3/6] Use int64 indices in SparseHist.to_flat_csr for large flat sizes The CSR returned by to_flat_csr always cast indices and indptr to int32, which silently overflowed when the flat target size exceeded the int32 range. This affected SparseHist instances built from large multi-axis inputs (e.g. a (eta, phi, pt, mass, corparms) hist with ~108k corparms, where the with-flow flat size is ~6.3 billion bins). Now switch to int64 whenever the target size does not fit in int32. Co-Authored-By: Claude Opus 4.6 (1M context) --- wums/sparse_hist.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py index 42d7507..f651552 100644 --- a/wums/sparse_hist.py +++ b/wums/sparse_hist.py @@ -149,29 +149,36 @@ def to_flat_csr(self, dtype, flow=True): """ import scipy.sparse + if flow: + target_size = self._size + else: + no_flow_shape = tuple(int(len(ax)) for ax in self._axes) + target_size = int(np.prod(no_flow_shape)) + + # Use int64 indices when the flat size exceeds the int32 range, since + # scipy.sparse CSR indices default to int32 and would silently overflow. + idx_dtype = np.int64 if target_size > np.iinfo(np.int32).max else np.int32 + if flow: sort_order = np.argsort(self._flat_indices) - sorted_idx = self._flat_indices[sort_order].astype(np.int32) + sorted_idx = self._flat_indices[sort_order].astype(idx_dtype) sorted_vals = self._values[sort_order].astype(dtype) - indptr = np.array([0, len(sorted_vals)], dtype=np.int32) + indptr = np.array([0, len(sorted_vals)], dtype=idx_dtype) return scipy.sparse.csr_array( (sorted_vals, sorted_idx, indptr), shape=(1, self._size) ) # No-flow extraction: filter entries in flow bins, shift remaining to # the no-flow layout. - no_flow_shape = tuple(int(len(ax)) for ax in self._axes) - no_flow_size = int(np.prod(no_flow_shape)) - if len(self._flat_indices) == 0: - indptr = np.array([0, 0], dtype=np.int32) + indptr = np.array([0, 0], dtype=idx_dtype) return scipy.sparse.csr_array( ( np.zeros(0, dtype=dtype), - np.zeros(0, dtype=np.int32), + np.zeros(0, dtype=idx_dtype), indptr, ), - shape=(1, no_flow_size), + shape=(1, target_size), ) multi = np.unravel_index(self._flat_indices, self._dense_shape) @@ -193,11 +200,11 @@ def to_flat_csr(self, dtype, flow=True): new_values = self._values[mask] sort_order = np.argsort(new_flat) - sorted_idx = new_flat[sort_order].astype(np.int32) + sorted_idx = new_flat[sort_order].astype(idx_dtype) sorted_vals = new_values[sort_order].astype(dtype) - indptr = np.array([0, len(sorted_vals)], dtype=np.int32) + indptr = np.array([0, len(sorted_vals)], dtype=idx_dtype) return scipy.sparse.csr_array( - (sorted_vals, sorted_idx, indptr), shape=(1, no_flow_size) + (sorted_vals, sorted_idx, indptr), shape=(1, target_size) ) def __getitem__(self, slice_dict): From a98b9070b369ac2d84edbcf31c60c782ea6cf500 Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Sat, 11 Apr 2026 12:49:07 +0200 Subject: [PATCH 4/6] SparseHist: add scalar multiplication operators Implement __mul__, __rmul__ and __imul__ for multiplication by a scalar (Python or numpy). __mul__ / __rmul__ return a new SparseHist with scaled values; __imul__ modifies the underlying _values array in place via [...] *=. Add a test covering all three operators plus numpy scalar dispatch. --- tests/test_sparse_hist_mul.py | 56 +++++++++++++++++++++++++++++++++++ wums/sparse_hist.py | 18 +++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tests/test_sparse_hist_mul.py diff --git a/tests/test_sparse_hist_mul.py b/tests/test_sparse_hist_mul.py new file mode 100644 index 0000000..64af0e8 --- /dev/null +++ b/tests/test_sparse_hist_mul.py @@ -0,0 +1,56 @@ +"""Tests for scalar multiplication operators on SparseHist.""" + +import hist +import numpy as np +import scipy.sparse + +from wums.sparse_hist import SparseHist + + +def _make_sh(): + ax0 = hist.axis.Regular(3, 0, 3, underflow=False, overflow=False, name="x") + ax1 = hist.axis.Regular(2, 0, 2, underflow=False, overflow=False, name="y") + vals = np.array([2.0, 4.0, -1.5]) + coo = scipy.sparse.coo_array( + (vals, (np.array([0, 0, 0]), np.array([0, 3, 5]))), shape=(1, 6) + ) + return SparseHist(coo, [ax0, ax1]), vals.copy() + + +def test_mul_returns_new_sparsehist(): + sh, ref = _make_sh() + sh2 = sh * 2.5 + assert isinstance(sh2, SparseHist) + assert np.allclose(sh2._values, ref * 2.5) + assert np.array_equal(sh2._flat_indices, sh._flat_indices) + assert np.allclose(sh._values, ref), "mul must not modify the original" + + +def test_rmul(): + sh, ref = _make_sh() + sh3 = 3 * sh + assert isinstance(sh3, SparseHist) + assert np.allclose(sh3._values, ref * 3) + assert np.allclose(sh._values, ref), "rmul must not modify the original" + + +def test_imul_in_place(): + sh, ref = _make_sh() + buf = sh._values + sh *= 2.0 + assert np.allclose(sh._values, ref * 2) + assert sh._values is buf, "imul must modify the underlying values buffer" + + +def test_mul_numpy_scalar(): + sh, ref = _make_sh() + sh4 = sh * np.float64(1.5) + assert np.allclose(sh4._values, ref * 1.5) + + +if __name__ == "__main__": + test_mul_returns_new_sparsehist() + test_rmul() + test_imul_in_place() + test_mul_numpy_scalar() + print("All SparseHist scalar-mul tests passed") diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py index f651552..bf0d4fb 100644 --- a/wums/sparse_hist.py +++ b/wums/sparse_hist.py @@ -207,6 +207,24 @@ def to_flat_csr(self, dtype, flow=True): (sorted_vals, sorted_idx, indptr), shape=(1, target_size) ) + def __mul__(self, other): + """Multiply all stored values by a scalar, returning a new SparseHist.""" + if not isinstance(other, (int, float, np.integer, np.floating)): + return NotImplemented + return SparseHist._from_flat( + self._flat_indices, self._values * other, self._axes, self._size + ) + + def __rmul__(self, other): + return self.__mul__(other) + + def __imul__(self, other): + """In-place scalar multiplication.""" + if not isinstance(other, (int, float, np.integer, np.floating)): + return NotImplemented + self._values[...] *= other + return self + def __getitem__(self, slice_dict): """Slice along one or more axes by integer index, returning a new SparseHist. From 1e38bb8a05f3b9645e9f6f24691fc8f4100011d2 Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Sat, 11 Apr 2026 12:57:54 +0200 Subject: [PATCH 5/6] SparseHist: add optional metadata attribute Accept a keyword-only metadata argument in __init__ (defaulting to None, matching the hist.Hist interface), expose it as a settable attribute, and propagate it through _from_flat, scalar multiplication, and dict-style slicing. In-place scalar multiplication preserves the attribute trivially since it returns self. Extend the existing tests with metadata coverage. --- tests/test_sparse_hist_mul.py | 45 +++++++++++++++++++++++++++++++---- wums/sparse_hist.py | 20 ++++++++++++---- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/tests/test_sparse_hist_mul.py b/tests/test_sparse_hist_mul.py index 64af0e8..2d3fd57 100644 --- a/tests/test_sparse_hist_mul.py +++ b/tests/test_sparse_hist_mul.py @@ -1,4 +1,4 @@ -"""Tests for scalar multiplication operators on SparseHist.""" +"""Tests for scalar multiplication operators and metadata on SparseHist.""" import hist import numpy as np @@ -7,14 +7,14 @@ from wums.sparse_hist import SparseHist -def _make_sh(): +def _make_sh(metadata=None): ax0 = hist.axis.Regular(3, 0, 3, underflow=False, overflow=False, name="x") ax1 = hist.axis.Regular(2, 0, 2, underflow=False, overflow=False, name="y") vals = np.array([2.0, 4.0, -1.5]) coo = scipy.sparse.coo_array( (vals, (np.array([0, 0, 0]), np.array([0, 3, 5]))), shape=(1, 6) ) - return SparseHist(coo, [ax0, ax1]), vals.copy() + return SparseHist(coo, [ax0, ax1], metadata=metadata), vals.copy() def test_mul_returns_new_sparsehist(): @@ -48,9 +48,46 @@ def test_mul_numpy_scalar(): assert np.allclose(sh4._values, ref * 1.5) +def test_metadata_default_none(): + sh, _ = _make_sh() + assert sh.metadata is None + + +def test_metadata_ctor_and_setter(): + sh, _ = _make_sh(metadata={"label": "jpsi", "nevents": 123}) + assert sh.metadata == {"label": "jpsi", "nevents": 123} + sh.metadata = {"new": "meta"} + assert sh.metadata == {"new": "meta"} + + +def test_metadata_propagates_through_mul(): + sh, _ = _make_sh(metadata={"label": "jpsi"}) + sh2 = sh * 2.0 + assert sh2.metadata == sh.metadata + sh3 = 3 * sh + assert sh3.metadata == sh.metadata + + +def test_metadata_propagates_through_getitem(): + sh, _ = _make_sh(metadata="tag") + sub = sh[{"y": 0}] + assert sub.metadata == "tag" + + +def test_metadata_preserved_by_imul(): + sh, _ = _make_sh(metadata="tag") + sh *= 3 + assert sh.metadata == "tag" + + if __name__ == "__main__": test_mul_returns_new_sparsehist() test_rmul() test_imul_in_place() test_mul_numpy_scalar() - print("All SparseHist scalar-mul tests passed") + test_metadata_default_none() + test_metadata_ctor_and_setter() + test_metadata_propagates_through_mul() + test_metadata_propagates_through_getitem() + test_metadata_preserved_by_imul() + print("All SparseHist scalar-mul and metadata tests passed") diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py index bf0d4fb..00311f1 100644 --- a/wums/sparse_hist.py +++ b/wums/sparse_hist.py @@ -43,6 +43,10 @@ class SparseHist: Sparse storage. Total element count must equal the product of axis extents. axes : sequence of hist axes Axes describing the dense N-D shape. Each axis must have ``.name``. + metadata : optional + Arbitrary user metadata, accessible (and assignable) via the + ``.metadata`` attribute. Defaults to ``None``, matching the + ``hist.Hist`` interface. """ @staticmethod @@ -53,10 +57,11 @@ def _underflow_offset(ax): return 1 return 0 - def __init__(self, data, axes): + def __init__(self, data, axes, *, metadata=None): self._axes = _AxesTuple(axes) self._dense_shape = tuple(int(a.extent) for a in self._axes) self._size = int(np.prod(self._dense_shape)) + self.metadata = metadata if not (hasattr(data, "toarray") and hasattr(data, "tocoo")): raise TypeError( @@ -83,7 +88,7 @@ def __init__(self, data, axes): self._values = np.asarray(coo.data) @classmethod - def _from_flat(cls, flat_indices, values, axes, size): + def _from_flat(cls, flat_indices, values, axes, size, metadata=None): """Construct directly from flat indices and values, bypassing __init__ checks.""" obj = cls.__new__(cls) obj._axes = _AxesTuple(axes) @@ -91,6 +96,7 @@ def _from_flat(cls, flat_indices, values, axes, size): obj._size = int(size) obj._flat_indices = np.asarray(flat_indices, dtype=np.int64) obj._values = np.asarray(values) + obj.metadata = metadata return obj @property @@ -212,7 +218,11 @@ def __mul__(self, other): if not isinstance(other, (int, float, np.integer, np.floating)): return NotImplemented return SparseHist._from_flat( - self._flat_indices, self._values * other, self._axes, self._size + self._flat_indices, + self._values * other, + self._axes, + self._size, + metadata=self.metadata, ) def __rmul__(self, other): @@ -273,4 +283,6 @@ def __getitem__(self, slice_dict): new_flat = np.ravel_multi_index(new_multi, new_dense_shape) new_values = self._values[mask] - return SparseHist._from_flat(new_flat, new_values, axes_keep, new_size) + return SparseHist._from_flat( + new_flat, new_values, axes_keep, new_size, metadata=self.metadata + ) From 3ef1cfc58e29758d310cb2ae6fe589943a0dc982 Mon Sep 17 00:00:00 2001 From: Josh Bendavid Date: Tue, 21 Apr 2026 18:07:01 +0200 Subject: [PATCH 6/6] SparseHist: align shape with hist.Hist (no-flow); add axes.size/extent SparseHist.shape now returns the no-flow dense shape (tuple(len(ax) for ax in axes)), matching the hist.Hist.shape convention. The internal flat-index layout remains with-flow. _AxesTuple gains size and extent properties mirroring hist.NamedAxesTuple, so h.axes.size == h.shape (no-flow per-axis) and h.axes.extent gives the with-flow per-axis tuple needed for operations that work with the internal flat indices. Co-Authored-By: Claude Opus 4.7 (1M context) --- wums/sparse_hist.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/wums/sparse_hist.py b/wums/sparse_hist.py index 00311f1..0614eb5 100644 --- a/wums/sparse_hist.py +++ b/wums/sparse_hist.py @@ -11,7 +11,8 @@ class _AxesTuple(tuple): - """Tuple of hist axes that supports lookup by name as well as by index.""" + """Tuple of hist axes supporting lookup by name and the + ``size``/``extent`` attributes from ``hist.NamedAxesTuple``.""" def __getitem__(self, key): if isinstance(key, str): @@ -21,6 +22,17 @@ def __getitem__(self, key): raise KeyError(f"axis '{key}' not found") return tuple.__getitem__(self, key) + @property + def size(self): + """Per-axis size (no-flow), matching ``hist.NamedAxesTuple.size``.""" + return tuple(int(len(ax)) for ax in self) + + @property + def extent(self): + """Per-axis extent (with-flow), matching + ``hist.NamedAxesTuple.extent``.""" + return tuple(int(ax.extent) for ax in self) + class SparseHist: """Wrapper combining a scipy sparse array with hist axes describing its dense shape. @@ -37,6 +49,10 @@ class SparseHist: rabbit ``TensorWriter``) can extract either the with-flow or no-flow layout via the ``flow`` parameter on :meth:`toarray` and :meth:`to_flat_csr`. + The public :attr:`shape` property returns the *no-flow* shape to match + ``hist.Hist.shape``. The with-flow dense shape is exposed via + ``h.axes.extent``, matching ``hist.NamedAxesTuple.extent``. + Parameters ---------- data : scipy.sparse array or matrix @@ -105,7 +121,14 @@ def axes(self): @property def shape(self): - return self._dense_shape + """Dense no-flow shape ``(len(axis) for axis in axes)``. + + Matches the ``hist.Hist.shape`` convention of excluding flow + bins. The internal flat-index layout is still with-flow; use + ``h.axes.extent`` (same API as ``hist.NamedAxesTuple.extent``) + to get the with-flow dense shape when needed. + """ + return self._axes.size @property def dtype(self):