diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7aecccf..8e9cfc5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,11 +49,11 @@ jobs: pip install -e '.[tests]' - name: Run tests run: ci/test_backends.sh - - uses: codecov/codecov-action@v5 + - uses: codecov/codecov-action@2e6e9c5a74ec004831b6d17edfb76c53a54d4d55 if: always() with: token: ${{ secrets.CODECOV_TOKEN }} - files: ./**/coverage*.xml + files: "\"./**/coverage*.xml\"" examples: runs-on: ubuntu-latest diff --git a/sparse/numba_backend/__init__.py b/sparse/numba_backend/__init__.py index 9f731cc1..789222f3 100644 --- a/sparse/numba_backend/__init__.py +++ b/sparse/numba_backend/__init__.py @@ -157,7 +157,7 @@ where, ) from ._dok import DOK -from ._io import load_npz, save_npz +from ._io import from_binsparse, load_npz, save_npz from ._umath import elemwise from ._utils import random @@ -226,6 +226,7 @@ "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater", diff --git a/sparse/numba_backend/_common.py b/sparse/numba_backend/_common.py index d952a0b2..9ce04dc2 100644 --- a/sparse/numba_backend/_common.py +++ b/sparse/numba_backend/_common.py @@ -35,7 +35,7 @@ def _check_device(func): def wrapped(*args, **kwargs): device = kwargs.get("device") if device not in {"cpu", None}: - raise ValueError("Device must be `'cpu'` or `None`.") + raise BufferError("Device must be `'cpu'` or `None`.") return func(*args, **kwargs) return wrapped diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 380f539b..95f3180f 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -11,7 +11,6 @@ from .._coo.core import COO from .._sparse_array import SparseArray from .._utils import ( - _zero_of_dtype, can_store, check_compressed_axes, check_fill_value, @@ -175,13 +174,9 @@ def __init__( if self.data.ndim != 1: raise ValueError("data 
must be a scalar or 1-dimensional.") - self.shape = shape - - if fill_value is None: - fill_value = _zero_of_dtype(self.data.dtype) + SparseArray.__init__(self, shape=shape, fill_value=fill_value) self._compressed_axes = tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None - self.fill_value = self.data.dtype.type(fill_value) if prune: self._prune() @@ -259,32 +254,6 @@ def nnz(self): """ return self.data.shape[0] - @property - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. - - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. - - Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "gcxs" - @property def nbytes(self): """ @@ -443,7 +412,7 @@ def tocoo(self): fill_value=self.fill_value, ) uncompressed = uncompress_dimension(self.indptr) - coords = np.vstack((uncompressed, self.indices)) + coords = np.stack((uncompressed, self.indices)) order = np.argsort(self._axis_order) return ( COO( @@ -844,6 +813,15 @@ def isinf(self): def isnan(self): return self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes) + # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually) + # be expressed in the `binsparse` 0.1 language. + # We are missing index maps. 
+ def __binsparse__(self) -> dict[str, np.ndarray]: + return super().__binsparse__() + + def __binsparse_descriptor__(self) -> dict: + return super().__binsparse_descriptor__() + class _Compressed2d(GCXS): class_compressed_axes: tuple[int] @@ -883,6 +861,30 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None): coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype) return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype) + def __binsparse_descriptor__(self) -> dict: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" + return { + "binsparse": { + "version": "0.1", + "format": self.format.upper(), + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": str(self.indptr.dtype), + "indices_1": str(self.indices.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + def __binsparse__(self) -> dict[str, np.ndarray]: + return {"pointers_to_1": self.indptr, "indices_1": self.indices, "values": self.data} + class CSR(_Compressed2d): """ diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 2db83fde..afcbec91 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -601,29 +601,6 @@ def nnz(self): """ return self.coords.shape[1] - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - -------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
- Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "coo" - @property def nbytes(self): """ @@ -1538,6 +1515,46 @@ def isnan(self): prune=True, ) + def __binsparse_descriptor__(self) -> dict: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" + return { + "binsparse": { + "version": "0.1", + "format": { + "custom": { + "level": { + "level_desc": "sparse", + "rank": self.ndim, + "level": { + "level_desc": "element", + }, + } + } + } + if self.ndim != 2 + else "COOR", + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": "uint64", + "indices_1": str(self.coords.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + def __binsparse__(self) -> dict[str, np.ndarray]: + return { + "pointers_to_1": np.array([0, self.nnz], dtype=np.uint64), + "indices_1": self.coords, + "values": self.data, + } + def as_coo(x, shape=None, fill_value=None, idx_dtype=None): """ diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 4cfed3bc..7df2d128 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -272,29 +272,6 @@ def nnz(self): """ return len(self.data) - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
- Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "dok" - @property def nbytes(self): """ @@ -549,6 +526,9 @@ def reshape(self, shape, order="C"): return DOK.from_coo(self.to_coo().reshape(shape)) + def __binsparse__(self) -> dict[str, np.ndarray]: + raise TypeError("`DOK` doesn't support the `__binsparse__` protocol.") + def to_slice(k): """Convert integer indices to one-element slices for consistency""" diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index 24d9f1db..d73a4a22 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -1,7 +1,9 @@ import numpy as np -from ._compressed import GCXS +from ._common import _check_device +from ._compressed import CSC, CSR, GCXS from ._coo.core import COO +from ._sparse_array import SparseArray def save_npz(filename, matrix, compressed=True): @@ -130,3 +132,172 @@ def load_npz(filename): ) except KeyError as e: raise RuntimeError(f"The file {filename!s} does not contain a valid sparse matrix") from e + + +@_check_device +def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseArray: + desc = arr.__binsparse_descriptor__() + arrs = arr.__binsparse__() + + desc = desc["binsparse"] + version_tuple: tuple[int, ...] = tuple(int(v) for v in desc["version"].split(".")) + if version_tuple != (0, 1): + raise RuntimeError("Unsupported `__binsparse__` protocol version.") + + format = desc["format"] + format_err_str = f"Unsupported format: `{format!r}`." 
+ + if isinstance(format, str): + match format: + case "COO" | "COOR": + desc["format"] = { + "custom": { + "transpose": [0, 1], + "level": { + "level_desc": "sparse", + "rank": 2, + "level": { + "level_desc": "element", + }, + }, + } + } + case "CSC" | "CSR": + desc["format"] = { + "custom": { + "transpose": [0, 1] if format == "CSR" else [1, 0], + "level": { + "level_desc": "dense", + "level": { + "level_desc": "sparse", + "level": { + "level_desc": "element", + }, + }, + }, + }, + } + case _: + raise RuntimeError(format_err_str) + + format = desc["format"]["custom"] + rank = 0 + level = format + while "level" in level: + if "rank" not in level: + level["rank"] = 1 + rank += level["rank"] + level = level["level"] + if "transpose" not in format: + format["transpose"] = list(range(rank)) + + match desc: + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "sparse", + "rank": ndim, + "level": { + "level_desc": "element", + }, + }, + }, + }, + "shape": shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": _, + "indices_1": coords_dtype, + "values": value_dtype, + }, + **_kwargs, + }: + if transpose != list(range(ndim)): + raise RuntimeError(format_err_str) + + ptr_arr: np.ndarray = np.from_dlpack(arrs["pointers_to_1"]) + start, end = ptr_arr + if copy is False and not (start == 0 or end == nnz): + raise RuntimeError(format_err_str) + + coord_arr: np.ndarray = np.from_dlpack(arrs["indices_1"]) + value_arr: np.ndarray = np.from_dlpack(arrs["values"]) + + _check_binsparse_dt(coord_arr, coords_dtype) + _check_binsparse_dt(value_arr, value_dtype) + + return COO( + coord_arr[:, start:end], + value_arr, + shape=shape, + has_duplicates=False, + sorted=True, + prune=False, + idx_dtype=coord_arr.dtype, + ) + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "dense", + "rank": 1, + "level": { + "level_desc": "sparse", + "rank": 1, + "level": { + "level_desc": "element", + }, + 
}, + }, + }, + }, + "shape": shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": ptr_dtype, + "indices_1": crd_dtype, + "values": val_dtype, + }, + **_kwargs, + }: + ptr_arr = np.from_dlpack(arrs["pointers_to_1"]) + _check_binsparse_dt(ptr_arr, ptr_dtype) + crd_arr = np.from_dlpack(arrs["indices_1"]) + _check_binsparse_dt(crd_arr, crd_dtype) + val_arr = np.from_dlpack(arrs["values"]) + _check_binsparse_dt(val_arr, val_dtype) + + match transpose: + case [0, 1]: + sparse_type = CSR + case [1, 0]: + sparse_type = CSC + case _: + raise RuntimeError(format_err_str) + + return sparse_type((val_arr, crd_arr, ptr_arr), shape=shape) + case _: + raise RuntimeError(format_err_str) + + +def _convert_binsparse_dtype(dt: str) -> np.dtype: + if dt.startswith("complex[float") and dt.endswith("]"): + complex_bits = 2 * int(dt[len("complex[float") : -len("]")]) + dt: str = f"complex{complex_bits}" + + return np.dtype(dt) + + +def _check_binsparse_dt(arr: np.ndarray, dt: str) -> None: + invalid_dtype_str = "Invalid dtype: `{dtype!s}`, expected `{expected!s}`." + dt = _convert_binsparse_dtype(dt) + if dt != arr.dtype: + raise BufferError( + invalid_dtype_str.format( + dtype=arr.dtype, + expected=dt, + ) + ) diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index 13180521..a750fbb0 100644 --- a/sparse/numba_backend/_sparse_array.py +++ b/sparse/numba_backend/_sparse_array.py @@ -145,6 +145,34 @@ def size(self): # returns a float64 for an empty shape. return reduce(operator.mul, self.shape, 1) + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + [`scipy.sparse.coo_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.csr_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5, 5), density=0.2, format="dok") + >>> s.format + 'dok' + >>> t = sparse.random((5, 5), density=0.2, format="coo") + >>> t.format + 'coo' + """ + return type(self).__name__.lower() + @property def density(self): """ @@ -218,6 +246,29 @@ def _str_impl(self, summary): except (ImportError, ValueError): return summary + @abstractmethod + def __binsparse_descriptor__(self) -> dict: + """Return a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) + of this array. + + Returns + ------- + dict + Parsed `binsparse` descriptor. + """ + raise NotImplementedError + + @abstractmethod + def __binsparse__(self) -> dict[str, np.ndarray]: + """Return a `dict[str, np.ndarray]` of the constituent arrays. + + Returns + ------- + dict + Constituent arrays, keyed by their `binsparse` array names. + """ + raise NotImplementedError + @abstractmethod def asformat(self, format): """ diff --git a/sparse/numba_backend/tests/test_coo.py b/sparse/numba_backend/tests/test_coo.py index efd7f779..e3b29a54 100644 --- a/sparse/numba_backend/tests/test_coo.py +++ b/sparse/numba_backend/tests/test_coo.py @@ -1896,7 +1896,7 @@ def test_invalid_device(func, args, kwargs): like = sparse.random((5, 5), density=0.5) args = (like,) + args - with pytest.raises(ValueError, match="Device must be"): + with pytest.raises(BufferError, match="Device must be"): func(*args, device="invalid_device", **kwargs) diff --git a/sparse/numba_backend/tests/test_io.py b/sparse/numba_backend/tests/test_io.py index 060b9263..c6caa25f 100644 --- a/sparse/numba_backend/tests/test_io.py +++ b/sparse/numba_backend/tests/test_io.py @@ -28,3 +28,13 @@ def test_load_wrong_format_exception(tmp_path): np.savez(filename, x) with pytest.raises(RuntimeError): load_npz(filename) + + +@pytest.mark.parametrize( + "format", ["coo", "csr", pytest.param("csc", marks=pytest.mark.xfail(reason="`CSC<>COO` round-trip broken"))] +) +def 
test_round_trip_binsparse(format: str) -> None: + x = sparse.random((20, 30), density=0.25, format=format) + y = sparse.from_binsparse(x) + + assert_eq(x, y) diff --git a/sparse/numba_backend/tests/test_namespace.py b/sparse/numba_backend/tests/test_namespace.py index 39556f99..ca1a4277 100644 --- a/sparse/numba_backend/tests/test_namespace.py +++ b/sparse/numba_backend/tests/test_namespace.py @@ -67,6 +67,7 @@ def test_namespace(): "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater",