From 967020fec0482c238b5fa78bee60bbe06796d6c7 Mon Sep 17 00:00:00 2001
From: Mateusz Sokół
Date: Fri, 10 May 2024 10:50:16 +0200
Subject: [PATCH] Add copy keyword to asarray

---
 pyproject.toml       |  2 +-
 src/finch/tensor.py  | 69 +++++++++++++++++++++++++++++++++-----------
 tests/test_sparse.py |  5 ++--
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e1534e2..a098502 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "finch-tensor"
-version = "0.1.25"
+version = "0.1.26"
 description = ""
 authors = ["Willow Ahrens "]
 readme = "README.md"
diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index 1ce66ec..be6e78e 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -1,6 +1,5 @@
 import builtins
 from typing import Any, Callable, Optional, Iterable, Literal
-import warnings
 
 import numpy as np
 from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple
@@ -53,6 +52,9 @@ class Tensor(_Display, SparseArray):
         order. Default: row-major.
     fill_value : np.number, optional
         Only used when `numpy.ndarray` or `scipy.sparse` is passed.
+    copy : bool, optional
+        If ``True``, the object is copied. If ``None``, the object is copied only if needed.
+        If ``False``, a ``ValueError`` is raised if a copy cannot be avoided. Default: ``None``.
 
     Returns
     -------
@@ -86,26 +88,33 @@ def __init__(
         /,
         *,
         fill_value: np.number | None = None,
+        copy: bool | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool, list)):
-            obj = np.array(obj)
+            obj = np.array(obj, copy=copy)
             if fill_value is None:
                 fill_value = 0.0
 
         if _is_scipy_sparse_obj(obj):  # scipy constructor
-            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
+            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, np.ndarray):  # numpy constructor
-            jl_data = self._from_numpy(obj, fill_value=fill_value)
+            jl_data = self._from_numpy(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, Storage):  # from-storage constructor
+            if copy:
+                self._raise_julia_copy_not_supported()
             order = self.preprocess_order(
                 obj.order, self.get_lvl_ndim(obj.levels_descr._obj)
             )
             self._obj = jl.swizzle(jl.Tensor(obj.levels_descr._obj), *order)
         elif jl.isa(obj, jl.Finch.Tensor):  # raw-Julia-object constructors
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = jl.swizzle(obj, *tuple(range(1, jl.ndims(obj) + 1)))
         elif jl.isa(obj, jl.Finch.SwizzleArray) or jl.isa(obj, jl.Finch.LazyTensor):
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = obj
         elif isinstance(obj, Tensor):
             self._obj = obj._obj
@@ -366,7 +375,9 @@ def _from_other_tensor(cls, tensor: "Tensor", storage: Storage | None) -> JuliaObj:
         )
 
     @classmethod
-    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
+    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number, copy: bool | None = None) -> JuliaObj:
+        if copy:
+            arr = arr.copy()
         order_char = "F" if np.isfortran(arr) else "C"
         order = cls.preprocess_order(order_char, arr.ndim)
         inv_order = tuple(i - 1 for i in jl.invperm(order))
@@ -383,21 +394,31 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
         return jl.swizzle(jl.Tensor(lvl._obj), *order)
 
     @classmethod
-    def from_scipy_sparse(cls, x, fill_value=None) -> "Tensor":
+    def from_scipy_sparse(
+        cls,
+        x,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> "Tensor":
         if not _is_scipy_sparse_obj(x):
             raise ValueError(f"{x} is not a SciPy sparse object.")
-        return Tensor(x, fill_value=fill_value)
+        return Tensor(x, fill_value=fill_value, copy=copy)
 
     @classmethod
-    def _from_scipy_sparse(cls, x, fill_value=None) -> JuliaObj:
+    def _from_scipy_sparse(
+        cls,
+        x,
+        *,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> JuliaObj:
+        if copy is False and not (x.has_canonical_format and x.format in ("coo", "csr", "csc")):
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
+        if copy or not x.has_canonical_format:
+            x = x.copy()
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
         if not x.has_canonical_format:
-            warnings.warn(
-                "SciPy sparse input must be in a canonical format. "
-                "Calling `sum_duplicates`."
-            )
-            x = x.copy()
             x.sum_duplicates()
         assert x.has_canonical_format
 
@@ -579,6 +600,10 @@ def to_scipy_sparse(self, accept_fv=None):
         else:
             raise ValueError("Tensor can't be converted to scipy.sparse object.")
 
+    @staticmethod
+    def _raise_julia_copy_not_supported() -> None:
+        raise ValueError("copy=True isn't supported for Julia object inputs")
+
     def __array_namespace__(self, *, api_version: str | None = None) -> Any:
         if api_version is None:
             api_version = "2023.12"
@@ -603,13 +628,23 @@ def random(shape, density=0.01, random_state=None):
     return Tensor(jl.fsprand(*args))
 
 
-def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
+def asarray(
+    obj,
+    /,
+    *,
+    dtype: DType | None = None,
+    format: str | None = None,
+    fill_value: np.number | None = None,
+    device: Device | None = None,
+    copy: bool | None = None,
+) -> Tensor:
     if format not in {"coo", "csr", "csc", "csf", "dense", None}:
         raise ValueError(f"{format} format not supported.")
     _validate_device(device)
-    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)
-
+    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value, copy=copy)
     if format is not None:
+        if copy is False:
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
         order = tensor.get_order()
         if format == "coo":
             storage = Storage(SparseCOO(tensor.ndim, Element(tensor.fill_value)), order)
@@ -631,7 +666,7 @@ def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
         tensor = tensor.to_device(storage)
 
     if dtype is not None:
-        return astype(tensor, dtype)
+        return astype(tensor, dtype, copy=copy)
     else:
         return tensor
 
diff --git a/tests/test_sparse.py b/tests/test_sparse.py
index e5ab04d..d2bba59 100644
--- a/tests/test_sparse.py
+++ b/tests/test_sparse.py
@@ -44,9 +44,10 @@ def test_wrappers(dtype, jl_dtype, order):
 
 @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
 @pytest.mark.parametrize("order", ["C", "F", None])
-def test_no_copy_fully_dense(dtype, order, arr3d):
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_no_copy_fully_dense(dtype, order, copy, arr3d):
     arr = np.array(arr3d, dtype=dtype, order=order)
-    arr_finch = finch.Tensor(arr)
+    arr_finch = finch.Tensor(arr, copy=copy)
     arr_todense = arr_finch.todense()
 
     assert_equal(arr_todense, arr)
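Reviewer note: a minimal usage sketch of the semantics this patch introduces, not part of
the diff itself. It assumes the package is importable as `finch` (as in tests/test_sparse.py)
and uses throwaway array values.

    import numpy as np
    import finch

    arr = np.zeros((2, 3))

    # copy=True forces a copy of supported inputs (NumPy / SciPy sparse).
    t_copied = finch.asarray(arr, copy=True)

    # copy=None (the default) copies only when needed.
    t_default = finch.asarray(arr)

    # copy=False raises whenever a copy cannot be avoided, e.g. when a format
    # conversion is requested on top of a no-copy construction.
    try:
        finch.asarray(arr, format="csf", copy=False)
    except ValueError as err:
        print(err)  # Unable to avoid copy while creating an array as requested.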