From 967020fec0482c238b5fa78bee60bbe06796d6c7 Mon Sep 17 00:00:00 2001
From: Mateusz Sokół
Date: Fri, 10 May 2024 10:50:16 +0200
Subject: [PATCH] Add copy keyword to asarray

---
 pyproject.toml       |  2 +-
 src/finch/tensor.py  | 69 +++++++++++++++++++++++++++++++++-----------
 tests/test_sparse.py |  5 ++--
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e1534e2..a098502 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "finch-tensor"
-version = "0.1.25"
+version = "0.1.26"
 description = ""
 authors = ["Willow Ahrens "]
 readme = "README.md"
diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index 1ce66ec..be6e78e 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -1,6 +1,5 @@
 import builtins
 from typing import Any, Callable, Optional, Iterable, Literal
-import warnings
 
 import numpy as np
 from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple
@@ -53,6 +52,9 @@ class Tensor(_Display, SparseArray):
         order. Default: row-major.
     fill_value : np.number, optional
         Only used when `numpy.ndarray` or `scipy.sparse` is passed.
+    copy : bool, optional
+        If ``True``, the object is copied. If ``None``, the object is copied only if needed.
+        If ``False``, a ``ValueError`` is raised if a copy cannot be avoided. Default: ``None``.
 
     Returns
     -------
@@ -86,26 +88,33 @@ def __init__(
         /,
         *,
         fill_value: np.number | None = None,
+        copy: bool | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool, list)):
-            obj = np.array(obj)
+            obj = np.array(obj, copy=copy)
             if fill_value is None:
                 fill_value = 0.0
 
         if _is_scipy_sparse_obj(obj):  # scipy constructor
-            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value)
+            jl_data = self._from_scipy_sparse(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, np.ndarray):  # numpy constructor
-            jl_data = self._from_numpy(obj, fill_value=fill_value)
+            jl_data = self._from_numpy(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, Storage):  # from-storage constructor
+            if copy:
+                self._raise_julia_copy_not_supported()
             order = self.preprocess_order(
                 obj.order, self.get_lvl_ndim(obj.levels_descr._obj)
             )
             self._obj = jl.swizzle(jl.Tensor(obj.levels_descr._obj), *order)
         elif jl.isa(obj, jl.Finch.Tensor):  # raw-Julia-object constructors
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = jl.swizzle(obj, *tuple(range(1, jl.ndims(obj) + 1)))
         elif jl.isa(obj, jl.Finch.SwizzleArray) or jl.isa(obj, jl.Finch.LazyTensor):
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = obj
         elif isinstance(obj, Tensor):
             self._obj = obj._obj
@@ -366,7 +375,9 @@ def _from_other_tensor(cls, tensor: "Tensor", storage: Storage | None) -> JuliaObj:
         )
 
     @classmethod
-    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
+    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number, copy: bool | None = None) -> JuliaObj:
+        if copy:
+            arr = arr.copy()
         order_char = "F" if np.isfortran(arr) else "C"
         order = cls.preprocess_order(order_char, arr.ndim)
         inv_order = tuple(i - 1 for i in jl.invperm(order))
@@ -383,21 +394,31 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
         return jl.swizzle(jl.Tensor(lvl._obj), *order)
 
     @classmethod
-    def from_scipy_sparse(cls, x, fill_value=None) -> "Tensor":
+    def from_scipy_sparse(
+        cls,
+        x,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> "Tensor":
         if not _is_scipy_sparse_obj(x):
             raise ValueError(f"{x} is not a SciPy sparse object.")
-        return Tensor(x, fill_value=fill_value)
+        return Tensor(x, fill_value=fill_value, copy=copy)
 
     @classmethod
-    def _from_scipy_sparse(cls, x, fill_value=None) -> JuliaObj:
+    def _from_scipy_sparse(
+        cls,
+        x,
+        *,
+        fill_value: np.number | None = None,
+        copy: bool | None = None,
+    ) -> JuliaObj:
+        if copy is False and not (x.has_canonical_format and x.format in ("coo", "csr", "csc")):
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
+        if copy or not x.has_canonical_format:
+            x = x.copy()
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
         if not x.has_canonical_format:
-            warnings.warn(
-                "SciPy sparse input must be in a canonical format. "
-                "Calling `sum_duplicates`."
-            )
-            x = x.copy()
             x.sum_duplicates()
         assert x.has_canonical_format
 
@@ -579,6 +600,10 @@ def to_scipy_sparse(self, accept_fv=None):
         else:
             raise ValueError("Tensor can't be converted to scipy.sparse object.")
 
+    @staticmethod
+    def _raise_julia_copy_not_supported() -> None:
+        raise ValueError("copy=True isn't supported for Julia object inputs")
+
     def __array_namespace__(self, *, api_version: str | None = None) -> Any:
         if api_version is None:
             api_version = "2023.12"
@@ -603,13 +628,23 @@ def random(shape, density=0.01, random_state=None):
     return Tensor(jl.fsprand(*args))
 
 
-def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
+def asarray(
+    obj,
+    /,
+    *,
+    dtype: DType | None = None,
+    format: str | None = None,
+    fill_value: np.number | None = None,
+    device: Device | None = None,
+    copy: bool | None = None,
+) -> Tensor:
     if format not in {"coo", "csr", "csc", "csf", "dense", None}:
         raise ValueError(f"{format} format not supported.")
     _validate_device(device)
-    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value)
-
+    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, fill_value=fill_value, copy=copy)
     if format is not None:
+        if copy is False:
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
         order = tensor.get_order()
         if format == "coo":
             storage = Storage(SparseCOO(tensor.ndim, Element(tensor.fill_value)), order)
@@ -631,7 +666,7 @@ def asarray(obj, /, *, dtype=None, format=None, fill_value=None, device=None):
         tensor = tensor.to_device(storage)
 
     if dtype is not None:
-        return astype(tensor, dtype)
+        return astype(tensor, dtype, copy=copy)
     else:
         return tensor
 
diff --git a/tests/test_sparse.py b/tests/test_sparse.py
index e5ab04d..d2bba59 100644
--- a/tests/test_sparse.py
+++ b/tests/test_sparse.py
@@ -44,9 +44,10 @@ def test_wrappers(dtype, jl_dtype, order):
 
 @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
 @pytest.mark.parametrize("order", ["C", "F", None])
-def test_no_copy_fully_dense(dtype, order, arr3d):
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_no_copy_fully_dense(dtype, order, copy, arr3d):
     arr = np.array(arr3d, dtype=dtype, order=order)
-    arr_finch = finch.Tensor(arr)
+    arr_finch = finch.Tensor(arr, copy=copy)
     arr_todense = arr_finch.todense()
 
     assert_equal(arr_todense, arr)
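Reviewer note: a minimal usage sketch of the semantics this patch introduces, not part of
the diff itself. It assumes the package is importable as `finch` (as in tests/test_sparse.py)
and uses throwaway array values.

    import numpy as np
    import finch

    arr = np.zeros((2, 3))

    # copy=True forces a copy of supported inputs (NumPy / SciPy sparse).
    t_copied = finch.asarray(arr, copy=True)

    # copy=None (the default) copies only when needed.
    t_default = finch.asarray(arr)

    # copy=False raises whenever a copy cannot be avoided, e.g. when a format
    # conversion is requested on top of a no-copy construction.
    try:
        finch.asarray(arr, format="csf", copy=False)
    except ValueError as err:
        print(err)  # Unable to avoid copy while creating an array as requested.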