From a3a3aa693c7caf15c7e316b17d2a777c6bd161d6 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 18:17:52 -0700 Subject: [PATCH 01/17] moved AK_dt_unit_from_array to utilities --- src/tri_map.c | 16 ++++++++-------- src/utilities.h | 11 +++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/tri_map.c b/src/tri_map.c index 48c41c47..959cac45 100644 --- a/src/tri_map.c +++ b/src/tri_map.c @@ -11,14 +11,14 @@ # include "tri_map.h" # include "utilities.h" -static inline NPY_DATETIMEUNIT -AK_dt_unit_from_array(PyArrayObject* a) { - // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. - PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref - PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); - // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); - return dma->base; -} +// static inline NPY_DATETIMEUNIT +// AK_dt_unit_from_array(PyArrayObject* a) { +// // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. 
+// PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref +// PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); +// // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); +// return dma->base; +// } typedef struct TriMapOne { Py_ssize_t from; // signed diff --git a/src/utilities.h b/src/utilities.h index 9b85a198..d6c45075 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -222,6 +222,17 @@ AK_slice_to_ascending_slice(PyObject* slice, Py_ssize_t size) -step); } + +static inline NPY_DATETIMEUNIT +AK_dt_unit_from_array(PyArrayObject* a) { + // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. + PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref + PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); + // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); + return dma->base; +} + + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. 
static inline PyObject * AK_nonzero_1d(PyArrayObject* array) { From 6f38224812a8c3cd1388293ba190d78694e6503f Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 18:26:31 -0700 Subject: [PATCH 02/17] sketch case --- src/utilities.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/utilities.h b/src/utilities.h index d6c45075..6df0909e 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -232,6 +232,18 @@ AK_dt_unit_from_array(PyArrayObject* a) { return dma->base; } +// Givne a dt64 array, determine if it can be cast to a object without data loss. +static inline bool +AK_is_objectable_dt64(PyArrayObject* a) +{ + NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); + swith (unit) { + case NPY_FR_Y: + case NPY_FR_M; + case NPY_FR_W; + } +} + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. 
static inline PyObject * From d216f09482ea70993b66bd1810925f206d903163 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 20:04:03 -0700 Subject: [PATCH 03/17] preliminary objectable test --- src/__init__.py | 1 + src/__init__.pyi | 1 + src/_arraykit.c | 1 + src/methods.c | 9 +++++++++ src/methods.h | 3 +++ src/utilities.h | 43 ++++++++++++++++++++++++++++++++++++++--- test/test_objectable.py | 14 ++++++++++++++ 7 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 test/test_objectable.py diff --git a/src/__init__.py b/src/__init__.py index 8c85b5e5..7bf835b5 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -31,3 +31,4 @@ from ._arraykit import array_to_tuple_array as array_to_tuple_array from ._arraykit import array_to_tuple_iter as array_to_tuple_iter from ._arraykit import nonzero_1d as nonzero_1d +from ._arraykit import is_objectable_dt64 as is_objectable_dt64 diff --git a/src/__init__.pyi b/src/__init__.pyi index 25a763c7..ced18cc9 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -164,6 +164,7 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... +def is_objectable_dt64(__array: np.ndarray, /) -> np.ndarray: ... def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ... def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ... 
\ No newline at end of file diff --git a/src/_arraykit.c b/src/_arraykit.c index 5ceacac0..eb5b9e98 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -51,6 +51,7 @@ static PyMethodDef arraykit_methods[] = { NULL}, {"count_iteration", count_iteration, METH_O, NULL}, {"nonzero_1d", nonzero_1d, METH_O, NULL}, + {"is_objectable_dt64", is_objectable_dt64, METH_O, NULL}, {"isna_element", (PyCFunction)isna_element, METH_VARARGS | METH_KEYWORDS, diff --git a/src/methods.c b/src/methods.c index 21bcbeb4..3016d496 100644 --- a/src/methods.c +++ b/src/methods.c @@ -201,6 +201,15 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { return AK_nonzero_1d(array); } +PyObject * +is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a) { + AK_CHECK_NUMPY_ARRAY(a); + PyArrayObject* array = (PyArrayObject*)a; + AK_is_objectable_dt64(array); + Py_RETURN_FALSE; +} + + static char *first_true_1d_kwarg_names[] = { "array", "forward", diff --git a/src/methods.h b/src/methods.h index 751ccf85..340ef705 100644 --- a/src/methods.h +++ b/src/methods.h @@ -47,6 +47,9 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg); PyObject * nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a); +PyObject * +is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a); + PyObject * first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs); diff --git a/src/utilities.h b/src/utilities.h index 6df0909e..49be1e1b 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -237,14 +237,51 @@ static inline bool AK_is_objectable_dt64(PyArrayObject* a) { NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); - swith (unit) { + switch (unit) { + case NPY_FR_ERROR: case NPY_FR_Y: - case NPY_FR_M; - case NPY_FR_W; + case NPY_FR_M: + case NPY_FR_W: + return false; + case NPY_FR_D: + case NPY_FR_h: + case NPY_FR_m: + case NPY_FR_s: + case NPY_FR_ms: + case NPY_FR_us: + break; + case NPY_FR_ns: + case NPY_FR_ps: + case NPY_FR_fs: + case NPY_FR_as: + case NPY_FR_GENERIC: + return false; } + + PyArray_Descr* dt_year 
= PyArray_DescrFromType(NPY_DATETIME); + if (dt_year == NULL) { + return NULL; + } + // TODO: not sure how to do this + // dt_year->metadata = Py_BuildValue("{s:i}", "unit", NPY_FR_Y); + PyObject* a_year = PyArray_CastToType(a, dt_year, 0); + Py_DECREF(dt_year); + + Py_DECREF(a_year); + return false; + + // years = array[~np.isnat(array)].astype(DT64_YEAR).astype(DTYPE_INT_DEFAULT) + 1970 + // if np.any(years < datetime.MINYEAR): + // return False + // if np.any(years > datetime.MAXYEAR): + // return False + // return True + } + + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. 
static inline PyObject * AK_nonzero_1d(PyArrayObject* array) { diff --git a/test/test_objectable.py b/test/test_objectable.py new file mode 100644 index 00000000..ba2b8a2e --- /dev/null +++ b/test/test_objectable.py @@ -0,0 +1,14 @@ +import unittest + +import numpy as np + +from arraykit import is_objectable_dt64 + +class TestUnit(unittest.TestCase): + + def test_is_objectable_dt64_a(self) -> None: + a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) + self.assertFalse(is_objectable_dt64(a1)) + + + From ec62103d820fcec41830904e21e3e3d393da811e Mon Sep 17 00:00:00 2001 From: flexatone Date: Tue, 24 Jun 2025 12:13:10 -0700 Subject: [PATCH 04/17] removed redundancies --- src/utilities.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/utilities.h b/src/utilities.h index feee4aa4..47753f16 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -229,7 +229,6 @@ AK_dt_unit_from_array(PyArrayObject* a) { // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); - // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); return dma->base; } @@ -379,15 +378,6 @@ AK_nonzero_1d(PyArrayObject* array) { return final; } -static inline NPY_DATETIMEUNIT -AK_dt_unit_from_array(PyArrayObject* a) { - // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. 
- PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref - PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); - // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); - return dma->base; -} - static inline NPY_DATETIMEUNIT AK_dt_unit_from_scalar(PyDatetimeScalarObject* dts) { // Based on convert_pyobject_to_datetime and related usage in datetime.c From 6c8d649fd9f19a9693e4cbce5a121a579d45ac6f Mon Sep 17 00:00:00 2001 From: flexatone Date: Tue, 24 Jun 2025 12:43:06 -0700 Subject: [PATCH 05/17] updated readme --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index ac6d1047..8d17f777 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,7 @@ What is New in ArrayKit Now building free-threaded compatible wheels for Python 3.13. +Added ``is_objectable_dt64()``. 1.0.9 From e8b3c6a98fce1199cea337fc423bb376b63f64f0 Mon Sep 17 00:00:00 2001 From: flexatone Date: Tue, 24 Jun 2025 15:00:12 -0700 Subject: [PATCH 06/17] progress --- src/methods.c | 11 +++++++-- src/utilities.h | 50 ++++++++++++++++++++++++++++------------- test/test_objectable.py | 9 ++++++++ test/test_util.py | 3 +-- 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/methods.c b/src/methods.c index 3016d496..ab6d8b7c 100644 --- a/src/methods.c +++ b/src/methods.c @@ -205,8 +205,15 @@ PyObject * is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a) { AK_CHECK_NUMPY_ARRAY(a); PyArrayObject* array = (PyArrayObject*)a; - AK_is_objectable_dt64(array); - Py_RETURN_FALSE; + switch (AK_is_objectable_dt64(array)) { + case -1: + return NULL; + case 0: + Py_RETURN_FALSE; + case 1: + Py_RETURN_TRUE; + } + return NULL; } diff --git a/src/utilities.h b/src/utilities.h index 47753f16..4163c0fa 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -232,8 +232,8 @@ AK_dt_unit_from_array(PyArrayObject* a) { return dma->base; } -// Givne a dt64 array, determine if it 
can be cast to a object without data loss. -static inline bool +// Given a dt64 array, determine if it can be cast to a object without data loss. Returns -1 on error +static inline int AK_is_objectable_dt64(PyArrayObject* a) { NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); @@ -258,24 +258,42 @@ AK_is_objectable_dt64(PyArrayObject* a) return false; } - PyArray_Descr* dt_year = PyArray_DescrFromType(NPY_DATETIME); - if (dt_year == NULL) { - return NULL; + PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]"); + if (!dt_year_str) return -1; + + PyArray_Descr* dt_year = NULL; + if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) { + Py_DECREF(dt_year_str); + return -1; } - // TODO: not sure how to do this - // dt_year->metadata = Py_BuildValue("{s:i}", "unit", NPY_FR_Y); + Py_DECREF(dt_year_str); + AK_DEBUG_MSG_OBJ("got descr", (PyObject*)dt_year); + PyObject* a_year = PyArray_CastToType(a, dt_year, 0); - Py_DECREF(dt_year); + if (!a_year) { + Py_DECREF(dt_year); + return -1; + } + AK_DEBUG_MSG_OBJ("a_year", a_year); + + npy_int64* data = (npy_int64*)PyArray_DATA((PyArrayObject*)a_year); + npy_intp size = PyArray_SIZE((PyArrayObject*)a_year); + + for (npy_intp i = 0; i < size; ++i) { + npy_int64 v = data[i]; + // if (v == NPY_DATETIME_NAT) { + // continue; + // } + // offset: 1-1970, 9999-1970 + AK_DEBUG_MSG_OBJ("int values", PyLong_FromSsize_t(v)); + if (v < -1969 || v > 8029) { + Py_DECREF(a_year); + return 0; + } + } Py_DECREF(a_year); - return false; - - // years = array[~np.isnat(array)].astype(DT64_YEAR).astype(DTYPE_INT_DEFAULT) + 1970 - // if np.any(years < datetime.MINYEAR): - // return False - // if np.any(years > datetime.MAXYEAR): - // return False - // return True + return 1; } diff --git a/test/test_objectable.py b/test/test_objectable.py index ba2b8a2e..85485ae3 100644 --- a/test/test_objectable.py +++ b/test/test_objectable.py @@ -8,7 +8,16 @@ class TestUnit(unittest.TestCase): def test_is_objectable_dt64_a(self) -> None: a1 = 
np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) + self.assertTrue(is_objectable_dt64(a1)) + + + def test_is_objectable_dt64_b(self) -> None: + # years are nevery objectable + a1 = np.array(['2022', '2023'], dtype=np.datetime64) self.assertFalse(is_objectable_dt64(a1)) + def test_is_objectable_dt64_c(self) -> None: + a1 = np.array(['-120', '2023'], dtype=np.datetime64) + self.assertFalse(is_objectable_dt64(a1)) diff --git a/test/test_util.py b/test/test_util.py index b300dbfd..7cabbb32 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -17,6 +17,7 @@ from arraykit import immutable_filter from arraykit import array_deepcopy from arraykit import isna_element +from arraykit import is_objectable_dt64 from arraykit import dtype_from_element from arraykit import count_iteration from arraykit import first_true_1d @@ -953,7 +954,5 @@ def test_slice_to_ascending_slice_i(self) -> None: ) - - if __name__ == '__main__': unittest.main() From 5ff6c9285ac87db1d18f434e7996c822c6a1eb9c Mon Sep 17 00:00:00 2001 From: flexatone Date: Tue, 24 Jun 2025 16:13:33 -0700 Subject: [PATCH 07/17] usage of shared dtype --- src/_arraykit.c | 19 +++++++++++++++++-- src/methods.c | 11 +++++++++-- src/utilities.h | 34 +++++++++++++++------------------- test/test_objectable.py | 21 +++++++++++++++++++-- 4 files changed, 60 insertions(+), 25 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index edb72c21..309c1972 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -96,6 +96,7 @@ PyInit__arraykit(void) return NULL; } + // store a reference to the deepcopy function PyObject *copy = PyImport_ImportModule("copy"); if (copy == NULL) { return NULL; @@ -106,6 +107,18 @@ PyInit__arraykit(void) return NULL; } + // store a year dtype object + PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]"); + if (!dt_year_str) return NULL; + + PyArray_Descr* dt_year = NULL; + if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) { + Py_DECREF(dt_year_str); + return NULL; + } + 
Py_DECREF(dt_year_str); + + PyObject *m = PyModule_Create(&arraykit_module); if (!m || PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) || @@ -129,9 +142,11 @@ PyInit__arraykit(void) PyModule_AddObject(m, "ErrorInitTypeBlocks", ErrorInitTypeBlocks) || PyModule_AddObject(m, "AutoMap", (PyObject *)&AMType) || PyModule_AddObject(m, "FrozenAutoMap", (PyObject *)&FAMType) || - PyModule_AddObject(m, "NonUniqueError", NonUniqueError) + PyModule_AddObject(m, "NonUniqueError", NonUniqueError) || + PyModule_AddObject(m, "dt_year", (PyObject *)dt_year) ){ - Py_DECREF(deepcopy); + Py_XDECREF(deepcopy); + Py_XDECREF(dt_year); Py_XDECREF(m); return NULL; } diff --git a/src/methods.c b/src/methods.c index ab6d8b7c..811cda03 100644 --- a/src/methods.c +++ b/src/methods.c @@ -202,10 +202,17 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { } PyObject * -is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a) { +is_objectable_dt64(PyObject *m, PyObject *a) { AK_CHECK_NUMPY_ARRAY(a); + + // this returns a new reference + PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); + PyArrayObject* array = (PyArrayObject*)a; - switch (AK_is_objectable_dt64(array)) { + int result = AK_is_objectable_dt64(array, dt_year); + Py_DECREF(dt_year); + + switch (result) { case -1: return NULL; case 0: diff --git a/src/utilities.h b/src/utilities.h index 4163c0fa..50ff58f8 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -232,9 +232,9 @@ AK_dt_unit_from_array(PyArrayObject* a) { return dma->base; } -// Given a dt64 array, determine if it can be cast to a object without data loss. Returns -1 on error +// Given a dt64 array, determine if it can be cast to a object without data loss. Returns -1 on error. 
NOTE: if we use dt_year, must incref first static inline int -AK_is_objectable_dt64(PyArrayObject* a) +AK_is_objectable_dt64(PyArrayObject* a, PyObject* dt_year) { NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); switch (unit) { @@ -258,43 +258,39 @@ AK_is_objectable_dt64(PyArrayObject* a) return false; } - PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]"); - if (!dt_year_str) return -1; + // PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]"); + // if (!dt_year_str) return -1; - PyArray_Descr* dt_year = NULL; - if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) { - Py_DECREF(dt_year_str); - return -1; - } - Py_DECREF(dt_year_str); - AK_DEBUG_MSG_OBJ("got descr", (PyObject*)dt_year); + // PyArray_Descr* dt_year = NULL; + // if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) { + // Py_DECREF(dt_year_str); + // return -1; + // } + // Py_DECREF(dt_year_str); - PyObject* a_year = PyArray_CastToType(a, dt_year, 0); + Py_INCREF(dt_year); + PyObject* a_year = PyArray_CastToType(a, (PyArray_Descr*)dt_year, 0); if (!a_year) { Py_DECREF(dt_year); return -1; } - AK_DEBUG_MSG_OBJ("a_year", a_year); npy_int64* data = (npy_int64*)PyArray_DATA((PyArrayObject*)a_year); npy_intp size = PyArray_SIZE((PyArrayObject*)a_year); for (npy_intp i = 0; i < size; ++i) { npy_int64 v = data[i]; - // if (v == NPY_DATETIME_NAT) { - // continue; - // } + if (v == NPY_DATETIME_NAT) { + continue; + } // offset: 1-1970, 9999-1970 - AK_DEBUG_MSG_OBJ("int values", PyLong_FromSsize_t(v)); if (v < -1969 || v > 8029) { Py_DECREF(a_year); return 0; } } - Py_DECREF(a_year); return 1; - } diff --git a/test/test_objectable.py b/test/test_objectable.py index 85485ae3..eab64ba5 100644 --- a/test/test_objectable.py +++ b/test/test_objectable.py @@ -6,10 +6,18 @@ class TestUnit(unittest.TestCase): - def test_is_objectable_dt64_a(self) -> None: + def test_is_objectable_dt64_a1(self) -> None: a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) 
self.assertTrue(is_objectable_dt64(a1)) + def test_is_objectable_dt64_a2(self) -> None: + a1 = np.array(['2022-01-04', '', '1954-04-12'], dtype=np.datetime64) + self.assertTrue(is_objectable_dt64(a1)) + + def test_is_objectable_dt64_a3(self) -> None: + a1 = np.array(['2022-01-04', '1954-04-12', '', ''], dtype=np.datetime64) + self.assertTrue(is_objectable_dt64(a1)) + def test_is_objectable_dt64_b(self) -> None: # years are nevery objectable @@ -18,6 +26,15 @@ def test_is_objectable_dt64_b(self) -> None: def test_is_objectable_dt64_c(self) -> None: - a1 = np.array(['-120', '2023'], dtype=np.datetime64) + a1 = np.array(['-120-01-01', '2023-04-05'], dtype='datetime64[m]') + self.assertFalse(is_objectable_dt64(a1)) + + def test_is_objectable_dt64_d(self) -> None: + a1 = np.array(['2024-01-01', '2023-04-05', '10000-01-01'], dtype='datetime64[s]') + self.assertFalse(is_objectable_dt64(a1)) + + + def test_is_objectable_dt64_e(self) -> None: + a1 = np.array(['2024-01-01', '2023-04-05'], dtype='datetime64[ns]') self.assertFalse(is_objectable_dt64(a1)) From 50aedfe05c944226d69a5706ee22826f23633535 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 24 Jun 2025 16:51:34 -0700 Subject: [PATCH 08/17] cleanup --- src/utilities.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/utilities.h b/src/utilities.h index 50ff58f8..05d81bcf 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -258,16 +258,6 @@ AK_is_objectable_dt64(PyArrayObject* a, PyObject* dt_year) return false; } - // PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]"); - // if (!dt_year_str) return -1; - - // PyArray_Descr* dt_year = NULL; - // if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) { - // Py_DECREF(dt_year_str); - // return -1; - // } - // Py_DECREF(dt_year_str); - Py_INCREF(dt_year); PyObject* a_year = PyArray_CastToType(a, (PyArray_Descr*)dt_year, 0); if (!a_year) { From a9185aff62603dd268b92034a3d8079a7abb06a7 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: 
Tue, 24 Jun 2025 17:58:02 -0700 Subject: [PATCH 09/17] completed implementation of astype_array --- src/__init__.py | 1 + src/__init__.pyi | 1 + src/_arraykit.c | 1 + src/methods.c | 75 +++++++++++++++++++++++++++++++++++++-- src/methods.h | 5 ++- test/test_astype_array.py | 16 +++++++++ 6 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 test/test_astype_array.py diff --git a/src/__init__.py b/src/__init__.py index 0833ae0b..ca6f5398 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -32,6 +32,7 @@ from ._arraykit import array_to_tuple_iter as array_to_tuple_iter from ._arraykit import nonzero_1d as nonzero_1d from ._arraykit import is_objectable_dt64 as is_objectable_dt64 +from ._arraykit import astype_array as astype_array from ._arraykit import AutoMap as AutoMap from ._arraykit import FrozenAutoMap as FrozenAutoMap from ._arraykit import NonUniqueError as NonUniqueError diff --git a/src/__init__.pyi b/src/__init__.pyi index 008a421e..d7e2c7cb 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -204,6 +204,7 @@ def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... def is_objectable_dt64(__array: np.ndarray, /) -> np.ndarray: ... +def astype_array(__array: np.ndarray, __dtype: np.dtype | None, /) -> np.ndarray: ... def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ... def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ... 
\ No newline at end of file diff --git a/src/_arraykit.c b/src/_arraykit.c index 309c1972..bbfc06af 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -53,6 +53,7 @@ static PyMethodDef arraykit_methods[] = { {"count_iteration", count_iteration, METH_O, NULL}, {"nonzero_1d", nonzero_1d, METH_O, NULL}, {"is_objectable_dt64", is_objectable_dt64, METH_O, NULL}, + {"astype_array", astype_array, METH_VARARGS, NULL}, {"isna_element", (PyCFunction)isna_element, METH_VARARGS | METH_KEYWORDS, diff --git a/src/methods.c b/src/methods.c index 811cda03..9e36480c 100644 --- a/src/methods.c +++ b/src/methods.c @@ -201,7 +201,7 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { return AK_nonzero_1d(array); } -PyObject * +PyObject* is_objectable_dt64(PyObject *m, PyObject *a) { AK_CHECK_NUMPY_ARRAY(a); @@ -209,10 +209,10 @@ is_objectable_dt64(PyObject *m, PyObject *a) { PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); PyArrayObject* array = (PyArrayObject*)a; - int result = AK_is_objectable_dt64(array, dt_year); + int is_objectable = AK_is_objectable_dt64(array, dt_year); Py_DECREF(dt_year); - switch (result) { + switch (is_objectable) { case -1: return NULL; case 0: @@ -224,6 +224,75 @@ is_objectable_dt64(PyObject *m, PyObject *a) { } +PyObject* +astype_array(PyObject* m, PyObject* args) { + + PyObject* a = NULL; + PyObject* dtype_spec = Py_None; + + if (!PyArg_ParseTuple(args, "O|O", &a, &dtype_spec)) { + return NULL; + } + AK_CHECK_NUMPY_ARRAY(a); + PyArrayObject* array = (PyArrayObject*)a; + + PyArray_Descr* dtype = NULL; + if (dtype_spec == Py_None) { + dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); + } else { + if (!PyArray_DescrConverter(dtype_spec, &dtype)) { + Py_DECREF((PyObject*)array); + return NULL; + } + } + + int dt_equal = PyArray_EquivTypes(PyArray_DESCR(array), dtype); + if (dt_equal && !PyArray_ISWRITEABLE(array)) { + Py_DECREF(dtype); + Py_INCREF(a); + return a; + } + // if not already an object and converting to an object + if (!dt_equal && 
dtype->type_num == NPY_OBJECT) { + char kind = PyArray_DESCR(array)->kind; + if ((kind == 'M' || kind == 'm')) { + PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); + int is_objectable = AK_is_objectable_dt64(array, dt_year); + Py_DECREF(dt_year); + + if (!is_objectable) { + PyObject* result = PyArray_NewLikeArray(array, NPY_ANYORDER, dtype, 0); + if (!result) { + Py_DECREF(dtype); + return NULL; + } + PyObject** data = (PyObject**)PyArray_DATA((PyArrayObject*)result); + npy_intp size = PyArray_SIZE(array); + + for (npy_intp i = 0; i < size; ++i) { + PyObject* item = PyArray_GETITEM(array, PyArray_GETPTR1(array, i)); + if (!item) { + Py_DECREF(result); + return NULL; + } + data[i] = item; + } + return result; + } + } + } + // all other cases: do a standard cast conversion + PyObject* result = PyArray_CastToType((PyArrayObject*)array, dtype, 0); + if (!result) { + Py_DECREF(dtype); + return NULL; + } + PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); + return result; +} + + + static char *first_true_1d_kwarg_names[] = { "array", "forward", diff --git a/src/methods.h b/src/methods.h index 340ef705..38fdac8c 100644 --- a/src/methods.h +++ b/src/methods.h @@ -48,7 +48,10 @@ PyObject * nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a); PyObject * -is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a); +is_objectable_dt64(PyObject *m, PyObject *a); + +PyObject * +astype_array(PyObject *m, PyObject *args); PyObject * first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs); diff --git a/test/test_astype_array.py b/test/test_astype_array.py new file mode 100644 index 00000000..a641ff20 --- /dev/null +++ b/test/test_astype_array.py @@ -0,0 +1,16 @@ +import unittest + +import numpy as np + +from arraykit import astype_array + +class TestUnit(unittest.TestCase): + + def test_astype_array_a1(self) -> None: + a1 = np.array([10, 20, 30], dtype=np.int64) + a1.flags.writeable = False + + a2 = astype_array(a1, np.int64) + 
self.assertEqual(id(a1), id(a2)) + + From a9d92ef6af3312388e3a2db6252bdd4ce8fe9d20 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 24 Jun 2025 18:01:05 -0700 Subject: [PATCH 10/17] additional test --- test/test_astype_array.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_astype_array.py b/test/test_astype_array.py index a641ff20..40d94f05 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -14,3 +14,20 @@ def test_astype_array_a1(self) -> None: self.assertEqual(id(a1), id(a2)) + def test_astype_array_a2(self) -> None: + a1 = np.array([10, 20, 30], dtype=np.int64) + a1.flags.writeable = False + + a2 = astype_array(a1, np.float64) + self.assertNotEqual(id(a1), id(a2)) + self.assertEqual(a2.dtype, np.dtype(np.float64)) + + + + def test_astype_array_a3(self) -> None: + a1 = np.array([False, True, False]) + + a2 = astype_array(a1, np.int8) + self.assertEqual(a2.dtype, np.dtype(np.int8)) + self.assertFalse(a2.flags.writeable) + From 5194c1dea81e7448b4cac07b557d994e53c783f5 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 24 Jun 2025 18:09:43 -0700 Subject: [PATCH 11/17] additional tests, refinements to astype_array --- src/methods.c | 6 ++++-- test/test_astype_array.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/methods.c b/src/methods.c index 9e36480c..bc08b7be 100644 --- a/src/methods.c +++ b/src/methods.c @@ -254,7 +254,8 @@ astype_array(PyObject* m, PyObject* args) { } // if not already an object and converting to an object if (!dt_equal && dtype->type_num == NPY_OBJECT) { - char kind = PyArray_DESCR(array)->kind; + PyArray_Descr* array_dt = PyArray_DESCR(array); + char kind = array_dt->kind; if ((kind == 'M' || kind == 'm')) { PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); int is_objectable = AK_is_objectable_dt64(array, dt_year); @@ -270,13 +271,14 @@ astype_array(PyObject* m, PyObject* args) { npy_intp size = PyArray_SIZE(array); for 
(npy_intp i = 0; i < size; ++i) { - PyObject* item = PyArray_GETITEM(array, PyArray_GETPTR1(array, i)); + PyObject* item = PyArray_Scalar(PyArray_GETPTR1(array, i), array_dt, a); if (!item) { Py_DECREF(result); return NULL; } data[i] = item; } + PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); return result; } } diff --git a/test/test_astype_array.py b/test/test_astype_array.py index 40d94f05..070ec6fa 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -23,7 +23,6 @@ def test_astype_array_a2(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.float64)) - def test_astype_array_a3(self) -> None: a1 = np.array([False, True, False]) @@ -31,3 +30,11 @@ def test_astype_array_a3(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.int8)) self.assertFalse(a2.flags.writeable) + def test_astype_array_b(self) -> None: + a1 = np.array(['2021', '2024'], dtype=np.datetime64) + + a2 = astype_array(a1, np.object_) + self.assertEqual(a2.dtype, np.dtype(np.object_)) + self.assertFalse(a2.flags.writeable) + self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('2024')]) + From 57f71c5fe502634847986ef1106764fa81105506 Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 08:39:59 -0700 Subject: [PATCH 12/17] added is_objectable --- README.rst | 4 +++- src/__init__.py | 1 + src/__init__.pyi | 3 ++- src/_arraykit.c | 1 + src/methods.c | 34 ++++++++++++++++++++++++++++++---- src/methods.h | 3 +++ 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 8d17f777..78e24af2 100644 --- a/README.rst +++ b/README.rst @@ -41,7 +41,9 @@ What is New in ArrayKit Now building free-threaded compatible wheels for Python 3.13. -Added ``is_objectable_dt64()``. +Added ``is_objectable()`` and ``is_objectable_dt64()``. + +Added ``astype_array()``. 
1.0.9 diff --git a/src/__init__.py b/src/__init__.py index ca6f5398..9c8bfb8d 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -32,6 +32,7 @@ from ._arraykit import array_to_tuple_iter as array_to_tuple_iter from ._arraykit import nonzero_1d as nonzero_1d from ._arraykit import is_objectable_dt64 as is_objectable_dt64 +from ._arraykit import is_objectable as is_objectable from ._arraykit import astype_array as astype_array from ._arraykit import AutoMap as AutoMap from ._arraykit import FrozenAutoMap as FrozenAutoMap diff --git a/src/__init__.pyi b/src/__init__.pyi index d7e2c7cb..888bb58c 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -203,7 +203,8 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... -def is_objectable_dt64(__array: np.ndarray, /) -> np.ndarray: ... +def is_objectable_dt64(__array: np.ndarray, /) -> bool: ... +def is_objectable(__array: np.ndarray, /) -> bool: ... def astype_array(__array: np.ndarray, __dtype: np.dtype | None, /) -> np.ndarray: ... def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ... 
diff --git a/src/_arraykit.c b/src/_arraykit.c index bbfc06af..49b30298 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -53,6 +53,7 @@ static PyMethodDef arraykit_methods[] = { {"count_iteration", count_iteration, METH_O, NULL}, {"nonzero_1d", nonzero_1d, METH_O, NULL}, {"is_objectable_dt64", is_objectable_dt64, METH_O, NULL}, + {"is_objectable", is_objectable, METH_O, NULL}, {"astype_array", astype_array, METH_VARARGS, NULL}, {"isna_element", (PyCFunction)isna_element, diff --git a/src/methods.c b/src/methods.c index bc08b7be..737cd240 100644 --- a/src/methods.c +++ b/src/methods.c @@ -204,11 +204,10 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { PyObject* is_objectable_dt64(PyObject *m, PyObject *a) { AK_CHECK_NUMPY_ARRAY(a); + PyArrayObject* array = (PyArrayObject*)a; // this returns a new reference PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); - - PyArrayObject* array = (PyArrayObject*)a; int is_objectable = AK_is_objectable_dt64(array, dt_year); Py_DECREF(dt_year); @@ -224,16 +223,43 @@ is_objectable_dt64(PyObject *m, PyObject *a) { } +PyObject* +is_objectable(PyObject *m, PyObject *a) { + AK_CHECK_NUMPY_ARRAY(a); + PyArrayObject* array = (PyArrayObject*)a; + + char kind = PyArray_DESCR(array)->kind; + if ((kind == 'M' || kind == 'm')) { + // this returns a new reference + PyObject* dt_year = PyObject_GetAttrString(m, "dt_year"); + int is_objectable = AK_is_objectable_dt64(array, dt_year); + Py_DECREF(dt_year); + + switch (is_objectable) { + case -1: + return NULL; + case 0: + Py_RETURN_FALSE; + case 1: + Py_RETURN_TRUE; + } + } + Py_RETURN_TRUE; +} + + PyObject* astype_array(PyObject* m, PyObject* args) { PyObject* a = NULL; PyObject* dtype_spec = Py_None; - if (!PyArg_ParseTuple(args, "O|O", &a, &dtype_spec)) { + if (!PyArg_ParseTuple(args, "O!|O:astype_array", + &PyArray_Type, &a, + &dtype_spec)) { return NULL; } - AK_CHECK_NUMPY_ARRAY(a); + // AK_CHECK_NUMPY_ARRAY(a); PyArrayObject* array = (PyArrayObject*)a; PyArray_Descr* dtype = 
NULL; diff --git a/src/methods.h b/src/methods.h index 38fdac8c..1d33a558 100644 --- a/src/methods.h +++ b/src/methods.h @@ -50,6 +50,9 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a); PyObject * is_objectable_dt64(PyObject *m, PyObject *a); +PyObject * +is_objectable(PyObject *m, PyObject *a); + PyObject * astype_array(PyObject *m, PyObject *args); From cd72e5ca7b41dad72ebf5992f4e0e906325704e3 Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 08:51:44 -0700 Subject: [PATCH 13/17] additional testing of astype_array --- test/test_astype_array.py | 22 +++++++++++++++++++++- test/test_objectable.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/test/test_astype_array.py b/test/test_astype_array.py index 070ec6fa..abe59140 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -30,7 +30,7 @@ def test_astype_array_a3(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.int8)) self.assertFalse(a2.flags.writeable) - def test_astype_array_b(self) -> None: + def test_astype_array_b1(self) -> None: a1 = np.array(['2021', '2024'], dtype=np.datetime64) a2 = astype_array(a1, np.object_) @@ -38,3 +38,23 @@ def test_astype_array_b(self) -> None: self.assertFalse(a2.flags.writeable) self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('2024')]) + + def test_astype_array_b2(self) -> None: + a1 = np.array(['2021', '1642'], dtype=np.datetime64) + + a2 = astype_array(a1, np.object_) + self.assertEqual(a2.dtype, np.dtype(np.object_)) + self.assertFalse(a2.flags.writeable) + self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('1642')]) + + + def test_astype_array_b3(self) -> None: + a1 = np.array(['2021', '2024', '1984', '1642'], dtype=np.datetime64).reshape((2, 2)) + + a2 = astype_array(a1, np.object_) + self.assertEqual(a2.dtype, np.dtype(np.object_)) + self.assertFalse(a2.flags.writeable) + import ipdb; ipdb.set_trace() + self.assertEqual( + list(list(a) for a in a2), + 
[[np.datetime64('2021'), np.datetime64('2024')], [np.datetime64('1984'), np.datetime64('1642')]]) diff --git a/test/test_objectable.py b/test/test_objectable.py index eab64ba5..9d617632 100644 --- a/test/test_objectable.py +++ b/test/test_objectable.py @@ -3,9 +3,37 @@ import numpy as np from arraykit import is_objectable_dt64 +from arraykit import is_objectable class TestUnit(unittest.TestCase): + def test_is_objectable_a1(self) -> None: + a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) + self.assertTrue(is_objectable(a1)) + + def test_is_objectable_a2(self) -> None: + a1 = np.array(['10000-01-04', '1954-04-12'], dtype=np.datetime64) + self.assertFalse(is_objectable(a1)) + + def test_is_objectable_b(self) -> None: + a1 = np.array([10, 20]) + self.assertTrue(is_objectable(a1)) + + def test_is_objectable_c(self) -> None: + a1 = np.array([True, False]) + self.assertTrue(is_objectable(a1)) + + def test_is_objectable_d(self) -> None: + a1 = np.array(['b', 'ccc']) + self.assertTrue(is_objectable(a1)) + + def test_is_objectable_e(self) -> None: + a1 = np.array(['b', None, False], dtype=object) + self.assertTrue(is_objectable(a1)) + + + #--------------------------------------------------------------------------- + def test_is_objectable_dt64_a1(self) -> None: a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) self.assertTrue(is_objectable_dt64(a1)) From d3144e3d1cd056ac43ee0ba68dd9dd95f4acf6c3 Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 09:55:50 -0700 Subject: [PATCH 14/17] improved handling of 2d arrays --- src/methods.c | 18 +++++++++++++----- test/test_astype_array.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/methods.c b/src/methods.c index 737cd240..5b42252b 100644 --- a/src/methods.c +++ b/src/methods.c @@ -259,7 +259,6 @@ astype_array(PyObject* m, PyObject* args) { &dtype_spec)) { return NULL; } - // AK_CHECK_NUMPY_ARRAY(a); PyArrayObject* array = 
(PyArrayObject*)a; PyArray_Descr* dtype = NULL; @@ -294,16 +293,25 @@ astype_array(PyObject* m, PyObject* args) { return NULL; } PyObject** data = (PyObject**)PyArray_DATA((PyArrayObject*)result); - npy_intp size = PyArray_SIZE(array); - for (npy_intp i = 0; i < size; ++i) { - PyObject* item = PyArray_Scalar(PyArray_GETPTR1(array, i), array_dt, a); + PyArrayIterObject* it = (PyArrayIterObject*)PyArray_IterNew(a); + if (!it) { + Py_DECREF(result); + return NULL; + } + + npy_intp i = 0; + while (it->index < it->size) { + PyObject* item = PyArray_ToScalar(it->dataptr, array); if (!item) { Py_DECREF(result); + Py_DECREF(it); return NULL; } - data[i] = item; + data[i++] = item; + PyArray_ITER_NEXT(it); } + Py_DECREF(it); PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); return result; } diff --git a/test/test_astype_array.py b/test/test_astype_array.py index abe59140..c64af661 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -54,7 +54,40 @@ def test_astype_array_b3(self) -> None: a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertFalse(a2.flags.writeable) - import ipdb; ipdb.set_trace() self.assertEqual( list(list(a) for a in a2), [[np.datetime64('2021'), np.datetime64('2024')], [np.datetime64('1984'), np.datetime64('1642')]]) + + def test_astype_array_b4(self) -> None: + a1 = np.array(['2021', '2024', '1532', '1984', '1642', '899'], dtype=np.datetime64).reshape((2, 3)) + + a2 = astype_array(a1, np.object_) + self.assertEqual(a2.dtype, np.dtype(np.object_)) + self.assertEqual(a2.shape, (2, 3)) + self.assertFalse(a2.flags.writeable) + self.assertEqual( + list(list(a) for a in a2), + [[np.datetime64('2021'), np.datetime64('2024'), np.datetime64('1532')], + [np.datetime64('1984'), np.datetime64('1642'), np.datetime64('899')]]) + + def test_astype_array_c(self) -> None: + with self.assertRaises(TypeError): + _ = astype_array([3, 4, 5], np.int64) + + + def test_astype_array_d1(self) -> None: 
+ a1 = np.array([10, 20, 30], dtype=np.int64) + a2 = astype_array(a1) + + self.assertEqual(a2.dtype, np.dtype(np.float64)) + self.assertEqual(a2.shape, (3,)) + self.assertFalse(a2.flags.writeable) + + + def test_astype_array_d2(self) -> None: + a1 = np.array([10, 20, 30], dtype=np.int64) + a2 = astype_array(a1, None) + + self.assertEqual(a2.dtype, np.dtype(np.float64)) + self.assertEqual(a2.shape, (3,)) + self.assertFalse(a2.flags.writeable) From b997ba3ed276df2d72f03a348774be960519c795 Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 10:32:13 -0700 Subject: [PATCH 15/17] additional tests --- src/methods.c | 25 ++++++++++++++++--------- test/test_astype_array.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/methods.c b/src/methods.c index 5b42252b..6755fcee 100644 --- a/src/methods.c +++ b/src/methods.c @@ -266,21 +266,28 @@ astype_array(PyObject* m, PyObject* args) { dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); } else { if (!PyArray_DescrConverter(dtype_spec, &dtype)) { - Py_DECREF((PyObject*)array); return NULL; } } - int dt_equal = PyArray_EquivTypes(PyArray_DESCR(array), dtype); - if (dt_equal && !PyArray_ISWRITEABLE(array)) { + if (PyArray_EquivTypes(PyArray_DESCR(array), dtype)) { Py_DECREF(dtype); - Py_INCREF(a); - return a; + if (PyArray_ISWRITEABLE(array)) { + PyObject* result = PyArray_NewCopy(array, NPY_ANYORDER); + if (!result) { + return NULL; + } + PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); + return result; + } + else { // already immutable + Py_INCREF(a); + return a; + } } - // if not already an object and converting to an object - if (!dt_equal && dtype->type_num == NPY_OBJECT) { - PyArray_Descr* array_dt = PyArray_DESCR(array); - char kind = array_dt->kind; + // if converting to an object + if (dtype->type_num == NPY_OBJECT) { + char kind = PyArray_DESCR(array)->kind; if ((kind == 'M' || kind == 'm')) { PyObject* dt_year = PyObject_GetAttrString(m, 
"dt_year"); int is_objectable = AK_is_objectable_dt64(array, dt_year); diff --git a/test/test_astype_array.py b/test/test_astype_array.py index c64af661..f4283739 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -91,3 +91,33 @@ def test_astype_array_d2(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.float64)) self.assertEqual(a2.shape, (3,)) self.assertFalse(a2.flags.writeable) + + + + def test_astype_array_d3(self) -> None: + a1 = np.array([10, 20, 30], dtype=np.int64) + a2 = astype_array(a1, np.int64) + + self.assertEqual(a2.dtype, np.dtype(np.int64)) + self.assertEqual(a2.shape, (3,)) + self.assertFalse(a2.flags.writeable) + + self.assertNotEqual(id(a1), id(a2)) + + def test_astype_array_e(self) -> None: + a1 = np.array(['2021', '2024', '1997', '1984', '2000', '1999'], dtype='datetime64[ns]').reshape((2, 3)) + + a2 = astype_array(a1, np.object_) + self.assertEqual(a2.dtype, np.dtype(np.object_)) + self.assertEqual(a2.shape, (2, 3)) + self.assertFalse(a2.flags.writeable) + self.assertEqual( + list(list(a) for a in a2), + [[np.datetime64('2021-01-01T00:00:00.000000000'), + np.datetime64('2024-01-01T00:00:00.000000000'), + np.datetime64('1997-01-01T00:00:00.000000000')], + [np.datetime64('1984-01-01T00:00:00.000000000'), + np.datetime64('2000-01-01T00:00:00.000000000'), + np.datetime64('1999-01-01T00:00:00.000000000')]] + ) + From 40cd3a8b1f07b97ebec53f35d40781d34dc37175 Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 11:26:57 -0700 Subject: [PATCH 16/17] do not make newly created arrays immutable --- src/methods.c | 6 ++---- test/test_astype_array.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/methods.c b/src/methods.c index 6755fcee..d302bff5 100644 --- a/src/methods.c +++ b/src/methods.c @@ -247,7 +247,7 @@ is_objectable(PyObject *m, PyObject *a) { Py_RETURN_TRUE; } - +// Convert array to the dtype provided. 
NOTE: mutable arrays will be returned unless the input array is immutable and no dtype change is needed PyObject* astype_array(PyObject* m, PyObject* args) { @@ -272,12 +272,12 @@ astype_array(PyObject* m, PyObject* args) { if (PyArray_EquivTypes(PyArray_DESCR(array), dtype)) { Py_DECREF(dtype); + if (PyArray_ISWRITEABLE(array)) { PyObject* result = PyArray_NewCopy(array, NPY_ANYORDER); if (!result) { return NULL; } - PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); return result; } else { // already immutable @@ -319,7 +319,6 @@ astype_array(PyObject* m, PyObject* args) { PyArray_ITER_NEXT(it); } Py_DECREF(it); - PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); return result; } } @@ -330,7 +329,6 @@ astype_array(PyObject* m, PyObject* args) { Py_DECREF(dtype); return NULL; } - PyArray_CLEARFLAGS((PyArrayObject *)result, NPY_ARRAY_WRITEABLE); return result; } diff --git a/test/test_astype_array.py b/test/test_astype_array.py index f4283739..acfd32fb 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -28,14 +28,14 @@ def test_astype_array_a3(self) -> None: a2 = astype_array(a1, np.int8) self.assertEqual(a2.dtype, np.dtype(np.int8)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) def test_astype_array_b1(self) -> None: a1 = np.array(['2021', '2024'], dtype=np.datetime64) a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('2024')]) @@ -44,7 +44,7 @@ def test_astype_array_b2(self) -> None: a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('1642')]) @@ -53,7 +53,7 @@ def test_astype_array_b3(self) -> None: a2 = astype_array(a1, np.object_) 
self.assertEqual(a2.dtype, np.dtype(np.object_)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertEqual( list(list(a) for a in a2), [[np.datetime64('2021'), np.datetime64('2024')], [np.datetime64('1984'), np.datetime64('1642')]]) @@ -64,7 +64,7 @@ def test_astype_array_b4(self) -> None: a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertEqual(a2.shape, (2, 3)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertEqual( list(list(a) for a in a2), [[np.datetime64('2021'), np.datetime64('2024'), np.datetime64('1532')], @@ -81,7 +81,7 @@ def test_astype_array_d1(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.float64)) self.assertEqual(a2.shape, (3,)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) def test_astype_array_d2(self) -> None: @@ -90,7 +90,7 @@ def test_astype_array_d2(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.float64)) self.assertEqual(a2.shape, (3,)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) @@ -100,7 +100,7 @@ def test_astype_array_d3(self) -> None: self.assertEqual(a2.dtype, np.dtype(np.int64)) self.assertEqual(a2.shape, (3,)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertNotEqual(id(a1), id(a2)) @@ -110,7 +110,7 @@ def test_astype_array_e(self) -> None: a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertEqual(a2.shape, (2, 3)) - self.assertFalse(a2.flags.writeable) + self.assertTrue(a2.flags.writeable) self.assertEqual( list(list(a) for a in a2), [[np.datetime64('2021-01-01T00:00:00.000000000'), From f2ac2ff8de839480e275fbfceda76adff069442a Mon Sep 17 00:00:00 2001 From: flexatone Date: Wed, 25 Jun 2025 11:30:58 -0700 Subject: [PATCH 17/17] remove unnecessary cast --- src/methods.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods.c 
b/src/methods.c index d302bff5..d98f75cd 100644 --- a/src/methods.c +++ b/src/methods.c @@ -324,7 +324,7 @@ astype_array(PyObject* m, PyObject* args) { } } // all other cases: do a standard cast conversion - PyObject* result = PyArray_CastToType((PyArrayObject*)array, dtype, 0); + PyObject* result = PyArray_CastToType(array, dtype, 0); if (!result) { Py_DECREF(dtype); return NULL;