@@ -4178,6 +4178,8 @@ typedef struct BlockIndexObject {
41784178 Py_ssize_t bir_capacity ;
41794179 BlockIndexRecord * bir ;
41804180 PyArray_Descr * dtype ;
4181+ int8_t shape_recache ;
4182+ PyObject * shape ;
41814183} BlockIndexObject ;
41824184
41834185
@@ -4341,7 +4343,7 @@ BIIterSeq_iternext(BIIterSeqObject *self) {
43414343 PyArrayObject * a = (PyArrayObject * )self -> selector ;
43424344 switch (PyArray_TYPE (a )) { // type of passed in array
43434345 case NPY_INT64 :
4344- t = * (npy_int64 * )PyArray_GETPTR1 (a , i );
4346+ t = ( Py_ssize_t ) * (npy_int64 * )PyArray_GETPTR1 (a , i );
43454347 break ;
43464348 case NPY_INT32 :
43474349 t = * (npy_int32 * )PyArray_GETPTR1 (a , i );
@@ -4353,7 +4355,7 @@ BIIterSeq_iternext(BIIterSeqObject *self) {
43534355 t = * (npy_int8 * )PyArray_GETPTR1 (a , i );
43544356 break ;
43554357 case NPY_UINT64 :
4356- t = * (npy_uint64 * )PyArray_GETPTR1 (a , i );
4358+ t = ( Py_ssize_t ) * (npy_uint64 * )PyArray_GETPTR1 (a , i );
43574359 break ;
43584360 case NPY_UINT32 :
43594361 t = * (npy_uint32 * )PyArray_GETPTR1 (a , i );
@@ -4776,6 +4778,9 @@ BlockIndex_init(PyObject *self, PyObject *args, PyObject *kwargs) {
47764778 bi -> bir_count = bir_count ;
47774779 bi -> bir_capacity = bir_capacity ;
47784780
4781+ bi -> shape_recache = 1 ; // always init to true
4782+ bi -> shape = NULL ;
4783+
47794784 // Load the bi->bir struct array, if defined
47804785 bi -> bir = NULL ;
47814786 // always set bi to capacity defined at this point
@@ -4800,6 +4805,7 @@ BlockIndex_init(PyObject *self, PyObject *args, PyObject *kwargs) {
48004805 return -1 ;
48014806 }
48024807 }
4808+
48034809 return 0 ;
48044810}
48054811
@@ -4808,9 +4814,9 @@ BlockIndex_dealloc(BlockIndexObject *self) {
48084814 if (self -> bir != NULL ) {
48094815 PyMem_Free (self -> bir );
48104816 }
4811- if ( self -> dtype != NULL ) {
4812- Py_DECREF ((PyObject * )self -> dtype );
4813- }
4817+ // both dtype and shape might not be set
4818+ Py_XDECREF ((PyObject * )self -> dtype );
4819+ Py_XDECREF ( self -> shape );
48144820 Py_TYPE (self )-> tp_free ((PyObject * )self );
48154821}
48164822
@@ -4841,7 +4847,7 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
48414847 }
48424848 Py_ssize_t increment = ndim == 1 ? 1 : PyArray_DIM (a , 1 );
48434849
4844- // assign alignment on first observation; otherwise take
4850+ // assign alignment on first observation; otherwise force alignment. We do this even when the array has no columns.
48454851 Py_ssize_t alignment = PyArray_DIM (a , 0 );
48464852 if (self -> row_count == -1 ) {
48474853 self -> row_count = alignment ;
@@ -4854,16 +4860,20 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
48544860 return NULL ;
48554861 }
48564862
4863+ // if we are not adding columns, we are not adding types, so we are not changing the dtype or shape
48574864 if (increment == 0 ) {
48584865 Py_RETURN_FALSE ;
48594866 }
48604867
4868+
48614869 PyArray_Descr * dt = PyArray_DESCR (a ); // borrowed ref
4862- if (self -> dtype == NULL ) {
4870+ self -> shape_recache = 1 ; // adjusting columns, must recache shape
4871+
4872+ if (self -> dtype == NULL ) { // if not already set
48634873 Py_INCREF ((PyObject * )dt );
48644874 self -> dtype = dt ;
48654875 }
4866- else if (!PyDataType_ISOBJECT (self -> dtype )) {
4876+ else if (!PyDataType_ISOBJECT (self -> dtype )) { // if object cannot resolve further
48674877 PyArray_Descr * dtr = AK_ResolveDTypes (self -> dtype , dt ); // new ref
48684878 Py_DECREF ((PyObject * )self -> dtype );
48694879 self -> dtype = dtr ;
@@ -4972,6 +4982,9 @@ BlockIndex_copy(BlockIndexObject *self, PyObject *Py_UNUSED(unused))
49724982 bi -> bir_count = self -> bir_count ;
49734983 bi -> bir_capacity = self -> bir_capacity ;
49744984
4985+ bi -> shape_recache = 1 ; // could copy, but do not want to copy a pending cache state
4986+ bi -> shape = NULL ;
4987+
49754988 bi -> bir = NULL ;
49764989 AK_BI_BIR_new (bi ); // do initial alloc to self->bir_capacity
49774990 memcpy (bi -> bir ,
@@ -4993,9 +5006,16 @@ BlockIndex_iter(BlockIndexObject* self) {
49935006
49945007
49955008static PyObject *
4996- BlockIndex_shape_getter (BlockIndexObject * self , void * Py_UNUSED (closure )){
4997- // NOTE: this could be cached
4998- return Py_BuildValue ("nn" , self -> row_count , self -> bir_count );
5009+ BlockIndex_shape_getter (BlockIndexObject * self , void * Py_UNUSED (closure ))
5010+ {
5011+ if (self -> shape == NULL || self -> shape_recache ) {
5012+ Py_XDECREF (self -> shape ); // get rid of old if it exists
5013+ self -> shape = Py_BuildValue ("nn" , self -> row_count , self -> bir_count ); // new ref
5014+ }
5015+ // shape is not null and shape_recache is false
5016+ Py_INCREF (self -> shape ); // for caller
5017+ self -> shape_recache = 0 ;
5018+ return self -> shape ;
49995019}
50005020
50015021static PyObject *
0 commit comments