From 80c1fba1077fdae23acbab1d1c434ca5f664b68b Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 26 Mar 2021 16:44:37 -0700 Subject: [PATCH 01/15] Adds reference code and initial impl for roll_1d. --- arraykit.c | 139 ++++++++++++++++++++++++++++++++++ arraykit.pyi | 1 + performance/reference/util.py | 60 +++++++++++++++ test/test_util.py | 80 ++++++++++++++++++- 4 files changed, 278 insertions(+), 2 deletions(-) diff --git a/arraykit.c b/arraykit.c index 00d29264..073c661a 100644 --- a/arraykit.c +++ b/arraykit.c @@ -270,6 +270,144 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) return (PyObject *)AK_ResolveDTypeIter(arg); } +//------------------------------------------------------------------------------ +// rolling + +static int +assign_into_slice_from_slice(PyObject *dest, PyObject *src, PyObject *dest_slice, PyObject *src_slice) +{ + PyObject* shifted_src = PyObject_GetItem((PyObject*)src, src_slice); + if (!shifted_src) { + return -1; + } + + int success = PyObject_SetItem(dest, dest_slice, shifted_src); + Py_DECREF(shifted_src); + return success; +} + +static PyObject * +roll_1d(PyObject *Py_UNUSED(m), PyObject *args) +{ + /* Algorithm. + + size = len(array) + if size <= 1: + return array.copy() + + shift = shift % size + if shift == 0: + return array.copy() + + post = np.empty(size, dtype=array.dtype) + post[0:shift] = array[-shift:] + post[shift:] = array[0:-shift] + return post + */ + PyArrayObject *array; + int shift; + + if (!PyArg_ParseTuple(args, "O!i:roll_1d", &PyArray_Type, &array, &shift)) + { + return NULL; + } + + // Must be signed in order for modulo to work properly for negative shift values + int size = (int)PyArray_SIZE(array); + + uint8_t is_empty = (size == 0); + + if (!is_empty) { + shift = shift % size; + } + + if (is_empty || (shift == 0)) { + PyObject* copy = PyArray_Copy(array); + if (!copy) { + return NULL; + } + return copy; + } + + // Create an empty array + PyArray_Descr* dtype = PyArray_DESCR(array); + Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype + + PyObject* post = PyArray_Empty( + PyArray_NDIM(array), + PyArray_DIMS(array), + dtype, + 0); + if (!post) { + return NULL; + } + + // Build integers + PyObject* zero = PyLong_FromLong(0); + PyObject* pos_shift = PyLong_FromLong(shift); + PyObject* neg_shift = PyLong_FromLong(-shift); + if (!zero || !pos_shift || !neg_shift) { + goto integer_build_failure; + } + + // Build slices + PyObject* first_dest_slice = PySlice_New(zero, pos_shift, Py_None); // [0:shift] + PyObject* first_src_slice = PySlice_New(neg_shift, Py_None, Py_None); // [-shift:] + PyObject* second_dest_slice = PySlice_New(pos_shift, Py_None, Py_None); // [shift:] + PyObject* second_src_slice = PySlice_New(zero, neg_shift, Py_None); // [0:-shift] + Py_DECREF(zero); + Py_DECREF(pos_shift); + Py_DECREF(neg_shift); + if (!first_dest_slice || !first_src_slice || !second_dest_slice || !second_src_slice) { + goto slice_build_failure; + } + + int success; + + // First Assign + success = assign_into_slice_from_slice(post, (PyObject*)array, first_dest_slice, first_src_slice); + Py_DECREF(first_dest_slice); + Py_DECREF(first_src_slice); + if (success == -1) { + Py_DECREF(second_dest_slice); + Py_DECREF(second_src_slice); + goto failure; + } + + // First Assign + success = assign_into_slice_from_slice(post, (PyObject*)array, second_dest_slice, second_src_slice); + Py_DECREF(second_src_slice); + Py_DECREF(second_dest_slice); + if (success == -1) { + goto failure; + } + + return post; + +// Handled potentially leaked integer objects +integer_build_failure: + Py_XDECREF(zero); + Py_XDECREF(pos_shift); + Py_XDECREF(neg_shift); + goto failure; + +// Handled potentially leaked slice objects +slice_build_failure: + // Integers objects have all been cleaned up. + Py_XDECREF(first_dest_slice); + Py_XDECREF(first_src_slice); + Py_XDECREF(second_dest_slice); + Py_XDECREF(second_src_slice); + goto failure; + +// Handle final object that will always exist at this point. +failure: + // Integers objects have all been cleaned up. + // Slice objects have all been cleaned up. + Py_DECREF(post); + return NULL; +} + //------------------------------------------------------------------------------ // ArrayGO //------------------------------------------------------------------------------ @@ -546,6 +684,7 @@ static PyMethodDef arraykit_methods[] = { {"row_1d_filter", row_1d_filter, METH_O, NULL}, {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL}, {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL}, + {"roll_1d", roll_1d, METH_VARARGS, NULL}, {NULL}, }; diff --git a/arraykit.pyi b/arraykit.pyi index b5a78afc..28c787db 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -27,3 +27,4 @@ def column_1d_filter(__array: np.array) -> np.ndarray: ... def row_1d_filter(__array: np.array) -> np.ndarray: ... def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... +def roll_1d(__array: np.ndarray, __shift: int) -> np.ndarray: ... diff --git a/performance/reference/util.py b/performance/reference/util.py index 6d437b28..566c8bca 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -181,3 +181,63 @@ def array_deepcopy( if memo is not None: memo[ident] = post return post + + +def roll_1d(array: np.ndarray, shift: int) -> np.ndarray: + ''' + Specialized form of np.roll that, by focusing on the 1D solution, is at least four times faster. + ''' + size = len(array) + if size <= 1: + return array.copy() + + # result will be positive + shift = shift % size + if shift == 0: + return array.copy() + + post = np.empty(size, dtype=array.dtype) + + post[0:shift] = array[-shift:] + post[shift:] = array[0:-shift] + return post + + +def roll_2d(array: np.ndarray, + shift: int, + axis: int + ) -> np.ndarray: + ''' + Specialized form of np.roll that, by focusing on the 2D solution + ''' + post = np.empty(array.shape, dtype=array.dtype) + + if axis == 0: # roll rows + size = array.shape[0] + if size <= 1: + return array.copy() + + # result will be positive + shift = shift % size + if shift == 0: + return array.copy() + + post[0:shift, :] = array[-shift:, :] + post[shift:, :] = array[0:-shift, :] + return post + + elif axis == 1: # roll columns + size = array.shape[1] + if size <= 1: + return array.copy() + + # result will be positive + shift = shift % size + if shift == 0: + return array.copy() + + post[:, 0:shift] = array[:, -shift:] + post[:, shift:] = array[:, 0:-shift] + return post + + raise NotImplementedError() diff --git a/test/test_util.py b/test/test_util.py index 64c45cdd..49378517 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -12,6 +12,9 @@ from arraykit import immutable_filter from performance.reference.util import mloc as mloc_ref +#from performance.reference.util import roll_1d +from arraykit import roll_1d +from performance.reference.util import roll_2d class TestUnit(unittest.TestCase): @@ -167,8 +170,81 @@ def test_row_1d_filter_a(self) -> None: with self.assertRaises(NotImplementedError): row_1d_filter(a1.reshape(1,2,5)) -if __name__ == '__main__': - unittest.main() + #--------------------------------------------------------------------------- + + def test_roll_1d_a(self) -> None: + a1 = np.arange(12) + + for i in range(len(a1) + 1): + post = roll_1d(a1, i) + self.assertEqual(post.tolist(), np.roll(a1, i).tolist()) + + post = roll_1d(a1, -i) + self.assertEqual(post.tolist(), np.roll(a1, -i).tolist()) + + def test_roll_1d_b(self) -> None: + post = roll_1d(np.array([]), -4) + self.assertEqual([], post.tolist()) + + def test_roll_1d_c(self) -> None: + a1 = np.array([3, 4, 5, 6]) + self.assertEqual(roll_1d(a1, 1).tolist(), [6, 3, 4, 5]) + self.assertEqual(roll_1d(a1, -1).tolist(), [4, 5, 6, 3]) + + #--------------------------------------------------------------------------- + + def test_roll_2d_a(self) -> None: + a1 = np.arange(12).reshape((3,4)) + + for i in range(a1.shape[0] + 1): + post = roll_2d(a1, i, axis=0) + self.assertEqual(post.tolist(), np.roll(a1, i, axis=0).tolist()) + + post = roll_2d(a1, -i, axis=0) + self.assertEqual(post.tolist(), np.roll(a1, -i, axis=0).tolist()) + + for i in range(a1.shape[1] + 1): + post = roll_2d(a1, i, axis=1) + self.assertEqual(post.tolist(), np.roll(a1, i, axis=1).tolist()) + post = roll_2d(a1, -i, axis=1) + self.assertEqual(post.tolist(), np.roll(a1, -i, axis=1).tolist()) + def test_roll_2d_b(self) -> None: + post = roll_2d(np.array([[]]), -4, axis=1) + self.assertEqual(post.shape, (1, 0)) + def test_roll_2d_c(self) -> None: + a1 = np.arange(12).reshape((3,4)) + + self.assertEqual(roll_2d(a1, -2, axis=0).tolist(), + [[8, 9, 10, 11], [0, 1, 2, 3], [4, 5, 6, 7]]) + + self.assertEqual(roll_2d(a1, -2, axis=1).tolist(), + [[2, 3, 0, 1], [6, 7, 4, 5], [10, 11, 8, 9]]) + + def test_roll_2d_d(self) -> None: + a1 = np.arange(6).reshape((2, 3)) + + self.assertEqual(roll_2d(a1, 1, axis=1).tolist(), + [[2, 0, 1], [5, 3, 4]]) + self.assertEqual(roll_2d(a1, -1, axis=1).tolist(), + [[1, 2, 0], [4, 5, 3]]) + + def test_roll_2d_e(self) -> None: + a1 = np.arange(6).reshape((3, 2)) + + self.assertEqual(roll_2d(a1, 1, axis=0).tolist(), + [[4, 5], [0, 1], [2, 3]] + ) + self.assertEqual(roll_2d(a1, -1, axis=0).tolist(), + [[2, 3], [4, 5], [0, 1]] + ) + + def test_roll_2d_f(self) -> None: + with self.assertRaises(NotImplementedError): + roll_2d(np.arange(4).reshape((2, 2)), 1, axis=2) + + +if __name__ == '__main__': + unittest.main() From 90ffd3da35e17b9685b4cc5152abf1d6b452e3a2 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 26 Mar 2021 16:54:24 -0700 Subject: [PATCH 02/15] Adds performance benchmark for roll_1d. --- performance/main.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/performance/main.py b/performance/main.py index d0bd0809..9bd24d37 100644 --- a/performance/main.py +++ b/performance/main.py @@ -15,6 +15,7 @@ from performance.reference.util import row_1d_filter as row_1d_filter_ref from performance.reference.util import resolve_dtype as resolve_dtype_ref from performance.reference.util import resolve_dtype_iter as resolve_dtype_iter_ref +from performance.reference.util import roll_1d as roll_1d_ref from performance.reference.array_go import ArrayGO as ArrayGOREF @@ -27,6 +28,7 @@ from arraykit import row_1d_filter as row_1d_filter_ak from arraykit import resolve_dtype as resolve_dtype_ak from arraykit import resolve_dtype_iter as resolve_dtype_iter_ak +from arraykit import roll_1d as roll_1d_ak from arraykit import ArrayGO as ArrayGOAK @@ -221,6 +223,26 @@ class ArrayGOPerfREF(ArrayGOPerf): entry = staticmethod(ArrayGOREF) +#------------------------------------------------------------------------------- +class Roll1d(Perf): + NUMBER = 10 + SIZE = 20_000 + + def pre(self): + self.array = np.arange(self.SIZE) + + def main(self): + for i in range(-(self.SIZE+1), self.SIZE+1): + self.entry(self.array, i) + + +class Roll1dAK(Roll1d): + entry = staticmethod(roll_1d_ak) + +class Roll1dREF(Roll1d): + entry = staticmethod(roll_1d_ref) + + #------------------------------------------------------------------------------- def get_arg_parser(): From f86238ae1863b4505402c67ecd18567ccd2aca82 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 26 Mar 2021 19:05:12 -0700 Subject: [PATCH 03/15] Adds an alternative implementation for roll_1d. --- arraykit.c | 176 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 141 insertions(+), 35 deletions(-) diff --git a/arraykit.c b/arraykit.c index 073c661a..32967ae4 100644 --- a/arraykit.c +++ b/arraykit.c @@ -287,47 +287,14 @@ assign_into_slice_from_slice(PyObject *dest, PyObject *src, PyObject *dest_slice } static PyObject * -roll_1d(PyObject *Py_UNUSED(m), PyObject *args) +_roll_1d_a(PyArrayObject* array, int shift) { - /* Algorithm. - - size = len(array) - if size <= 1: - return array.copy() - - shift = shift % size - if shift == 0: - return array.copy() - + /* post = np.empty(size, dtype=array.dtype) post[0:shift] = array[-shift:] post[shift:] = array[0:-shift] return post */ - PyArrayObject *array; - int shift; - - if (!PyArg_ParseTuple(args, "O!i:roll_1d", &PyArray_Type, &array, &shift)) - { - return NULL; - } - - // Must be signed in order for modulo to work properly for negative shift values - int size = (int)PyArray_SIZE(array); - - uint8_t is_empty = (size == 0); - - if (!is_empty) { - shift = shift % size; - } - - if (is_empty || (shift == 0)) { - PyObject* copy = PyArray_Copy(array); - if (!copy) { - return NULL; - } - return copy; - } // Create an empty array PyArray_Descr* dtype = PyArray_DESCR(array); @@ -408,6 +375,145 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) return NULL; } +static PyObject * +_roll_1d_b(PyArrayObject* array, int shift, int size) +{ + /* + post = np.empty(size, dtype=array.dtype) + post[0:shift] = array[-shift:] + post[shift:] = array[0:-shift] + return post + */ + + // Create an empty array + PyArray_Descr* dtype = PyArray_DESCR(array); + Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype + + PyArrayObject* post = (PyArrayObject*)PyArray_Empty( + PyArray_NDIM(array), + PyArray_DIMS(array), + dtype, + 0); + if (!post) { + return NULL; + } + + npy_intp array_stride = PyArray_STRIDE(array, 0); + npy_intp post_stride = PyArray_STRIDE(post, 0); + char* array_dataptr = PyArray_BYTES(array); + char* post_dataptr = PyArray_BYTES(post); + + for (int i = 0; i < size; ++i) { + int src_i = (i + size - shift) % size; + + PyObject* obj = PyArray_GETITEM(array, array_dataptr + (array_stride * src_i)); + if (!obj) { + Py_DECREF(post); + return NULL; + } + + if (PyArray_SETITEM(post, post_dataptr + (i * post_stride), obj) == -1) { + Py_DECREF(post); + return NULL; + } + } + + return (PyObject*)post; +} + +static PyObject * +_roll_1d_c(PyArrayObject* array, int shift, int size) +{ + /* + post = np.empty(size, dtype=array.dtype) + post[0:shift] = array[-shift:] + post[shift:] = array[0:-shift] + return post + */ + + // Create an empty array + PyArray_Descr* dtype = PyArray_DESCR(array); + Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype + + PyArrayObject* post = (PyArrayObject*)PyArray_Empty( + PyArray_NDIM(array), + PyArray_DIMS(array), + dtype, + 0); + if (!post) { + return NULL; + } + + npy_intp array_stride = PyArray_STRIDE(array, 0); + npy_intp post_stride = PyArray_STRIDE(post, 0); + char* array_dataptr = PyArray_BYTES(array); + char* post_dataptr = PyArray_BYTES(post); + + for (int i = 0; i < size; ++i) { + int src_i = (i + size - shift) % size; + + PyObject* obj = PyArray_GETITEM(array, array_dataptr + (array_stride * src_i)); + if (!obj) { + Py_DECREF(post); + return NULL; + } + + if (PyArray_SETITEM(post, post_dataptr + (i * post_stride), obj) == -1) { + Py_DECREF(post); + return NULL; + } + } + + return (PyObject*)post; +} + +static PyObject * +roll_1d(PyObject *Py_UNUSED(m), PyObject *args) +{ + /* Algorithm. + + size = len(array) + if size <= 1: + return array.copy() + + shift = shift % size + if shift == 0: + return array.copy() + + post = np.empty(size, dtype=array.dtype) + post[0:shift] = array[-shift:] + post[shift:] = array[0:-shift] + return post + */ + PyArrayObject *array; + int shift; + + if (!PyArg_ParseTuple(args, "O!i:roll_1d", &PyArray_Type, &array, &shift)) + { + return NULL; + } + + // Must be signed in order for modulo to work properly for negative shift values + int size = (int)PyArray_SIZE(array); + + uint8_t is_empty = (size == 0); + + if (!is_empty) { + shift = shift % size; + } + + if (is_empty || (shift == 0)) { + PyObject* copy = PyArray_Copy(array); + if (!copy) { + return NULL; + } + return copy; + } + return _roll_1d_a(array, shift); + return _roll_1d_b(array, shift, size); +} + + //------------------------------------------------------------------------------ // ArrayGO //------------------------------------------------------------------------------ From fae903845ecb066944d943d1bbd87a29f8eeb7be Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 26 Mar 2021 20:13:26 -0700 Subject: [PATCH 04/15] Finishes third_implementation. --- arraykit.c | 88 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/arraykit.c b/arraykit.c index 32967ae4..c64244ba 100644 --- a/arraykit.c +++ b/arraykit.c @@ -422,49 +422,67 @@ _roll_1d_b(PyArrayObject* array, int shift, int size) } static PyObject * -_roll_1d_c(PyArrayObject* array, int shift, int size) +_roll_1d_c(PyArrayObject *array, int shift) { - /* - post = np.empty(size, dtype=array.dtype) - post[0:shift] = array[-shift:] - post[shift:] = array[0:-shift] - return post - */ - - // Create an empty array - PyArray_Descr* dtype = PyArray_DESCR(array); - Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype + // Tell the constructor to automatically allocate the output. + // The data type of the output will match that of the input. + PyArrayObject *arrays[2]; + npy_uint32 arrays_flags[2]; + arrays[0] = array; + arrays[1] = NULL; + arrays_flags[0] = NPY_ITER_READONLY; + arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; + + // Construct the iterator + NpyIter *iter = NpyIter_MultiNew( + 2, // number of arrays + arrays, + NPY_ITER_EXTERNAL_LOOP, // No inner iteration - inner loop is handled by CopyArray code + NPY_KEEPORDER, // Maintain existing order + NPY_NO_CASTING, // Only allows identical types + arrays_flags, + NULL); // We don't have to specify dtypes since it will use array's + + if (iter == NULL) { + return NULL; + } - PyArrayObject* post = (PyArrayObject*)PyArray_Empty( - PyArray_NDIM(array), - PyArray_DIMS(array), - dtype, - 0); - if (!post) { + NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); + if (!iternext) { + NpyIter_Deallocate(iter); return NULL; } - npy_intp array_stride = PyArray_STRIDE(array, 0); - npy_intp post_stride = PyArray_STRIDE(post, 0); - char* array_dataptr = PyArray_BYTES(array); - char* post_dataptr = PyArray_BYTES(post); + char** dataptr = NpyIter_GetDataPtrArray(iter); + npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); + npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - for (int i = 0; i < size; ++i) { - int src_i = (i + size - shift) % size; + do { + char* src_data = dataptr[0]; + char* dst_data = dataptr[1]; + npy_intp size = *sizeptr; - PyObject* obj = PyArray_GETITEM(array, array_dataptr + (array_stride * src_i)); - if (!obj) { - Py_DECREF(post); - return NULL; - } + int offset = ((size - shift) % size) * itemsize; + int first_chunk = (size * itemsize) - offset; - if (PyArray_SETITEM(post, post_dataptr + (i * post_stride), obj) == -1) { - Py_DECREF(post); - return NULL; - } + memcpy(dst_data, src_data + offset, first_chunk); + memcpy(dst_data + first_chunk, src_data, offset); + } while (iternext(iter)); + + // Get the result from the iterator object array + PyObject *ret = (PyObject*)NpyIter_GetOperandArray(iter)[1]; + if (!ret) { + NpyIter_Deallocate(iter); + return NULL; } + Py_INCREF(ret); - return (PyObject*)post; + if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { + Py_DECREF(ret); + return NULL; + } + + return ret; } static PyObject * @@ -509,8 +527,8 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) } return copy; } - return _roll_1d_a(array, shift); - return _roll_1d_b(array, shift, size); + + return _roll_1d_c(array, shift); } From 7927694ebfebb72c75e11ad4a56e04263a5d7eb6 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 26 Mar 2021 21:16:12 -0700 Subject: [PATCH 05/15] So little to show for so much struggle.... Object arrays are hard. --- arraykit.c | 5 +++-- test/test_util.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arraykit.c b/arraykit.c index c64244ba..939c31c4 100644 --- a/arraykit.c +++ b/arraykit.c @@ -457,6 +457,7 @@ _roll_1d_c(PyArrayObject *array, int shift) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; + // TODO: This does NOT work with objects.... do { char* src_data = dataptr[0]; char* dst_data = dataptr[1]; @@ -470,7 +471,7 @@ _roll_1d_c(PyArrayObject *array, int shift) } while (iternext(iter)); // Get the result from the iterator object array - PyObject *ret = (PyObject*)NpyIter_GetOperandArray(iter)[1]; + PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; if (!ret) { NpyIter_Deallocate(iter); return NULL; @@ -482,7 +483,7 @@ _roll_1d_c(PyArrayObject *array, int shift) return NULL; } - return ret; + return (PyObject*)ret; } static PyObject * diff --git a/test/test_util.py b/test/test_util.py index 49378517..f41d825b 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -173,7 +173,7 @@ def test_row_1d_filter_a(self) -> None: #--------------------------------------------------------------------------- def test_roll_1d_a(self) -> None: - a1 = np.arange(12) + a1 = np.arange(12, dtype=float) for i in range(len(a1) + 1): post = roll_1d(a1, i) From bb31b962c3233351dc6391fdb47f3f42728a7d94 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 29 Mar 2021 00:08:47 -0700 Subject: [PATCH 06/15] Adds support for object dtype arrays. --- arraykit.c | 75 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/arraykit.c b/arraykit.c index 939c31c4..802a6508 100644 --- a/arraykit.c +++ b/arraykit.c @@ -433,15 +433,29 @@ _roll_1d_c(PyArrayObject *array, int shift) arrays_flags[0] = NPY_ITER_READONLY; arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; + // No inner iteration - inner loop is handled by CopyArray code + // Reference objects are OK. + int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK; + // Construct the iterator NpyIter *iter = NpyIter_MultiNew( - 2, // number of arrays + 2, // number of arrays arrays, - NPY_ITER_EXTERNAL_LOOP, // No inner iteration - inner loop is handled by CopyArray code - NPY_KEEPORDER, // Maintain existing order - NPY_NO_CASTING, // Only allows identical types + iter_flags, + NPY_KEEPORDER, // Maintain existing order for `array` + NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed arrays_flags, - NULL); // We don't have to specify dtypes since it will use array's + NULL); // We don't have to specify dtypes since it will use array's + + /* Per the documentation for NPY_ITER_REFS_OK: + + Indicates that arrays with reference types (object arrays or structured arrays + containing an object type) may be accepted and used in the iterator. If this flag + is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) + is true, in which case it may not release the GIL during iteration. + + However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... + */ if (iter == NULL) { return NULL; @@ -457,18 +471,49 @@ _roll_1d_c(PyArrayObject *array, int shift) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - // TODO: This does NOT work with objects.... - do { - char* src_data = dataptr[0]; - char* dst_data = dataptr[1]; - npy_intp size = *sizeptr; + if (!PyDataType_ISOBJECT(PyArray_DESCR(array))) { + do { + char* src_data = dataptr[0]; + char* dst_data = dataptr[1]; + npy_intp size = *sizeptr; + + int offset = ((size - shift) % size) * itemsize; + int first_chunk = (size * itemsize) - offset; + + memcpy(dst_data, src_data + offset, first_chunk); + memcpy(dst_data + first_chunk, src_data, offset); + } while (iternext(iter)); + } + else { + // Object arrays contain pointers to arrays. + do { + char* src_data = dataptr[0]; + char* dst_data = dataptr[1]; + npy_intp size = *sizeptr; - int offset = ((size - shift) % size) * itemsize; - int first_chunk = (size * itemsize) - offset; + PyObject* src_ref = NULL; + PyObject* dst_ref = NULL; - memcpy(dst_data, src_data + offset, first_chunk); - memcpy(dst_data + first_chunk, src_data, offset); - } while (iternext(iter)); + for (int i = 0; i < size; ++i) { + int offset = ((i + size - shift) % size) * itemsize; + + // Update our temp PyObject* 's + memcpy(&src_ref, src_data + offset, sizeof(src_ref)); + memcpy(&dst_ref, dst_data, sizeof(dst_ref)); + + // Copy the reference + memcpy(dst_data, &src_ref, sizeof(src_ref)); + + // Claim the reference + Py_XINCREF(src_ref); + + // Release the reference in dst + Py_XDECREF(dst_ref); + + dst_data += itemsize; + } + } while (iternext(iter)); + } // Get the result from the iterator object array PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; From 971ed2ccae052e8b8e15969eb7dd5887776f476b Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 29 Mar 2021 13:18:24 -0700 Subject: [PATCH 07/15] Addresses significant speed issue with last object approach. Adds more perf tests. --- arraykit.c | 130 +++++++++++++++++++++++++++++++++++++++++--- performance/main.py | 73 +++++++++++++++++++++++-- 2 files changed, 188 insertions(+), 15 deletions(-) diff --git a/arraykit.c b/arraykit.c index 802a6508..dd0b97d0 100644 --- a/arraykit.c +++ b/arraykit.c @@ -286,14 +286,15 @@ assign_into_slice_from_slice(PyObject *dest, PyObject *src, PyObject *dest_slice return success; } +// Naive Re-implementation of C static PyObject * _roll_1d_a(PyArrayObject* array, int shift) { /* - post = np.empty(size, dtype=array.dtype) - post[0:shift] = array[-shift:] - post[shift:] = array[0:-shift] - return post + cls ak ref ref/ak + Roll1dInt 3.32787074 4.06750092 1.22225328 + Roll1dFloat 3.32698173 4.06643037 1.2222581 + Roll1dObject 37.89614459 38.76268129 1.02286609 */ // Create an empty array @@ -375,14 +376,15 @@ _roll_1d_a(PyArrayObject* array, int shift) return NULL; } +// Manual iteration using Numpy C api static PyObject * _roll_1d_b(PyArrayObject* array, int shift, int size) { /* - post = np.empty(size, dtype=array.dtype) - post[0:shift] = array[-shift:] - post[shift:] = array[0:-shift] - return post + cls ak ref ref/ak + Roll1dInt 3.94763173 0.13514971 0.03423564 + Roll1dFloat 3.95269516 0.13621643 0.03446166 + Roll1dObject 1.03418866 0.46459488 0.4492361 */ // Create an empty array @@ -421,9 +423,16 @@ _roll_1d_b(PyArrayObject* array, int shift, int size) return (PyObject*)post; } +// Being clever with C for primitives, struggling with Objects static PyObject * _roll_1d_c(PyArrayObject *array, int shift) { + /* + cls ak ref ref/ak + Roll1dInt 2.82467638 4.14947038 1.46900736 + Roll1dFloat 2.89442847 4.13699139 1.42929474 + Roll1dObject 112.6879144 38.81264949 0.34442602 + */ // Tell the constructor to automatically allocate the output. // The data type of the output will match that of the input. PyArrayObject *arrays[2]; @@ -531,6 +540,106 @@ _roll_1d_c(PyArrayObject *array, int shift) return (PyObject*)ret; } +// Being clever with C for primitives, and figuring out Objects +static PyObject * +_roll_1d_d(PyArrayObject *array, int shift) +{ + /* + Roll1d20kInt 2.91365521 4.25724612 1.46113586 + Roll1d20kFloat 3.21448036 4.40039245 1.36892809 + Roll1d20kObject 6.7969062 8.32454664 1.22475526 + Roll1d1kInt 0.33637808 1.32518703 3.93957601 + Roll1d1kFloat 0.32248451 1.24809331 3.87024272 + Roll1d1kObject 1.46907919 2.9891046 2.03467901 + */ + // Tell the constructor to automatically allocate the output. + // The data type of the output will match that of the input. + PyArrayObject *arrays[2]; + npy_uint32 arrays_flags[2]; + arrays[0] = array; + arrays[1] = NULL; + arrays_flags[0] = NPY_ITER_READONLY; + arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; + + // No inner iteration - inner loop is handled by CopyArray code + // Reference objects are OK. + int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK; + + // Construct the iterator + NpyIter *iter = NpyIter_MultiNew( + 2, // number of arrays + arrays, + iter_flags, + NPY_KEEPORDER, // Maintain existing order for `array` + NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed + arrays_flags, + NULL); // We don't have to specify dtypes since it will use array's + + /* Per the documentation for NPY_ITER_REFS_OK: + + Indicates that arrays with reference types (object arrays or structured arrays + containing an object type) may be accepted and used in the iterator. If this flag + is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) + is true, in which case it may not release the GIL during iteration. + + However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... + */ + + if (iter == NULL) { + return NULL; + } + + NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); + if (!iternext) { + NpyIter_Deallocate(iter); + return NULL; + } + + char** dataptr = NpyIter_GetDataPtrArray(iter); + npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); + npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; + + uint8_t is_object = PyDataType_ISOBJECT(PyArray_DESCR(array)); + + do { + char* src_data = dataptr[0]; + char* dst_data = dataptr[1]; + npy_intp size = *sizeptr; + + int offset = ((size - shift) % size) * itemsize; + int first_chunk = (size * itemsize) - offset; + + memcpy(dst_data, src_data + offset, first_chunk); + memcpy(dst_data + first_chunk, src_data, offset); + + // Increment ref counts of objects. + if (PyDataType_ISOBJECT(PyArray_DESCR(array))) { + dst_data = dataptr[1]; + for (int i = 0; i < size; ++i) { + PyObject* dst_ref = NULL; + memcpy(&dst_ref, dst_data, sizeof(dst_ref)); + Py_INCREF(dst_ref); + dst_data += itemsize; + } + } + } while (iternext(iter)); + + // Get the result from the iterator object array + PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; + if (!ret) { + NpyIter_Deallocate(iter); + return NULL; + } + Py_INCREF(ret); + + if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { + Py_DECREF(ret); + return NULL; + } + + return (PyObject*)ret; +} + static PyObject * roll_1d(PyObject *Py_UNUSED(m), PyObject *args) { @@ -574,7 +683,10 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) return copy; } - return _roll_1d_c(array, shift); + //return _roll_1d_a(array, shift); // Basically the same + //return _roll_1d_b(array, shift, size); // Way slower + //return _roll_1d_c(array, shift); // Faster for primitives, same for objects + return _roll_1d_d(array, shift); // Faster for primitives & objects! } diff --git a/performance/main.py b/performance/main.py index 9bd24d37..2e39b59b 100644 --- a/performance/main.py +++ b/performance/main.py @@ -223,8 +223,14 @@ class ArrayGOPerfREF(ArrayGOPerf): entry = staticmethod(ArrayGOREF) +storage = [] +def build_subclassses(klass, ak_meth, ref_meth): + storage.append(type(f'{klass.__name__}AK', (klass,), dict(entry=staticmethod(ak_meth)))) + storage.append(type(f'{klass.__name__}REF', (klass,), dict(entry=staticmethod(ref_meth)))) + + #------------------------------------------------------------------------------- -class Roll1d(Perf): +class Roll1d20kInt(Perf): NUMBER = 10 SIZE = 20_000 @@ -232,16 +238,71 @@ def pre(self): self.array = np.arange(self.SIZE) def main(self): - for i in range(-(self.SIZE+1), self.SIZE+1): + for i in range(-20_001, 20_001): self.entry(self.array, i) +class Roll1d20kFloat(Perf): + NUMBER = 10 + SIZE = 20_000 + + def pre(self): + self.array = np.arange(self.SIZE).astype(float) -class Roll1dAK(Roll1d): - entry = staticmethod(roll_1d_ak) + def main(self): + for i in range(-20_001, 20_001): + self.entry(self.array, i) + +class Roll1d20kObject(Perf): + NUMBER = 2 + SIZE = 20_000 + + def pre(self): + self.array = np.arange(self.SIZE).astype(object) + + def main(self): + for i in range(-20_001, 20_001): + self.entry(self.array, i) + +class Roll1d1kInt(Perf): + NUMBER = 10 + SIZE = 1_000 + + def pre(self): + self.array = np.arange(self.SIZE) + + def main(self): + for i in range(-20_000, 20_000): + self.entry(self.array, i) + +class Roll1d1kFloat(Perf): + NUMBER = 10 + SIZE = 1_000 + + def pre(self): + self.array = np.arange(self.SIZE).astype(float) + + def main(self): + for i in range(-20_000, 20_000): + self.entry(self.array, i) + +class Roll1d1kObject(Perf): + NUMBER = 10 + SIZE = 1_000 + + def pre(self): + self.array = np.arange(self.SIZE).astype(object) + + def main(self): + for i in range(-20_000, 20_000): + self.entry(self.array, i) -class Roll1dREF(Roll1d): - entry = staticmethod(roll_1d_ref) +build_subclassses(Roll1d20kInt, roll_1d_ak, roll_1d_ref) +build_subclassses(Roll1d20kFloat, roll_1d_ak, roll_1d_ref) +build_subclassses(Roll1d20kObject, roll_1d_ak, roll_1d_ref) +build_subclassses(Roll1d1kInt, roll_1d_ak, roll_1d_ref) +build_subclassses(Roll1d1kFloat, roll_1d_ak, roll_1d_ref) +build_subclassses(Roll1d1kObject, roll_1d_ak, roll_1d_ref) #------------------------------------------------------------------------------- From 8843c09c255590a6537f38d0de470b3d249281eb Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 29 Mar 2021 20:34:25 -0700 Subject: [PATCH 08/15] Cleans up misc code. --- arraykit.c | 72 +++++++++--------------------------------------------- 1 file changed, 12 insertions(+), 60 deletions(-) diff --git a/arraykit.c b/arraykit.c index dd0b97d0..af4422ea 100644 --- a/arraykit.c +++ b/arraykit.c @@ -274,14 +274,15 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) // rolling static int -assign_into_slice_from_slice(PyObject *dest, PyObject *src, PyObject *dest_slice, PyObject *src_slice) +assign_into_slice_from_slice(PyObject *dst, int dst_start, int dst_stop, + PyObject *src, int src_start, int src_stop) { - PyObject* shifted_src = PyObject_GetItem((PyObject*)src, src_slice); + PyObject* shifted_src = PySequence_GetSlice(src, src_start, src_stop); if (!shifted_src) { return -1; } - int success = PyObject_SetItem(dest, dest_slice, shifted_src); + int success = PySequence_SetSlice(dst, dst_start, dst_stop, shifted_src); Py_DECREF(shifted_src); return success; } @@ -310,70 +311,23 @@ _roll_1d_a(PyArrayObject* array, int shift) return NULL; } - // Build integers - PyObject* zero = PyLong_FromLong(0); - PyObject* pos_shift = PyLong_FromLong(shift); - PyObject* neg_shift = PyLong_FromLong(-shift); - if (!zero || !pos_shift || !neg_shift) { - goto integer_build_failure; - } - - // Build slices - PyObject* first_dest_slice = PySlice_New(zero, pos_shift, Py_None); // [0:shift] - PyObject* first_src_slice = PySlice_New(neg_shift, Py_None, Py_None); // [-shift:] - PyObject* second_dest_slice = PySlice_New(pos_shift, Py_None, Py_None); // [shift:] - PyObject* second_src_slice = PySlice_New(zero, neg_shift, Py_None); // [0:-shift] - Py_DECREF(zero); - Py_DECREF(pos_shift); - Py_DECREF(neg_shift); - if (!first_dest_slice || !first_src_slice || !second_dest_slice || !second_src_slice) { - goto slice_build_failure; - } - int success; // First Assign - success = assign_into_slice_from_slice(post, (PyObject*)array, first_dest_slice, first_src_slice); - Py_DECREF(first_dest_slice); - Py_DECREF(first_src_slice); + success = assign_into_slice_from_slice(post, 0, shift, (PyObject*)array, -shift, PyArray_SIZE(array)); if (success == -1) { - Py_DECREF(second_dest_slice); - Py_DECREF(second_src_slice); - goto failure; + Py_DECREF(post); + return NULL; } - // First Assign - success = assign_into_slice_from_slice(post, (PyObject*)array, second_dest_slice, second_src_slice); - Py_DECREF(second_src_slice); - Py_DECREF(second_dest_slice); + // Second Assign + success = assign_into_slice_from_slice(post, shift, PyArray_SIZE(array), (PyObject*)array, 0, -shift); if (success == -1) { - goto failure; + Py_DECREF(post); + return NULL; } return post; - -// Handled potentially leaked integer objects -integer_build_failure: - Py_XDECREF(zero); - Py_XDECREF(pos_shift); - Py_XDECREF(neg_shift); - goto failure; - -// Handled potentially leaked slice objects -slice_build_failure: - // Integers objects have all been cleaned up. - Py_XDECREF(first_dest_slice); - Py_XDECREF(first_src_slice); - Py_XDECREF(second_dest_slice); - Py_XDECREF(second_src_slice); - goto failure; - -// Handle final object that will always exist at this point. -failure: - // Integers objects have all been cleaned up. - // Slice objects have all been cleaned up. - Py_DECREF(post); - return NULL; } // Manual iteration using Numpy C api @@ -599,8 +553,6 @@ _roll_1d_d(PyArrayObject *array, int shift) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - uint8_t is_object = PyDataType_ISOBJECT(PyArray_DESCR(array)); - do { char* src_data = dataptr[0]; char* dst_data = dataptr[1]; @@ -615,7 +567,7 @@ _roll_1d_d(PyArrayObject *array, int shift) // Increment ref counts of objects. if (PyDataType_ISOBJECT(PyArray_DESCR(array))) { dst_data = dataptr[1]; - for (int i = 0; i < size; ++i) { + while (size--) { PyObject* dst_ref = NULL; memcpy(&dst_ref, dst_data, sizeof(dst_ref)); Py_INCREF(dst_ref); From 92209ed7b4c22543bbfbd732d794c3b6e793ff97 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 29 Mar 2021 23:16:38 -0700 Subject: [PATCH 09/15] Adds initial support & really cool ASCII art for roll_2d. --- arraykit.c | 320 ++++++++++++++++++++++++++++++++++ arraykit.pyi | 1 + performance/reference/util.py | 5 +- test/test_util.py | 19 +- 4 files changed, 339 insertions(+), 6 deletions(-) diff --git a/arraykit.c b/arraykit.c index af4422ea..189fd5a9 100644 --- a/arraykit.c +++ b/arraykit.c @@ -641,6 +641,325 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) return _roll_1d_d(array, shift); // Faster for primitives & objects! } +// ----------------------------------------------------------------------------- + +static PyObject * +_roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) +{ + /* + if axis == 0: # roll rows + post[0:shift, :] = array[-shift:, :] + post[shift:, :] = array[0:-shift, :] + return post + + # roll columns + post[:, 0:shift] = array[:, -shift:] + post[:, shift:] = array[:, 0:-shift] + */ + // Tell the constructor to automatically allocate the output. + // The data type of the output will match that of the input. + PyArrayObject *arrays[2]; + npy_uint32 arrays_flags[2]; + arrays[0] = array; + arrays[1] = NULL; + arrays_flags[0] = NPY_ITER_READONLY; + arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; + + // No inner iteration - inner loop is handled by CopyArray code + // Reference objects are OK. + int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK; + + // Construct the iterator + NpyIter *iter = NpyIter_MultiNew( + 2, // number of arrays + arrays, + iter_flags, + NPY_KEEPORDER, + NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed + arrays_flags, + NULL); // We don't have to specify dtypes since it will use array's + + /* Per the documentation for NPY_ITER_REFS_OK: + + Indicates that arrays with reference types (object arrays or structured arrays + containing an object type) may be accepted and used in the iterator. If this flag + is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) + is true, in which case it may not release the GIL during iteration. + + However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... + */ + + if (iter == NULL) { + return NULL; + } + + NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); + if (!iternext) { + NpyIter_Deallocate(iter); + return NULL; + } + + char** dataptr = NpyIter_GetDataPtrArray(iter); + npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); + npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; + + uint32_t NUM_ROWS = PyArray_DIM(array, 0); // 3 rows + uint32_t rowsize = PyArray_DIM(array, 1); // 5 cols (or 5 elements in each row) + + do { + char* src_data = dataptr[0]; + char* dst_data = dataptr[1]; + npy_intp size = *sizeptr; + + if (axis == 0) { + /* + Shift by rows! This is the easy case. + + Imagine we have this array: + [0 1 2] + [3 4 5] + [6 7 8] + + In memory, this is stored contiguously as: [0 1 2 3 4 5 6 7 8] + Placing parentheses, we can visualize where the columns are like so: + [(0 1 2) (3 4 5) (6 7 8)] + + Given this, all we are concerned about is two contiguous blocks of memory. + + For example, if shift = 1, we can copy from row[1] -> END to the front + + source = [(0 1 2) (3 4 5) (6 7 8)] + | | | | | | + ----------------- + | | | | | | + V V V V V V + buffer = [(3 4 5) (6 7 8) (X X X)] + + Now, we fill in the missing tail bytes with row[0] from the src buffer + + source = [(0 1 2) (3 4 5) (6 7 8)] + | | | + ----------------- + | | | + V V V + buffer = [(3 4 5) (6 7 8) (0 1 2)] + + Now, our internal memory represents the result of a row shift. + We can see this if we represent the final buffer as a 2D grid: + + [6 7 8] + [0 1 2] + [3 4 5] + */ + + // Easiest case! Merely shift the rows + int offset = ((NUM_ROWS - shift) % NUM_ROWS) * rowsize * itemsize; + int first_chunk = (size * itemsize) - offset; + + memcpy(dst_data, src_data + offset, first_chunk); + memcpy(dst_data + first_chunk, src_data, offset); + } + else { + /* + Shift by columns! This is the more difficult case. + + Let's use a slightly different array + [0 1 2 3 4] + [5 6 7 8 9] + [A B C D E] + + If we shift by 2, our goal array will be: + [3 4 0 1 2] + [8 9 5 6 7] + [D E A B C] + + Alternatively, we want our contiguous memory to go from: + + source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] + buffer = [(3 4 0 1 2) (8 9 5 6 7) (D E A B C)] + + In order to do this as efficiently as possible, we first fill the result buffer with the source shifted. + + source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] + \ \ \ \ \ \ \ \ \ \ \ \ \ + \ \ \ ---- \ \ \ ---- \ \ \ + \ \ \ \ \ \ \ \ \ \ \ \ \ + buffer = [(X X 0 1 2) (3 4 5 6 7) (8 9 A B C)] + + Now, all that's left is to fix the incorrect values + + buffer = [(X X 0 1 2) (3 4 5 6 7) (8 9 A B C)] + ^ ^ ^ ^ ^ ^ + + We can fill these by copying the values from each row + + source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] + | | | | | | + ------- ------- ------- + | | | | | | + V V V V V V + buffer = [(3 4 0 1 2) (8 9 5 6 7) (D E A B C)] + + Now, our internal memory represents the result of a row shift. + We can see this if we represent the final buffer as a 2D grid: + + [3 4 0 1 2] + [8 9 5 6 7] + [D E A B C] + */ + if (shift > rowsize / 2) { + /* SHIFT LEFT + + This branch is optimized for cases where the offset is greater than half of the columns. + + For this, instead of shifting right and being forced to fill in a large section for each row, + we shift left and only have to fill in small section + + Example: + + Inefficient + [0 1 2 3 4] [0 1 2 3 4] + \ | | | | + ------ ------- + \ | | | | + V V V V V + [X X X X 0] [1 2 3 4 0] + + Efficient + [0 1 2 3 4] [0 1 2 3 4] + / / / / | + | | | | ------- + | | | | | + / / / / V + [1 2 3 4 X] [1 2 3 4 0] + */ + int offset = (rowsize - shift) * itemsize; + int num_bytes = (size * itemsize) - offset; + memcpy(dst_data, src_data+offset, num_bytes); + + num_bytes = offset; // This is how much we need to copy for each column. + + // Update the shifted portion of each row. + for (size_t i = 0; i < NUM_ROWS; ++i) { + int row_offset = i * rowsize * itemsize; + + // We need to fill in the rightmost values of this row since we shifted by an offset + int dst_offset = row_offset + ((rowsize * itemsize) - offset); + int src_offset = row_offset; + + memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); + } + } + else { + // SHIFT RIGHT + int offset = shift * itemsize; + int num_bytes = (size * itemsize) - offset; + memcpy(dst_data+offset, src_data, num_bytes); + + num_bytes = offset; // This is how much we need to copy for each column. + + // Update the shifted portion of each row. + for (size_t i = 0; i < NUM_ROWS; ++i) { + int row_offset = i * rowsize * itemsize; + + // We need to fill in the leftmost values of this row since we shifted by an offset + int dst_offset = row_offset; + int src_offset = row_offset + ((rowsize - shift) * itemsize); + + memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); + } + } + } + } while (iternext(iter)); + + // Get the result from the iterator object array + PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; + if (!ret) { + NpyIter_Deallocate(iter); + return NULL; + } + Py_INCREF(ret); + + if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { + Py_DECREF(ret); + return NULL; + } + + return (PyObject*)ret; +} + +static PyObject * +roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) +{ + /* Algorithm. + + size = array.shape[axis] + + if shift != 0: + shift = shift % size + + if size <= 1 or shift == 0: + return array.copy() + + if shift < 0: + shift = size + shift + + if axis == 0: + post[0:shift, :] = array[-shift:, :] + post[shift:, :] = array[0:-shift, :] + return post + + post[:, 0:shift] = array[:, -shift:] + post[:, shift:] = array[:, 0:-shift] + return post + */ + PyArrayObject *array; + int shift; + int axis; + + static char *kwlist[] = {"array", "shift", "axis", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!ii:roll_1d", + kwlist, + &PyArray_Type, &array, + &shift, &axis)) + { + return NULL; + } + + if (axis != 0 && axis != 1) { + PyErr_SetString(PyExc_ValueError, "Axis must be 0 or 1"); + return NULL; + } + + if (PyArray_NDIM(array) != 2) { + PyErr_SetString(PyExc_ValueError, "Array must be 2D"); + return NULL; + } + + // Must be signed in order for modulo to work properly for negative shift values + int size = (int)PyArray_DIM(array, axis); + + uint8_t is_empty = (size == 0); + + if (!is_empty) { + shift = shift % size; + if (shift < 0) { + shift = size + shift; + } + } + + if (is_empty || (shift == 0)) { + PyObject* copy = PyArray_Copy(array); + if (!copy) { + return NULL; + } + return copy; + } + + return _roll_2d_a(array, (uint32_t)shift, axis); +} + //------------------------------------------------------------------------------ // ArrayGO @@ -919,6 +1238,7 @@ static PyMethodDef arraykit_methods[] = { {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL}, {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL}, {"roll_1d", roll_1d, METH_VARARGS, NULL}, + {"roll_2d", (PyCFunction)roll_2d, METH_VARARGS | METH_KEYWORDS, NULL}, {NULL}, }; diff --git a/arraykit.pyi b/arraykit.pyi index 28c787db..9ede4fc0 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -28,3 +28,4 @@ def row_1d_filter(__array: np.array) -> np.ndarray: ... def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... def roll_1d(__array: np.ndarray, __shift: int) -> np.ndarray: ... +def roll_2d(__array: np.ndarray, __shift: int, __axis: int) -> np.ndarray: ... diff --git a/performance/reference/util.py b/performance/reference/util.py index 566c8bca..a9c4a8f9 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -203,10 +203,7 @@ def roll_1d(array: np.ndarray, shift: int) -> np.ndarray: return post -def roll_2d(array: np.ndarray, - shift: int, - axis: int - ) -> np.ndarray: +def roll_2d(array: np.ndarray, shift: int, axis: int) -> np.ndarray: ''' Specialized form of np.roll that, by focusing on the 2D solution ''' diff --git a/test/test_util.py b/test/test_util.py index f41d825b..75784466 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -14,7 +14,7 @@ from performance.reference.util import mloc as mloc_ref #from performance.reference.util import roll_1d from arraykit import roll_1d -from performance.reference.util import roll_2d +from arraykit import roll_2d class TestUnit(unittest.TestCase): @@ -192,6 +192,21 @@ def test_roll_1d_c(self) -> None: self.assertEqual(roll_1d(a1, -1).tolist(), [4, 5, 6, 3]) #--------------------------------------------------------------------------- + def test_roll_2d_row(self) -> None: + arr = np.arange(15).reshape(3, 5) + + for shift in range(-10, 10): + np_result = np.roll(arr, shift, axis=0) + ak_result = roll_2d(arr, shift, axis=0) + self.assertTrue((np_result == ak_result).all()) + + def test_roll_2d_col(self) -> None: + arr = np.arange(15).reshape(3, 5) + + for shift in range(-10, 10): + np_result = np.roll(arr, shift, axis=1) + ak_result = roll_2d(arr, shift, axis=1) + self.assertTrue((np_result == ak_result).all()) def test_roll_2d_a(self) -> None: a1 = np.arange(12).reshape((3,4)) @@ -242,7 +257,7 @@ def test_roll_2d_e(self) -> None: ) def test_roll_2d_f(self) -> None: - with self.assertRaises(NotImplementedError): + with self.assertRaises(ValueError): roll_2d(np.arange(4).reshape((2, 2)), 1, axis=2) From 387706e5062703f3d554e9bce95639a6a37ca0c4 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 30 Mar 2021 11:39:48 -0700 Subject: [PATCH 10/15] Removes unnecessary memcpy. Addresses conversion warnings. --- arraykit.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arraykit.c b/arraykit.c index 189fd5a9..a1bffced 100644 --- a/arraykit.c +++ b/arraykit.c @@ -190,9 +190,9 @@ shape_filter(PyObject *Py_UNUSED(m), PyObject *a) AK_CHECK_NUMPY_ARRAY_1D_2D(a); PyArrayObject *array = (PyArrayObject *)a; - int size0 = PyArray_DIM(array, 0); + int size0 = (int)PyArray_DIM(array, 0); // If 1D array, set size for axis 1 at 1, else use 2D array to get the size of axis 1 - int size1 = PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1); + int size1 = (int)(PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1)); return Py_BuildValue("ii", size0, size1); } @@ -314,14 +314,14 @@ _roll_1d_a(PyArrayObject* array, int shift) int success; // First Assign - success = assign_into_slice_from_slice(post, 0, shift, (PyObject*)array, -shift, PyArray_SIZE(array)); + success = assign_into_slice_from_slice(post, 0, shift, (PyObject*)array, -shift, (int)PyArray_SIZE(array)); if (success == -1) { Py_DECREF(post); return NULL; } // Second Assign - success = assign_into_slice_from_slice(post, shift, PyArray_SIZE(array), (PyObject*)array, 0, -shift); + success = assign_into_slice_from_slice(post, shift, (int)PyArray_SIZE(array), (PyObject*)array, 0, -shift); if (success == -1) { Py_DECREF(post); return NULL; @@ -440,8 +440,8 @@ _roll_1d_c(PyArrayObject *array, int shift) char* dst_data = dataptr[1]; npy_intp size = *sizeptr; - int offset = ((size - shift) % size) * itemsize; - int first_chunk = (size * itemsize) - offset; + npy_intp offset = ((size - shift) % size) * itemsize; + npy_intp first_chunk = (size * itemsize) - offset; memcpy(dst_data, src_data + offset, first_chunk); memcpy(dst_data + first_chunk, src_data, offset); @@ -458,7 +458,7 @@ _roll_1d_c(PyArrayObject *array, int shift) PyObject* dst_ref = NULL; for (int i = 0; i < size; ++i) { - int offset = ((i + size - shift) % size) * itemsize; + npy_intp offset = ((i + size - shift) % size) * itemsize; // Update our temp PyObject* 's memcpy(&src_ref, src_data + offset, sizeof(src_ref)); @@ -558,8 +558,8 @@ _roll_1d_d(PyArrayObject *array, int shift) char* dst_data = dataptr[1]; npy_intp size = *sizeptr; - int offset = ((size - shift) % size) * itemsize; - int first_chunk = (size * itemsize) - offset; + npy_intp offset = ((size - shift) % size) * itemsize; + npy_intp first_chunk = (size * itemsize) - offset; memcpy(dst_data, src_data + offset, first_chunk); memcpy(dst_data + first_chunk, src_data, offset); @@ -568,9 +568,7 @@ _roll_1d_d(PyArrayObject *array, int shift) if (PyDataType_ISOBJECT(PyArray_DESCR(array))) { dst_data = dataptr[1]; while (size--) { - PyObject* dst_ref = NULL; - memcpy(&dst_ref, dst_data, sizeof(dst_ref)); - Py_INCREF(dst_ref); + Py_INCREF(*(PyObject**)dst_data); dst_data += itemsize; } } @@ -703,8 +701,8 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - uint32_t NUM_ROWS = PyArray_DIM(array, 0); // 3 rows - uint32_t rowsize = PyArray_DIM(array, 1); // 5 cols (or 5 elements in each row) + uint32_t NUM_ROWS = (uint32_t)PyArray_DIM(array, 0); + uint32_t rowsize = (uint32_t)PyArray_DIM(array, 1); do { char* src_data = dataptr[0]; From a487ea2f92319b845ff1d84ed1c8e716f4c89604 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 30 Mar 2021 17:02:01 -0700 Subject: [PATCH 11/15] Updates some types and clears up ambiguity of some arithmatic expressions. --- arraykit.c | 63 +++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/arraykit.c b/arraykit.c index a1bffced..ab5089db 100644 --- a/arraykit.c +++ b/arraykit.c @@ -633,16 +633,19 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) return copy; } - //return _roll_1d_a(array, shift); // Basically the same - //return _roll_1d_b(array, shift, size); // Way slower - //return _roll_1d_c(array, shift); // Faster for primitives, same for objects + // Silence UnuSEd fUnCTioN warnings. + if (0) { + return _roll_1d_a(array, shift); // Basically the same + return _roll_1d_b(array, shift, size); // Way slower + return _roll_1d_c(array, shift); // Faster for primitives, same for objects + } return _roll_1d_d(array, shift); // Faster for primitives & objects! } // ----------------------------------------------------------------------------- static PyObject * -_roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) +_roll_2d_a(PyArrayObject *array, npy_uint shift, int axis) { /* if axis == 0: # roll rows @@ -701,13 +704,15 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - uint32_t NUM_ROWS = (uint32_t)PyArray_DIM(array, 0); - uint32_t rowsize = (uint32_t)PyArray_DIM(array, 1); + npy_uint NUM_ROWS = (npy_uint)PyArray_DIM(array, 0); + npy_uint rowsize = (npy_uint)PyArray_DIM(array, 1); + npy_uint bytes_in_row = rowsize * itemsize; do { - char* src_data = dataptr[0]; - char* dst_data = dataptr[1]; + char *src_data = dataptr[0]; + char *dst_data = dataptr[1]; npy_intp size = *sizeptr; + npy_uint total_bytes = size * itemsize; if (axis == 0) { /* @@ -719,12 +724,12 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) [6 7 8] In memory, this is stored contiguously as: [0 1 2 3 4 5 6 7 8] - Placing parentheses, we can visualize where the columns are like so: + Placing parentheses, we can visualize where the rows are like so: [(0 1 2) (3 4 5) (6 7 8)] Given this, all we are concerned about is two contiguous blocks of memory. - For example, if shift = 1, we can copy from row[1] -> END to the front + For example, if shift = -1, we can copy from row[1] -> END to the front source = [(0 1 2) (3 4 5) (6 7 8)] | | | | | | @@ -745,17 +750,17 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) Now, our internal memory represents the result of a row shift. We can see this if we represent the final buffer as a 2D grid: + [3 4 5] [6 7 8] [0 1 2] - [3 4 5] */ // Easiest case! Merely shift the rows - int offset = ((NUM_ROWS - shift) % NUM_ROWS) * rowsize * itemsize; - int first_chunk = (size * itemsize) - offset; + npy_intp offset = (NUM_ROWS - shift) * bytes_in_row; + npy_intp chunksize = total_bytes - offset; - memcpy(dst_data, src_data + offset, first_chunk); - memcpy(dst_data + first_chunk, src_data, offset); + memcpy(dst_data, src_data + offset, chunksize); + memcpy(dst_data + chunksize, src_data, offset); } else { /* @@ -813,7 +818,7 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) For this, instead of shifting right and being forced to fill in a large section for each row, we shift left and only have to fill in small section - Example: + Example: Shift by 4 Inefficient [0 1 2 3 4] [0 1 2 3 4] @@ -831,38 +836,38 @@ _roll_2d_a(PyArrayObject *array, uint32_t shift, int axis) / / / / V [1 2 3 4 X] [1 2 3 4 0] */ - int offset = (rowsize - shift) * itemsize; - int num_bytes = (size * itemsize) - offset; - memcpy(dst_data, src_data+offset, num_bytes); + npy_intp offset = (rowsize - shift) * itemsize; + npy_intp num_bytes = total_bytes - offset; + memcpy(dst_data, src_data + offset, num_bytes); num_bytes = offset; // This is how much we need to copy for each column. // Update the shifted portion of each row. for (size_t i = 0; i < NUM_ROWS; ++i) { - int row_offset = i * rowsize * itemsize; + npy_intp row_offset = i * bytes_in_row; // We need to fill in the rightmost values of this row since we shifted by an offset - int dst_offset = row_offset + ((rowsize * itemsize) - offset); - int src_offset = row_offset; + npy_intp dst_offset = row_offset + bytes_in_row - num_bytes; + npy_intp src_offset = row_offset; memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); } } else { // SHIFT RIGHT - int offset = shift * itemsize; - int num_bytes = (size * itemsize) - offset; + npy_intp offset = shift * itemsize; + npy_intp num_bytes = total_bytes - offset; memcpy(dst_data+offset, src_data, num_bytes); num_bytes = offset; // This is how much we need to copy for each column. // Update the shifted portion of each row. for (size_t i = 0; i < NUM_ROWS; ++i) { - int row_offset = i * rowsize * itemsize; + npy_intp row_offset = i * bytes_in_row; // We need to fill in the leftmost values of this row since we shifted by an offset - int dst_offset = row_offset; - int src_offset = row_offset + ((rowsize - shift) * itemsize); + npy_intp dst_offset = row_offset; + npy_intp src_offset = row_offset + ((rowsize - shift) * itemsize); memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); } @@ -913,7 +918,7 @@ roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) */ PyArrayObject *array; int shift; - int axis; + int axis; // npy_intp static char *kwlist[] = {"array", "shift", "axis", NULL}; @@ -955,7 +960,7 @@ roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return copy; } - return _roll_2d_a(array, (uint32_t)shift, axis); + return _roll_2d_a(array, (npy_uint)shift, axis); } From 7d5f41a92ef8ff06e8077623bf3185fd64b18f38 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 30 Mar 2021 18:29:33 -0700 Subject: [PATCH 12/15] Implements a roll_2d approach that handles bytes manually. --- arraykit.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/arraykit.c b/arraykit.c index ab5089db..d920da2f 100644 --- a/arraykit.c +++ b/arraykit.c @@ -891,6 +891,85 @@ _roll_2d_a(PyArrayObject *array, npy_uint shift, int axis) return (PyObject*)ret; } +static PyObject * +_roll_2d_b(PyArrayObject *array, npy_uint shift, int axis) +{ + // Create an empty array + PyArray_Descr* dtype = PyArray_DESCR(array); + Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype + + PyArrayObject* post = (PyArrayObject*)PyArray_Empty( + PyArray_NDIM(array), + PyArray_DIMS(array), + dtype, + 0); + if (!post) { + return NULL; + } + + npy_intp itemsize = dtype->elsize; + npy_intp size = PyArray_SIZE(array); + + char* src_data = PyArray_BYTES(array); + char* dst_data = PyArray_BYTES(post); + + npy_uint NUM_ROWS = (npy_uint)PyArray_DIM(array, 0); + npy_uint rowsize = (npy_uint)PyArray_DIM(array, 1); + npy_uint bytes_in_row = rowsize * itemsize; + npy_uint total_bytes = size * itemsize; + + if (axis == 0) { + // Easiest case! Merely shift the rows + npy_intp offset = (NUM_ROWS - shift) * bytes_in_row; + npy_intp chunksize = total_bytes - offset; + + memcpy(dst_data, src_data + offset, chunksize); + memcpy(dst_data + chunksize, src_data, offset); + } + else { + if (shift > rowsize / 2) { + // SHIFT LEFT + npy_intp offset = (rowsize - shift) * itemsize; + npy_intp num_bytes = total_bytes - offset; + memcpy(dst_data, src_data + offset, num_bytes); + + num_bytes = offset; // This is how much we need to copy for each column. + + // Update the shifted portion of each row. + for (size_t i = 0; i < NUM_ROWS; ++i) { + npy_intp row_offset = i * bytes_in_row; + + // We need to fill in the rightmost values of this row since we shifted by an offset + npy_intp dst_offset = row_offset + bytes_in_row - num_bytes; + npy_intp src_offset = row_offset; + + memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); + } + } + else { + // SHIFT RIGHT + npy_intp offset = shift * itemsize; + npy_intp num_bytes = total_bytes - offset; + memcpy(dst_data + offset, src_data, num_bytes); + + num_bytes = offset; // This is how much we need to copy for each column. + + // Update the shifted portion of each row. + for (size_t i = 0; i < NUM_ROWS; ++i) { + npy_intp row_offset = i * bytes_in_row; + + // We need to fill in the leftmost values of this row since we shifted by an offset + npy_intp dst_offset = row_offset; + npy_intp src_offset = row_offset + ((rowsize - shift) * itemsize); + + memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); + } + } + } + + return (PyObject*)post; +} + static PyObject * roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) { @@ -943,7 +1022,7 @@ roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) // Must be signed in order for modulo to work properly for negative shift values int size = (int)PyArray_DIM(array, axis); - uint8_t is_empty = (size == 0); + npy_uint8 is_empty = (size == 0); if (!is_empty) { shift = shift % size; @@ -960,7 +1039,10 @@ roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return copy; } - return _roll_2d_a(array, (npy_uint)shift, axis); + if (0) { + return _roll_2d_a(array, (npy_uint)shift, axis); + } + return _roll_2d_b(array, (npy_uint)shift, axis); } From 05b9849b5aa5bacf1e4ad7af8c96f23594a7d26c Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 9 Apr 2021 13:08:12 -0700 Subject: [PATCH 13/15] Removes roll_2d -> nothing to improve. Removes old roll_1d impls. Adds multithreading option for 1d. --- arraykit.c | 674 +--------------------------------- arraykit.pyi | 1 - performance/reference/util.py | 37 -- test/test_util.py | 70 ---- 4 files changed, 8 insertions(+), 774 deletions(-) diff --git a/arraykit.c b/arraykit.c index d920da2f..b17060eb 100644 --- a/arraykit.c +++ b/arraykit.c @@ -273,120 +273,9 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) //------------------------------------------------------------------------------ // rolling -static int -assign_into_slice_from_slice(PyObject *dst, int dst_start, int dst_stop, - PyObject *src, int src_start, int src_stop) -{ - PyObject* shifted_src = PySequence_GetSlice(src, src_start, src_stop); - if (!shifted_src) { - return -1; - } - - int success = PySequence_SetSlice(dst, dst_start, dst_stop, shifted_src); - Py_DECREF(shifted_src); - return success; -} - -// Naive Re-implementation of C -static PyObject * -_roll_1d_a(PyArrayObject* array, int shift) -{ - /* - cls ak ref ref/ak - Roll1dInt 3.32787074 4.06750092 1.22225328 - Roll1dFloat 3.32698173 4.06643037 1.2222581 - Roll1dObject 37.89614459 38.76268129 1.02286609 - */ - - // Create an empty array - PyArray_Descr* dtype = PyArray_DESCR(array); - Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype - - PyObject* post = PyArray_Empty( - PyArray_NDIM(array), - PyArray_DIMS(array), - dtype, - 0); - if (!post) { - return NULL; - } - - int success; - - // First Assign - success = assign_into_slice_from_slice(post, 0, shift, (PyObject*)array, -shift, (int)PyArray_SIZE(array)); - if (success == -1) { - Py_DECREF(post); - return NULL; - } - - // Second Assign - success = assign_into_slice_from_slice(post, shift, (int)PyArray_SIZE(array), (PyObject*)array, 0, -shift); - if (success == -1) { - Py_DECREF(post); - return NULL; - } - - return post; -} - -// Manual iteration using Numpy C api static PyObject * -_roll_1d_b(PyArrayObject* array, int shift, int size) +_roll_1d(PyArrayObject *array, int shift) { - /* - cls ak ref ref/ak - Roll1dInt 3.94763173 0.13514971 0.03423564 - Roll1dFloat 3.95269516 0.13621643 0.03446166 - Roll1dObject 1.03418866 0.46459488 0.4492361 - */ - - // Create an empty array - PyArray_Descr* dtype = PyArray_DESCR(array); - Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype - - PyArrayObject* post = (PyArrayObject*)PyArray_Empty( - PyArray_NDIM(array), - PyArray_DIMS(array), - dtype, - 0); - if (!post) { - return NULL; - } - - npy_intp array_stride = PyArray_STRIDE(array, 0); - npy_intp post_stride = PyArray_STRIDE(post, 0); - char* array_dataptr = PyArray_BYTES(array); - char* post_dataptr = PyArray_BYTES(post); - - for (int i = 0; i < size; ++i) { - int src_i = (i + size - shift) % size; - - PyObject* obj = PyArray_GETITEM(array, array_dataptr + (array_stride * src_i)); - if (!obj) { - Py_DECREF(post); - return NULL; - } - - if (PyArray_SETITEM(post, post_dataptr + (i * post_stride), obj) == -1) { - Py_DECREF(post); - return NULL; - } - } - - return (PyObject*)post; -} - -// Being clever with C for primitives, struggling with Objects -static PyObject * -_roll_1d_c(PyArrayObject *array, int shift) -{ - /* - cls ak ref ref/ak - Roll1dInt 2.82467638 4.14947038 1.46900736 - Roll1dFloat 2.89442847 4.13699139 1.42929474 - Roll1dObject 112.6879144 38.81264949 0.34442602 - */ // Tell the constructor to automatically allocate the output. // The data type of the output will match that of the input. PyArrayObject *arrays[2]; @@ -410,16 +299,6 @@ _roll_1d_c(PyArrayObject *array, int shift) arrays_flags, NULL); // We don't have to specify dtypes since it will use array's - /* Per the documentation for NPY_ITER_REFS_OK: - - Indicates that arrays with reference types (object arrays or structured arrays - containing an object type) may be accepted and used in the iterator. If this flag - is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) - is true, in which case it may not release the GIL during iteration. - - However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... - */ - if (iter == NULL) { return NULL; } @@ -434,125 +313,12 @@ _roll_1d_c(PyArrayObject *array, int shift) npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - if (!PyDataType_ISOBJECT(PyArray_DESCR(array))) { - do { - char* src_data = dataptr[0]; - char* dst_data = dataptr[1]; - npy_intp size = *sizeptr; - - npy_intp offset = ((size - shift) % size) * itemsize; - npy_intp first_chunk = (size * itemsize) - offset; - - memcpy(dst_data, src_data + offset, first_chunk); - memcpy(dst_data + first_chunk, src_data, offset); - } while (iternext(iter)); - } - else { - // Object arrays contain pointers to arrays. - do { - char* src_data = dataptr[0]; - char* dst_data = dataptr[1]; - npy_intp size = *sizeptr; - - PyObject* src_ref = NULL; - PyObject* dst_ref = NULL; - - for (int i = 0; i < size; ++i) { - npy_intp offset = ((i + size - shift) % size) * itemsize; - - // Update our temp PyObject* 's - memcpy(&src_ref, src_data + offset, sizeof(src_ref)); - memcpy(&dst_ref, dst_data, sizeof(dst_ref)); - - // Copy the reference - memcpy(dst_data, &src_ref, sizeof(src_ref)); - - // Claim the reference - Py_XINCREF(src_ref); - - // Release the reference in dst - Py_XDECREF(dst_ref); - - dst_data += itemsize; - } - } while (iternext(iter)); - } - - // Get the result from the iterator object array - PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; - if (!ret) { - NpyIter_Deallocate(iter); - return NULL; - } - Py_INCREF(ret); - - if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { - Py_DECREF(ret); - return NULL; - } - - return (PyObject*)ret; -} - -// Being clever with C for primitives, and figuring out Objects -static PyObject * -_roll_1d_d(PyArrayObject *array, int shift) -{ - /* - Roll1d20kInt 2.91365521 4.25724612 1.46113586 - Roll1d20kFloat 3.21448036 4.40039245 1.36892809 - Roll1d20kObject 6.7969062 8.32454664 1.22475526 - Roll1d1kInt 0.33637808 1.32518703 3.93957601 - Roll1d1kFloat 0.32248451 1.24809331 3.87024272 - Roll1d1kObject 1.46907919 2.9891046 2.03467901 - */ - // Tell the constructor to automatically allocate the output. - // The data type of the output will match that of the input. - PyArrayObject *arrays[2]; - npy_uint32 arrays_flags[2]; - arrays[0] = array; - arrays[1] = NULL; - arrays_flags[0] = NPY_ITER_READONLY; - arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; - - // No inner iteration - inner loop is handled by CopyArray code - // Reference objects are OK. - int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK; - - // Construct the iterator - NpyIter *iter = NpyIter_MultiNew( - 2, // number of arrays - arrays, - iter_flags, - NPY_KEEPORDER, // Maintain existing order for `array` - NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed - arrays_flags, - NULL); // We don't have to specify dtypes since it will use array's - - /* Per the documentation for NPY_ITER_REFS_OK: - - Indicates that arrays with reference types (object arrays or structured arrays - containing an object type) may be accepted and used in the iterator. If this flag - is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) - is true, in which case it may not release the GIL during iteration. - - However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... - */ - - if (iter == NULL) { - return NULL; + // If we don't need the GIL, iteration can be multi-threaded! + NPY_BEGIN_THREADS_DEF; + if (!NpyIter_IterationNeedsAPI(iter)) { + NPY_BEGIN_THREADS; } - NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); - if (!iternext) { - NpyIter_Deallocate(iter); - return NULL; - } - - char** dataptr = NpyIter_GetDataPtrArray(iter); - npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); - npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - do { char* src_data = dataptr[0]; char* dst_data = dataptr[1]; @@ -574,6 +340,8 @@ _roll_1d_d(PyArrayObject *array, int shift) } } while (iternext(iter)); + NPY_END_THREADS; + // Get the result from the iterator object array PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; if (!ret) { @@ -593,21 +361,6 @@ _roll_1d_d(PyArrayObject *array, int shift) static PyObject * roll_1d(PyObject *Py_UNUSED(m), PyObject *args) { - /* Algorithm. - - size = len(array) - if size <= 1: - return array.copy() - - shift = shift % size - if shift == 0: - return array.copy() - - post = np.empty(size, dtype=array.dtype) - post[0:shift] = array[-shift:] - post[shift:] = array[0:-shift] - return post - */ PyArrayObject *array; int shift; @@ -633,419 +386,9 @@ roll_1d(PyObject *Py_UNUSED(m), PyObject *args) return copy; } - // Silence UnuSEd fUnCTioN warnings. - if (0) { - return _roll_1d_a(array, shift); // Basically the same - return _roll_1d_b(array, shift, size); // Way slower - return _roll_1d_c(array, shift); // Faster for primitives, same for objects - } - return _roll_1d_d(array, shift); // Faster for primitives & objects! + return _roll_1d(array, shift); } -// ----------------------------------------------------------------------------- - -static PyObject * -_roll_2d_a(PyArrayObject *array, npy_uint shift, int axis) -{ - /* - if axis == 0: # roll rows - post[0:shift, :] = array[-shift:, :] - post[shift:, :] = array[0:-shift, :] - return post - - # roll columns - post[:, 0:shift] = array[:, -shift:] - post[:, shift:] = array[:, 0:-shift] - */ - // Tell the constructor to automatically allocate the output. - // The data type of the output will match that of the input. - PyArrayObject *arrays[2]; - npy_uint32 arrays_flags[2]; - arrays[0] = array; - arrays[1] = NULL; - arrays_flags[0] = NPY_ITER_READONLY; - arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; - - // No inner iteration - inner loop is handled by CopyArray code - // Reference objects are OK. - int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK; - - // Construct the iterator - NpyIter *iter = NpyIter_MultiNew( - 2, // number of arrays - arrays, - iter_flags, - NPY_KEEPORDER, - NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed - arrays_flags, - NULL); // We don't have to specify dtypes since it will use array's - - /* Per the documentation for NPY_ITER_REFS_OK: - - Indicates that arrays with reference types (object arrays or structured arrays - containing an object type) may be accepted and used in the iterator. If this flag - is enabled, the caller must be sure to check whether NpyIter_IterationNeedsAPI(iter) - is true, in which case it may not release the GIL during iteration. - - However, `NpyIter_IterationNeedsAPI` is not documented at all. So....... - */ - - if (iter == NULL) { - return NULL; - } - - NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); - if (!iternext) { - NpyIter_Deallocate(iter); - return NULL; - } - - char** dataptr = NpyIter_GetDataPtrArray(iter); - npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter); - npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize; - - npy_uint NUM_ROWS = (npy_uint)PyArray_DIM(array, 0); - npy_uint rowsize = (npy_uint)PyArray_DIM(array, 1); - npy_uint bytes_in_row = rowsize * itemsize; - - do { - char *src_data = dataptr[0]; - char *dst_data = dataptr[1]; - npy_intp size = *sizeptr; - npy_uint total_bytes = size * itemsize; - - if (axis == 0) { - /* - Shift by rows! This is the easy case. - - Imagine we have this array: - [0 1 2] - [3 4 5] - [6 7 8] - - In memory, this is stored contiguously as: [0 1 2 3 4 5 6 7 8] - Placing parentheses, we can visualize where the rows are like so: - [(0 1 2) (3 4 5) (6 7 8)] - - Given this, all we are concerned about is two contiguous blocks of memory. - - For example, if shift = -1, we can copy from row[1] -> END to the front - - source = [(0 1 2) (3 4 5) (6 7 8)] - | | | | | | - ----------------- - | | | | | | - V V V V V V - buffer = [(3 4 5) (6 7 8) (X X X)] - - Now, we fill in the missing tail bytes with row[0] from the src buffer - - source = [(0 1 2) (3 4 5) (6 7 8)] - | | | - ----------------- - | | | - V V V - buffer = [(3 4 5) (6 7 8) (0 1 2)] - - Now, our internal memory represents the result of a row shift. - We can see this if we represent the final buffer as a 2D grid: - - [3 4 5] - [6 7 8] - [0 1 2] - */ - - // Easiest case! Merely shift the rows - npy_intp offset = (NUM_ROWS - shift) * bytes_in_row; - npy_intp chunksize = total_bytes - offset; - - memcpy(dst_data, src_data + offset, chunksize); - memcpy(dst_data + chunksize, src_data, offset); - } - else { - /* - Shift by columns! This is the more difficult case. - - Let's use a slightly different array - [0 1 2 3 4] - [5 6 7 8 9] - [A B C D E] - - If we shift by 2, our goal array will be: - [3 4 0 1 2] - [8 9 5 6 7] - [D E A B C] - - Alternatively, we want our contiguous memory to go from: - - source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] - buffer = [(3 4 0 1 2) (8 9 5 6 7) (D E A B C)] - - In order to do this as efficiently as possible, we first fill the result buffer with the source shifted. - - source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] - \ \ \ \ \ \ \ \ \ \ \ \ \ - \ \ \ ---- \ \ \ ---- \ \ \ - \ \ \ \ \ \ \ \ \ \ \ \ \ - buffer = [(X X 0 1 2) (3 4 5 6 7) (8 9 A B C)] - - Now, all that's left is to fix the incorrect values - - buffer = [(X X 0 1 2) (3 4 5 6 7) (8 9 A B C)] - ^ ^ ^ ^ ^ ^ - - We can fill these by copying the values from each row - - source = [(0 1 2 3 4) (5 6 7 8 9) (A B C D E)] - | | | | | | - ------- ------- ------- - | | | | | | - V V V V V V - buffer = [(3 4 0 1 2) (8 9 5 6 7) (D E A B C)] - - Now, our internal memory represents the result of a row shift. - We can see this if we represent the final buffer as a 2D grid: - - [3 4 0 1 2] - [8 9 5 6 7] - [D E A B C] - */ - if (shift > rowsize / 2) { - /* SHIFT LEFT - - This branch is optimized for cases where the offset is greater than half of the columns. - - For this, instead of shifting right and being forced to fill in a large section for each row, - we shift left and only have to fill in small section - - Example: Shift by 4 - - Inefficient - [0 1 2 3 4] [0 1 2 3 4] - \ | | | | - ------ ------- - \ | | | | - V V V V V - [X X X X 0] [1 2 3 4 0] - - Efficient - [0 1 2 3 4] [0 1 2 3 4] - / / / / | - | | | | ------- - | | | | | - / / / / V - [1 2 3 4 X] [1 2 3 4 0] - */ - npy_intp offset = (rowsize - shift) * itemsize; - npy_intp num_bytes = total_bytes - offset; - memcpy(dst_data, src_data + offset, num_bytes); - - num_bytes = offset; // This is how much we need to copy for each column. - - // Update the shifted portion of each row. - for (size_t i = 0; i < NUM_ROWS; ++i) { - npy_intp row_offset = i * bytes_in_row; - - // We need to fill in the rightmost values of this row since we shifted by an offset - npy_intp dst_offset = row_offset + bytes_in_row - num_bytes; - npy_intp src_offset = row_offset; - - memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); - } - } - else { - // SHIFT RIGHT - npy_intp offset = shift * itemsize; - npy_intp num_bytes = total_bytes - offset; - memcpy(dst_data+offset, src_data, num_bytes); - - num_bytes = offset; // This is how much we need to copy for each column. - - // Update the shifted portion of each row. - for (size_t i = 0; i < NUM_ROWS; ++i) { - npy_intp row_offset = i * bytes_in_row; - - // We need to fill in the leftmost values of this row since we shifted by an offset - npy_intp dst_offset = row_offset; - npy_intp src_offset = row_offset + ((rowsize - shift) * itemsize); - - memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); - } - } - } - } while (iternext(iter)); - - // Get the result from the iterator object array - PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1]; - if (!ret) { - NpyIter_Deallocate(iter); - return NULL; - } - Py_INCREF(ret); - - if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { - Py_DECREF(ret); - return NULL; - } - - return (PyObject*)ret; -} - -static PyObject * -_roll_2d_b(PyArrayObject *array, npy_uint shift, int axis) -{ - // Create an empty array - PyArray_Descr* dtype = PyArray_DESCR(array); - Py_INCREF(dtype); // PyArray_Empty steals a reference to dtype - - PyArrayObject* post = (PyArrayObject*)PyArray_Empty( - PyArray_NDIM(array), - PyArray_DIMS(array), - dtype, - 0); - if (!post) { - return NULL; - } - - npy_intp itemsize = dtype->elsize; - npy_intp size = PyArray_SIZE(array); - - char* src_data = PyArray_BYTES(array); - char* dst_data = PyArray_BYTES(post); - - npy_uint NUM_ROWS = (npy_uint)PyArray_DIM(array, 0); - npy_uint rowsize = (npy_uint)PyArray_DIM(array, 1); - npy_uint bytes_in_row = rowsize * itemsize; - npy_uint total_bytes = size * itemsize; - - if (axis == 0) { - // Easiest case! Merely shift the rows - npy_intp offset = (NUM_ROWS - shift) * bytes_in_row; - npy_intp chunksize = total_bytes - offset; - - memcpy(dst_data, src_data + offset, chunksize); - memcpy(dst_data + chunksize, src_data, offset); - } - else { - if (shift > rowsize / 2) { - // SHIFT LEFT - npy_intp offset = (rowsize - shift) * itemsize; - npy_intp num_bytes = total_bytes - offset; - memcpy(dst_data, src_data + offset, num_bytes); - - num_bytes = offset; // This is how much we need to copy for each column. - - // Update the shifted portion of each row. - for (size_t i = 0; i < NUM_ROWS; ++i) { - npy_intp row_offset = i * bytes_in_row; - - // We need to fill in the rightmost values of this row since we shifted by an offset - npy_intp dst_offset = row_offset + bytes_in_row - num_bytes; - npy_intp src_offset = row_offset; - - memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); - } - } - else { - // SHIFT RIGHT - npy_intp offset = shift * itemsize; - npy_intp num_bytes = total_bytes - offset; - memcpy(dst_data + offset, src_data, num_bytes); - - num_bytes = offset; // This is how much we need to copy for each column. - - // Update the shifted portion of each row. - for (size_t i = 0; i < NUM_ROWS; ++i) { - npy_intp row_offset = i * bytes_in_row; - - // We need to fill in the leftmost values of this row since we shifted by an offset - npy_intp dst_offset = row_offset; - npy_intp src_offset = row_offset + ((rowsize - shift) * itemsize); - - memcpy(dst_data + dst_offset, src_data + src_offset, num_bytes); - } - } - } - - return (PyObject*)post; -} - -static PyObject * -roll_2d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) -{ - /* Algorithm. - - size = array.shape[axis] - - if shift != 0: - shift = shift % size - - if size <= 1 or shift == 0: - return array.copy() - - if shift < 0: - shift = size + shift - - if axis == 0: - post[0:shift, :] = array[-shift:, :] - post[shift:, :] = array[0:-shift, :] - return post - - post[:, 0:shift] = array[:, -shift:] - post[:, shift:] = array[:, 0:-shift] - return post - */ - PyArrayObject *array; - int shift; - int axis; // npy_intp - - static char *kwlist[] = {"array", "shift", "axis", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!ii:roll_1d", - kwlist, - &PyArray_Type, &array, - &shift, &axis)) - { - return NULL; - } - - if (axis != 0 && axis != 1) { - PyErr_SetString(PyExc_ValueError, "Axis must be 0 or 1"); - return NULL; - } - - if (PyArray_NDIM(array) != 2) { - PyErr_SetString(PyExc_ValueError, "Array must be 2D"); - return NULL; - } - - // Must be signed in order for modulo to work properly for negative shift values - int size = (int)PyArray_DIM(array, axis); - - npy_uint8 is_empty = (size == 0); - - if (!is_empty) { - shift = shift % size; - if (shift < 0) { - shift = size + shift; - } - } - - if (is_empty || (shift == 0)) { - PyObject* copy = PyArray_Copy(array); - if (!copy) { - return NULL; - } - return copy; - } - - if (0) { - return _roll_2d_a(array, (npy_uint)shift, axis); - } - return _roll_2d_b(array, (npy_uint)shift, axis); -} - - //------------------------------------------------------------------------------ // ArrayGO //------------------------------------------------------------------------------ @@ -1323,7 +666,6 @@ static PyMethodDef arraykit_methods[] = { {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL}, {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL}, {"roll_1d", roll_1d, METH_VARARGS, NULL}, - {"roll_2d", (PyCFunction)roll_2d, METH_VARARGS | METH_KEYWORDS, NULL}, {NULL}, }; diff --git a/arraykit.pyi b/arraykit.pyi index 9ede4fc0..28c787db 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -28,4 +28,3 @@ def row_1d_filter(__array: np.array) -> np.ndarray: ... def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... def roll_1d(__array: np.ndarray, __shift: int) -> np.ndarray: ... -def roll_2d(__array: np.ndarray, __shift: int, __axis: int) -> np.ndarray: ... diff --git a/performance/reference/util.py b/performance/reference/util.py index a9c4a8f9..8a566a01 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -201,40 +201,3 @@ def roll_1d(array: np.ndarray, shift: int) -> np.ndarray: post[0:shift] = array[-shift:] post[shift:] = array[0:-shift] return post - - -def roll_2d(array: np.ndarray, shift: int, axis: int) -> np.ndarray: - ''' - Specialized form of np.roll that, by focusing on the 2D solution - ''' - post = np.empty(array.shape, dtype=array.dtype) - - if axis == 0: # roll rows - size = array.shape[0] - if size <= 1: - return array.copy() - - # result will be positive - shift = shift % size - if shift == 0: - return array.copy() - - post[0:shift, :] = array[-shift:, :] - post[shift:, :] = array[0:-shift, :] - return post - - elif axis == 1: # roll columns - size = array.shape[1] - if size <= 1: - return array.copy() - - # result will be positive - shift = shift % size - if shift == 0: - return array.copy() - - post[:, 0:shift] = array[:, -shift:] - post[:, shift:] = array[:, 0:-shift] - return post - - raise NotImplementedError() diff --git a/test/test_util.py b/test/test_util.py index 75784466..5b813790 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -14,7 +14,6 @@ from performance.reference.util import mloc as mloc_ref #from performance.reference.util import roll_1d from arraykit import roll_1d -from arraykit import roll_2d class TestUnit(unittest.TestCase): @@ -191,75 +190,6 @@ def test_roll_1d_c(self) -> None: self.assertEqual(roll_1d(a1, 1).tolist(), [6, 3, 4, 5]) self.assertEqual(roll_1d(a1, -1).tolist(), [4, 5, 6, 3]) - #--------------------------------------------------------------------------- - def test_roll_2d_row(self) -> None: - arr = np.arange(15).reshape(3, 5) - - for shift in range(-10, 10): - np_result = np.roll(arr, shift, axis=0) - ak_result = roll_2d(arr, shift, axis=0) - self.assertTrue((np_result == ak_result).all()) - - def test_roll_2d_col(self) -> None: - arr = np.arange(15).reshape(3, 5) - - for shift in range(-10, 10): - np_result = np.roll(arr, shift, axis=1) - ak_result = roll_2d(arr, shift, axis=1) - self.assertTrue((np_result == ak_result).all()) - - def test_roll_2d_a(self) -> None: - a1 = np.arange(12).reshape((3,4)) - - for i in range(a1.shape[0] + 1): - post = roll_2d(a1, i, axis=0) - self.assertEqual(post.tolist(), np.roll(a1, i, axis=0).tolist()) - - post = roll_2d(a1, -i, axis=0) - self.assertEqual(post.tolist(), np.roll(a1, -i, axis=0).tolist()) - - for i in range(a1.shape[1] + 1): - post = roll_2d(a1, i, axis=1) - self.assertEqual(post.tolist(), np.roll(a1, i, axis=1).tolist()) - - post = roll_2d(a1, -i, axis=1) - self.assertEqual(post.tolist(), np.roll(a1, -i, axis=1).tolist()) - - def test_roll_2d_b(self) -> None: - post = roll_2d(np.array([[]]), -4, axis=1) - self.assertEqual(post.shape, (1, 0)) - - def test_roll_2d_c(self) -> None: - a1 = np.arange(12).reshape((3,4)) - - self.assertEqual(roll_2d(a1, -2, axis=0).tolist(), - [[8, 9, 10, 11], [0, 1, 2, 3], [4, 5, 6, 7]]) - - self.assertEqual(roll_2d(a1, -2, axis=1).tolist(), - [[2, 3, 0, 1], [6, 7, 4, 5], [10, 11, 8, 9]]) - - def test_roll_2d_d(self) -> None: - a1 = np.arange(6).reshape((2, 3)) - - self.assertEqual(roll_2d(a1, 1, axis=1).tolist(), - [[2, 0, 1], [5, 3, 4]]) - self.assertEqual(roll_2d(a1, -1, axis=1).tolist(), - [[1, 2, 0], [4, 5, 3]]) - - def test_roll_2d_e(self) -> None: - a1 = np.arange(6).reshape((3, 2)) - - self.assertEqual(roll_2d(a1, 1, axis=0).tolist(), - [[4, 5], [0, 1], [2, 3]] - ) - self.assertEqual(roll_2d(a1, -1, axis=0).tolist(), - [[2, 3], [4, 5], [0, 1]] - ) - - def test_roll_2d_f(self) -> None: - with self.assertRaises(ValueError): - roll_2d(np.arange(4).reshape((2, 2)), 1, axis=2) - if __name__ == '__main__': unittest.main() From aa8eb1bb7be3c174c145fbc0e1e44c78c4c9d9b0 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 26 Apr 2021 10:39:47 -0700 Subject: [PATCH 14/15] Removes some test code. --- test/test_util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 791a771c..04a23745 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -11,10 +11,9 @@ from arraykit import mloc from arraykit import immutable_filter from arraykit import array_deepcopy +from arraykit import roll_1d from performance.reference.util import mloc as mloc_ref -#from performance.reference.util import roll_1d -from arraykit import roll_1d class TestUnit(unittest.TestCase): From cd046b3c31d167f277ce2c70bc693240a62d8de5 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 21 Jun 2021 15:06:40 -0700 Subject: [PATCH 15/15] Fixes mistakes introduced by merge conflicts. --- performance/__main__.py | 12 ++++++------ src/__init__.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/performance/__main__.py b/performance/__main__.py index c182e787..ac9d8018 100644 --- a/performance/__main__.py +++ b/performance/__main__.py @@ -374,7 +374,7 @@ class Roll1d20kInt(Perf): NUMBER = 10 SIZE = 20_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE) def main(self): @@ -385,7 +385,7 @@ class Roll1d20kFloat(Perf): NUMBER = 10 SIZE = 20_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE).astype(float) def main(self): @@ -396,7 +396,7 @@ class Roll1d20kObject(Perf): NUMBER = 2 SIZE = 20_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE).astype(object) def main(self): @@ -407,7 +407,7 @@ class Roll1d1kInt(Perf): NUMBER = 10 SIZE = 1_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE) def main(self): @@ -418,7 +418,7 @@ class Roll1d1kFloat(Perf): NUMBER = 10 SIZE = 1_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE).astype(float) def main(self): @@ -429,7 +429,7 @@ class Roll1d1kObject(Perf): NUMBER = 10 SIZE = 1_000 - def pre(self): + def __init__(self): self.array = np.arange(self.SIZE).astype(object) def main(self): diff --git a/src/__init__.py b/src/__init__.py index 988ca110..52945c33 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -16,3 +16,4 @@ from ._arraykit import resolve_dtype_iter as resolve_dtype_iter from ._arraykit import isna_element as isna_element from ._arraykit import dtype_from_element as dtype_from_element +from ._arraykit import roll_1d as roll_1d