From 4f51d2375512d750080cc7bb740a4188d5f7f14f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Fri, 23 May 2025 13:20:00 +0700 Subject: [PATCH 01/30] add oindex method to AsyncArray --- src/zarr/core/array.py | 22 ++++++++++++++++++++++ src/zarr/core/indexing.py | 23 +++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 78b5e92ed6..e9a8dd4167 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -79,6 +79,7 @@ MaskIndexer, MaskSelection, OIndex, + AsyncOIndex, OrthogonalIndexer, OrthogonalSelection, Selection, @@ -1358,6 +1359,21 @@ async def getitem( ) return await self._get_selection(indexer, prototype=prototype) + async def get_orthogonal_selection( + self, + selection: OrthogonalSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) + return await self._async_array._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = False) -> None: """ Asynchronously save the array metadata. @@ -1488,6 +1504,12 @@ async def setitem( ) return await self._set_selection(indexer, value, prototype=prototype) + @property + def oindex(self) -> AsyncOIndex: + """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and + :func:`set_orthogonal_selection` for documentation and examples.""" + return AsyncOIndex(self) + async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: """ Asynchronously resize the array to a new shape. 
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 998fe156a1..665b9ddb2e 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -28,7 +28,7 @@ from zarr.core.common import product if TYPE_CHECKING: - from zarr.core.array import Array + from zarr.core.array import Array, AsyncArray from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords @@ -950,7 +950,7 @@ def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrSc return self.array.get_orthogonal_selection( cast(OrthogonalSelection, new_selection), fields=fields ) - + def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) @@ -960,6 +960,25 @@ def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> N ) +@dataclass(frozen=True) +class AsyncOIndex: + array: AsyncArray + + async def getitem(self, selection: OrthogonalSelection | Array) -> NDArrayLike: + from zarr.core.array import Array + + # if input is a Zarr array, we materialize it now. 
+ if isinstance(selection, Array): + selection = _zarr_array_to_int_or_bool_array(selection) + + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + return await self.array.get_orthogonal_selection( + cast(OrthogonalSelection, new_selection), fields=fields + ) + + @dataclass(frozen=True) class BlockIndexer(Indexer): dim_indexers: list[SliceDimIndexer] From 535ebaae7db496b78efbdcfc7e97ea5b53b46330 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Fri, 23 May 2025 13:29:53 +0700 Subject: [PATCH 02/30] fix --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e9a8dd4167..32a51b7020 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1370,7 +1370,7 @@ async def get_orthogonal_selection( if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) - return await self._async_array._get_selection( + return await self._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) From 6f25f8239e06a435c8b7f7ec6f512c0f503b899e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 29 May 2025 17:31:57 +0700 Subject: [PATCH 03/30] add support for async vindex --- src/zarr/core/array.py | 28 +++++++++++++++++++++++++++- src/zarr/core/indexing.py | 26 +++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 32a51b7020..179516abe3 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -68,6 +68,8 @@ ) from zarr.core.config import config as zarr_config from zarr.core.indexing import ( + AsyncOIndex, + AsyncVIndex, BasicIndexer, BasicSelection, BlockIndex, @@ -79,7 +81,6 @@ MaskIndexer, MaskSelection, OIndex, - AsyncOIndex, OrthogonalIndexer, OrthogonalSelection, Selection, @@ -1374,6 +1375,27 @@ 
async def get_orthogonal_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) + @_deprecate_positional_args + async def get_coordinate_selection( + self, + selection: CoordinateSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) + out_array = await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + + if hasattr(out_array, "shape"): + # restore shape + out_array = np.array(out_array).reshape(indexer.sel_shape) + return out_array + async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = False) -> None: """ Asynchronously save the array metadata. @@ -1510,6 +1532,10 @@ def oindex(self) -> AsyncOIndex: :func:`set_orthogonal_selection` for documentation and examples.""" return AsyncOIndex(self) + @property + def vindex(self) -> AsyncVIndex: + return AsyncVIndex(self) + async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: """ Asynchronously resize the array to a new shape. 
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 665b9ddb2e..32c694a3cc 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -950,7 +950,7 @@ def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrSc return self.array.get_orthogonal_selection( cast(OrthogonalSelection, new_selection), fields=fields ) - + def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) @@ -1287,6 +1287,30 @@ def __setitem__( raise VindexInvalidSelectionError(new_selection) +@dataclass(frozen=True) +class AsyncVIndex: + array: AsyncArray + + # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool + async def getitem( + self, selection: CoordinateSelection | MaskSelection | Array + ) -> NDArrayLikeOrScalar: + from zarr.core.array import Array + + # if input is a Zarr array, we materialize it now. 
+ if isinstance(selection, Array): + selection = _zarr_array_to_int_or_bool_array(selection) + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + if is_coordinate_selection(new_selection, self.array.shape): + return await self.array.get_coordinate_selection(new_selection, fields=fields) + elif is_mask_selection(new_selection, self.array.shape): + return self.array.get_mask_selection(new_selection, fields=fields) + else: + raise VindexInvalidSelectionError(new_selection) + + def check_fields(fields: Fields | None, dtype: np.dtype[Any]) -> np.dtype[Any]: # early out if fields is None: From bdbdd613a4882f3f80afa73f6565b2db0d453892 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 13:28:13 +0100 Subject: [PATCH 04/30] remove outdated @_deprecate_positional_args --- src/zarr/core/array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 724bd3c6cd..a93eeff8ce 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1442,7 +1442,6 @@ async def get_orthogonal_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) - @_deprecate_positional_args async def get_coordinate_selection( self, selection: CoordinateSelection, From fec243d11b4ddf22f973d8b80c18f64d0ada11fa Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 13:45:24 +0100 Subject: [PATCH 05/30] correct return type hint --- src/zarr/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index b9846df8f1..78ef09303a 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -964,7 +964,7 @@ def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> N class AsyncOIndex: array: AsyncArray - async def getitem(self, selection: OrthogonalSelection | Array) -> NDArrayLike: + async def getitem(self, 
selection: OrthogonalSelection | Array) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. From ea0f6579d84d678cded2062fb63dbb9fbe54937e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 13:53:14 +0100 Subject: [PATCH 06/30] add type parameter to generic --- src/zarr/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 78ef09303a..86e21b2bb5 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -962,7 +962,7 @@ def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> N @dataclass(frozen=True) class AsyncOIndex: - array: AsyncArray + array: AsyncArray[ArrayMetadata] async def getitem(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrScalar: from zarr.core.array import Array @@ -1289,7 +1289,7 @@ def __setitem__( @dataclass(frozen=True) class AsyncVIndex: - array: AsyncArray + array: AsyncArray[ArrayMetadata] # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool async def getitem( From 870b6b60cc95c57eefe3c074d5813f9287856762 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 13:57:17 +0100 Subject: [PATCH 07/30] actually import type --- src/zarr/core/indexing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 86e21b2bb5..949240db3e 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -32,6 +32,8 @@ from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords + from zarr.core.metadata import T_ArrayMetadata + IntSequence = list[int] | npt.NDArray[np.intp] ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] @@ -962,7 +964,7 @@ def __setitem__(self, selection: OrthogonalSelection, value: 
npt.ArrayLike) -> N @dataclass(frozen=True) class AsyncOIndex: - array: AsyncArray[ArrayMetadata] + array: AsyncArray[T_ArrayMetadata] async def getitem(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrScalar: from zarr.core.array import Array @@ -1289,7 +1291,7 @@ def __setitem__( @dataclass(frozen=True) class AsyncVIndex: - array: AsyncArray[ArrayMetadata] + array: AsyncArray[T_ArrayMetadata] # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool async def getitem( From a7e9e43c7ac43e2966f535565026ba78c2553b91 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 15:03:16 +0100 Subject: [PATCH 08/30] add hypothesis tests for supported async indexing --- tests/test_properties.py | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/test_properties.py b/tests/test_properties.py index 27f847fa69..18dea4d265 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,6 +1,7 @@ import json import numbers from typing import Any +import asyncio import numpy as np import pytest @@ -121,6 +122,26 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@settings(deadline=None) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +@given(data=st.data()) +def test_basic_indexing_async(data: st.DataObject) -> None: + zarray = data.draw(simple_arrays()) + nparray = zarray[:] + indexer = data.draw(basic_indices(shape=nparray.shape)) + async_zarray = zarray._async_array + + actual = asyncio.run(async_zarray.getitem(indexer)) + assert_array_equal(nparray[indexer], actual) + + # TODO test async setitem + # new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) + # asyncio.run(async_zarray.setitem(indexer, new_data)) + # nparray[indexer] = new_data + # result = asyncio.run(async_zarray.getitem(indexer)) + # assert_array_equal(nparray, result) 
+ + @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_oindex(data: st.DataObject) -> None: @@ -143,6 +164,21 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@given(data=st.data()) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +def test_oindex_async(data: st.DataObject) -> None: + # integer_array_indices can't handle 0-size dimensions. + zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) + nparray = zarray[:] + async_zarray = zarray._async_array + + zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) + actual = asyncio.run(async_zarray.oindex.getitem(zindexer)) + assert_array_equal(nparray[npindexer], actual) + + # note: async oindex setting not yet implemented + + @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_vindex(data: st.DataObject) -> None: @@ -167,6 +203,23 @@ def test_vindex(data: st.DataObject) -> None: # assert_array_equal(nparray, zarray[:]) +@given(data=st.data()) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +def test_vindex_async(data: st.DataObject) -> None: + # integer_array_indices can't handle 0-size dimensions. 
+ zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) + nparray = zarray[:] + async_zarray = zarray._async_array + + indexer = data.draw( + npst.integer_array_indices( + shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) + ) + ) + actual = asyncio.run(async_zarray.vindex.getitem(indexer)) + assert_array_equal(nparray[indexer], actual) + + @given(store=stores, meta=array_metadata()) # type: ignore[misc] @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_roundtrip_array_metadata_from_store( From 102e4118e58fbe1da727162f29218657df5d9c48 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 15:36:05 +0100 Subject: [PATCH 09/30] make tests of async into async functions --- tests/test_properties.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 18dea4d265..9d4727c68a 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -122,16 +122,17 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.asyncio @settings(deadline=None) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) -def test_basic_indexing_async(data: st.DataObject) -> None: +async def test_basic_indexing_async(data: st.DataObject) -> None: zarray = data.draw(simple_arrays()) nparray = zarray[:] indexer = data.draw(basic_indices(shape=nparray.shape)) async_zarray = zarray._async_array - actual = asyncio.run(async_zarray.getitem(indexer)) + actual = await async_zarray.getitem(indexer) assert_array_equal(nparray[indexer], actual) # TODO test async setitem @@ -164,16 +165,17 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.asyncio @given(data=st.data()) 
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -def test_oindex_async(data: st.DataObject) -> None: +async def test_oindex_async(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] async_zarray = zarray._async_array zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) - actual = asyncio.run(async_zarray.oindex.getitem(zindexer)) + actual = await async_zarray.oindex.getitem(zindexer) assert_array_equal(nparray[npindexer], actual) # note: async oindex setting not yet implemented @@ -203,9 +205,10 @@ def test_vindex(data: st.DataObject) -> None: # assert_array_equal(nparray, zarray[:]) +@pytest.mark.asyncio @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -def test_vindex_async(data: st.DataObject) -> None: +async def test_vindex_async(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] @@ -216,7 +219,7 @@ def test_vindex_async(data: st.DataObject) -> None: shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) ) ) - actual = asyncio.run(async_zarray.vindex.getitem(indexer)) + actual = await async_zarray.vindex.getitem(indexer) assert_array_equal(nparray[indexer], actual) From 0cd96aa15ff7e9be39ea183775e54121993788c7 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 22 Jul 2025 15:46:47 +0100 Subject: [PATCH 10/30] release notes --- changes/3083.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3083.feature.rst diff --git a/changes/3083.feature.rst b/changes/3083.feature.rst new file mode 100644 index 0000000000..4403224df1 --- /dev/null +++ b/changes/3083.feature.rst @@ -0,0 +1 @@ +Added support for async vectorized and orthogonal indexing. \ No newline at end of file From b50396949e98e463536d846d157e30bd50226d25 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:01:58 +0100 Subject: [PATCH 11/30] linting --- tests/test_properties.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 9d4727c68a..2336d1a2aa 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,7 +1,6 @@ import json import numbers from typing import Any -import asyncio import numpy as np import pytest @@ -131,7 +130,7 @@ async def test_basic_indexing_async(data: st.DataObject) -> None: nparray = zarray[:] indexer = data.draw(basic_indices(shape=nparray.shape)) async_zarray = zarray._async_array - + actual = await async_zarray.getitem(indexer) assert_array_equal(nparray[indexer], actual) @@ -175,7 +174,7 @@ async def test_oindex_async(data: st.DataObject) -> None: async_zarray = zarray._async_array zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) - actual = await 
async_zarray.oindex.getitem(zindexer) + actual = await async_zarray.oindex.getitem(zindexer) assert_array_equal(nparray[npindexer], actual) # note: async oindex setting not yet implemented From 9b8ebde7a14bac9bbe4c813bfe9f53d4f358a9ea Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:10:54 +0100 Subject: [PATCH 12/30] fix type hint issues with T_ArrayMetadata and Generics --- src/zarr/core/array.py | 4 ++-- src/zarr/core/indexing.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 8427a2444d..293a8caf45 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1594,13 +1594,13 @@ async def setitem( return await self._set_selection(indexer, value, prototype=prototype) @property - def oindex(self) -> AsyncOIndex: + def oindex(self) -> AsyncOIndex[T_ArrayMetadata]: """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and :func:`set_orthogonal_selection` for documentation and examples.""" return AsyncOIndex(self) @property - def vindex(self) -> AsyncVIndex: + def vindex(self) -> AsyncVIndex[T_ArrayMetadata]: return AsyncVIndex(self) async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 949240db3e..b6d9a3ca69 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -12,6 +12,7 @@ from typing import ( TYPE_CHECKING, Any, + Generic, Literal, NamedTuple, Protocol, @@ -32,8 +33,8 @@ from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords - from zarr.core.metadata import T_ArrayMetadata +from zarr.core.metadata import T_ArrayMetadata IntSequence = list[int] | npt.NDArray[np.intp] ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] @@ -963,7 +964,7 @@ def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) 
-> N @dataclass(frozen=True) -class AsyncOIndex: +class AsyncOIndex(Generic[T_ArrayMetadata]): array: AsyncArray[T_ArrayMetadata] async def getitem(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrScalar: @@ -1290,7 +1291,7 @@ def __setitem__( @dataclass(frozen=True) -class AsyncVIndex: +class AsyncVIndex(Generic[T_ArrayMetadata]): array: AsyncArray[T_ArrayMetadata] # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool From 125ebdf1a253d98e19d31ebec2fc777f30de9b8e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:12:15 +0100 Subject: [PATCH 13/30] copy docstring for async vindex --- src/zarr/core/array.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 293a8caf45..b5d2957039 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1601,6 +1601,9 @@ def oindex(self) -> AsyncOIndex[T_ArrayMetadata]: @property def vindex(self) -> AsyncVIndex[T_ArrayMetadata]: + """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, + :func:`set_coordinate_selection`, :func:`get_mask_selection` and + :func:`set_mask_selection` for documentation and examples.""" return AsyncVIndex(self) async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: From b6d5b6db162fc57ceef8e627074f7276f47b455b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:16:29 +0100 Subject: [PATCH 14/30] broaden return type to include scalar --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b5d2957039..19bcabc3f5 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1434,7 +1434,7 @@ async def get_orthogonal_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: if prototype is 
None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) From e7cbaef8d41467369b227fbfa74dae4093129d29 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:38:34 +0100 Subject: [PATCH 15/30] satisfied mypy by adding get_mask_selection to AsyncArray --- src/zarr/core/array.py | 15 +++++++++++++++ src/zarr/core/indexing.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 19bcabc3f5..93c3ef2cf5 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1442,6 +1442,21 @@ async def get_orthogonal_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) + async def get_mask_selection( + self, + mask: MaskSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLikeOrScalar: + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) + return await self._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + async def get_coordinate_selection( self, selection: CoordinateSelection, diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index b6d9a3ca69..0d4e87d2f5 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -1309,7 +1309,7 @@ async def getitem( if is_coordinate_selection(new_selection, self.array.shape): return await self.array.get_coordinate_selection(new_selection, fields=fields) elif is_mask_selection(new_selection, self.array.shape): - return self.array.get_mask_selection(new_selection, fields=fields) + return await self.array.get_mask_selection(new_selection, fields=fields) else: raise VindexInvalidSelectionError(new_selection) From d5d5494f50ad84aab3e4bb25bf3f5a484812e9f8 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 10:46:35 +0100 Subject: 
[PATCH 16/30] move T_ArrayMetadata import back inside type checking block --- src/zarr/core/indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 0d4e87d2f5..067a9f5a4f 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -33,8 +33,7 @@ from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords - -from zarr.core.metadata import T_ArrayMetadata + from zarr.core.metadata import T_ArrayMetadata IntSequence = list[int] | npt.NDArray[np.intp] ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] From c0026e99a0a3dd5e36f42af993e99d73b082831f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 11:02:11 +0100 Subject: [PATCH 17/30] resolve circular import by moving ceildiv to common.py --- src/zarr/core/array.py | 2 +- src/zarr/core/chunk_grids.py | 2 +- src/zarr/core/common.py | 7 +++++++ src/zarr/core/indexing.py | 11 +++-------- tests/test_array.py | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 93c3ef2cf5..260e94bc88 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -61,6 +61,7 @@ ZarrFormat, _default_zarr_format, _warn_order_kwarg, + ceildiv, concurrent_map, parse_shapelike, product, @@ -94,7 +95,6 @@ Selection, VIndex, _iter_grid, - ceildiv, check_fields, check_no_multi_fields, is_pure_fancy_indexing, diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 4bf03c89de..6a3d6816a6 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -18,10 +18,10 @@ ChunkCoords, ChunkCoordsLike, ShapeLike, + ceildiv, parse_named_configuration, parse_shapelike, ) -from zarr.core.indexing import ceildiv if TYPE_CHECKING: from collections.abc import Iterator diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index 
e86347d808..33590c83a5 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -2,6 +2,7 @@ import asyncio import functools +import math import operator import warnings from collections.abc import Iterable, Mapping, Sequence @@ -69,6 +70,12 @@ def product(tup: ChunkCoords) -> int: return functools.reduce(operator.mul, tup, 1) +def ceildiv(a: float, b: float) -> int: + if a == 0: + return 0 + return math.ceil(a / b) + + T = TypeVar("T", bound=tuple[Any, ...]) V = TypeVar("V") diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 067a9f5a4f..00814a8863 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -26,14 +26,15 @@ import numpy as np import numpy.typing as npt -from zarr.core.common import product +from zarr.core.common import ceildiv, product +from zarr.core.metadata import T_ArrayMetadata if TYPE_CHECKING: from zarr.core.array import Array, AsyncArray from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords - from zarr.core.metadata import T_ArrayMetadata + IntSequence = list[int] | npt.NDArray[np.intp] ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] @@ -95,12 +96,6 @@ class Indexer(Protocol): def __iter__(self) -> Iterator[ChunkProjection]: ... 
-def ceildiv(a: float, b: float) -> int: - if a == 0: - return 0 - return math.ceil(a / b) - - _ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] diff --git a/tests/test_array.py b/tests/test_array.py index 42f4a1cbdd..f672006f9a 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -41,7 +41,7 @@ from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams -from zarr.core.common import JSON, ZarrFormat +from zarr.core.common import JSON, ZarrFormat, ceildiv from zarr.core.dtype import ( DateTime64, Float32, @@ -59,7 +59,7 @@ from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.string import UTF8Base from zarr.core.group import AsyncGroup -from zarr.core.indexing import BasicIndexer, ceildiv +from zarr.core.indexing import BasicIndexer from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync From b9197e5a5017f65270f85e796ffab0ff895c8e5b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 24 Jul 2025 20:41:12 +0100 Subject: [PATCH 18/30] merge sync and async tests into one function --- tests/test_properties.py | 90 +++++++++++++++------------------------- 1 file changed, 33 insertions(+), 57 deletions(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 2336d1a2aa..705cfd1b59 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -105,54 +105,52 @@ def test_array_creates_implicit_groups(array): # this decorator removes timeout; not ideal but it should avoid intermittent CI failures +@pytest.mark.asyncio @settings(deadline=None) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) -def test_basic_indexing(data: st.DataObject) -> None: +async def test_basic_indexing(data: 
st.DataObject) -> None: zarray = data.draw(simple_arrays()) nparray = zarray[:] indexer = data.draw(basic_indices(shape=nparray.shape)) + + # sync get actual = zarray[indexer] assert_array_equal(nparray[indexer], actual) + # async get + async_zarray = zarray._async_array + actual = await async_zarray.getitem(indexer) + assert_array_equal(nparray[indexer], actual) + + # sync set new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) zarray[indexer] = new_data nparray[indexer] = new_data assert_array_equal(nparray, zarray[:]) - -@pytest.mark.asyncio -@settings(deadline=None) -@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -@given(data=st.data()) -async def test_basic_indexing_async(data: st.DataObject) -> None: - zarray = data.draw(simple_arrays()) - nparray = zarray[:] - indexer = data.draw(basic_indices(shape=nparray.shape)) - async_zarray = zarray._async_array - - actual = await async_zarray.getitem(indexer) - assert_array_equal(nparray[indexer], actual) - - # TODO test async setitem - # new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) - # asyncio.run(async_zarray.setitem(indexer, new_data)) - # nparray[indexer] = new_data - # result = asyncio.run(async_zarray.getitem(indexer)) - # assert_array_equal(nparray, result) + # TODO test async setitem? +@pytest.mark.asyncio @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -def test_oindex(data: st.DataObject) -> None: +async def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] - zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) + + # sync get actual = zarray.oindex[zindexer] assert_array_equal(nparray[npindexer], actual) + # async get + async_zarray = zarray._async_array + actual = await async_zarray.oindex.getitem(zindexer) + assert_array_equal(nparray[npindexer], actual) + + # sync set assume(zarray.shards is None) # GH2834 for idxr in npindexer: if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size: @@ -163,38 +161,32 @@ def test_oindex(data: st.DataObject) -> None: zarray.oindex[zindexer] = new_data assert_array_equal(nparray, zarray[:]) - -@pytest.mark.asyncio -@given(data=st.data()) -@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -async def test_oindex_async(data: st.DataObject) -> None: - # integer_array_indices can't handle 0-size dimensions. - zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) - nparray = zarray[:] - async_zarray = zarray._async_array - - zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) - actual = await async_zarray.oindex.getitem(zindexer) - assert_array_equal(nparray[npindexer], actual) - - # note: async oindex setting not yet implemented + # note: async oindex setitem not yet implemented +@pytest.mark.asyncio @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -def test_vindex(data: st.DataObject) -> None: +async def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] - indexer = data.draw( npst.integer_array_indices( shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) ) ) + + # sync get actual = zarray.vindex[indexer] assert_array_equal(nparray[indexer], actual) + # async get + async_zarray = zarray._async_array + actual = await async_zarray.vindex.getitem(indexer) + assert_array_equal(nparray[indexer], actual) + + # sync set # FIXME! # when the indexer is such that a value gets overwritten multiple times, # I think the output depends on chunking. @@ -203,23 +195,7 @@ def test_vindex(data: st.DataObject) -> None: # zarray.vindex[indexer] = new_data # assert_array_equal(nparray, zarray[:]) - -@pytest.mark.asyncio -@given(data=st.data()) -@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -async def test_vindex_async(data: st.DataObject) -> None: - # integer_array_indices can't handle 0-size dimensions. 
- zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) - nparray = zarray[:] - async_zarray = zarray._async_array - - indexer = data.draw( - npst.integer_array_indices( - shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) - ) - ) - actual = await async_zarray.vindex.getitem(indexer) - assert_array_equal(nparray[indexer], actual) + # note: async vindex setitem not yet implemented @given(store=stores, meta=array_metadata()) # type: ignore[misc] From 9e60062dac9007ef362fc216634096e1423cd4b1 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:05:19 +0100 Subject: [PATCH 19/30] sketch out async oindex test --- tests/test_indexing.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index fd4bb13bb1..caf7945090 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1994,3 +1994,44 @@ def test_iter_chunk_regions(): assert_array_equal(a[region], np.ones_like(a[region])) a[region] = 0 assert_array_equal(a[region], np.zeros_like(a[region])) + + +import asyncio + + +class TestAsync: + @pytest.mark.parametrize( + "indexer,expected", + [ + # int + ((0,), np.array([[1, 2]])), + ((1,), np.array([[3, 4]])), + ((0, 1), np.array(2)), + # slice + ((slice(None),), np.array([[1, 2], [3, 4]])), + ((slice(0, 1),), np.array([[1, 2]])), + ((slice(1, 2),), np.array([[3, 4]])), + ((slice(0, 2),), np.array([[1, 2], [3, 4]])), + ((slice(0, 0),), np.empty(shape=(0, 2), dtype="i8")), + # TODO ellipsis + + # TODO combined + ((0, slice(None)), np.array([[1, 2]])), + ((slice(None), 0), np.array([[1, 3]])), + ((slice(None), slice(None)), np.array([[1, 2], [3, 4]])), + # TODO array of ints + # TODO boolean array + ], + ) + @pytest.mark.asyncio + async def test_async_oindex(self, store, indexer, expected): + z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") + z[...] 
= np.array([[1, 2], [3, 4]]) + async_zarr = z._async_array + + result = await async_zarr.oindex.getitem(indexer) + assert_array_equal(result, expected) + + @pytest.mark.asyncio + async def test_async_vindex(self): + ... From 18ea04214145a64730b86444749dd19764857454 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:09:39 +0100 Subject: [PATCH 20/30] add ellipsis tests --- tests/test_indexing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index caf7945090..f292d11a2d 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1996,16 +1996,13 @@ def test_iter_chunk_regions(): assert_array_equal(a[region], np.zeros_like(a[region])) -import asyncio - - class TestAsync: @pytest.mark.parametrize( "indexer,expected", [ # int - ((0,), np.array([[1, 2]])), - ((1,), np.array([[3, 4]])), + ((0,), np.array([1, 2])), + ((1,), np.array([3, 4])), ((0, 1), np.array(2)), # slice ((slice(None),), np.array([[1, 2], [3, 4]])), @@ -2013,11 +2010,14 @@ class TestAsync: ((slice(1, 2),), np.array([[3, 4]])), ((slice(0, 2),), np.array([[1, 2], [3, 4]])), ((slice(0, 0),), np.empty(shape=(0, 2), dtype="i8")), - # TODO ellipsis - + # ellipsis + ((...,), np.array([[1, 2], [3, 4]])), + ((0, ...), np.array([1, 2])), + ((..., 0), np.array([1, 3])), + ((0, 1, ...), np.array(2)), # TODO combined - ((0, slice(None)), np.array([[1, 2]])), - ((slice(None), 0), np.array([[1, 3]])), + ((0, slice(None)), np.array([1, 2])), + ((slice(None), 0), np.array([1, 3])), ((slice(None), slice(None)), np.array([[1, 2], [3, 4]])), # TODO array of ints # TODO boolean array @@ -2028,7 +2028,7 @@ async def test_async_oindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] 
= np.array([[1, 2], [3, 4]]) async_zarr = z._async_array - + result = await async_zarr.oindex.getitem(indexer) assert_array_equal(result, expected) From 7fe1ffd7a02530abc1d3416fa1329c5a1bba012c Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:17:37 +0100 Subject: [PATCH 21/30] add tests for arrays of ints --- tests/test_indexing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index f292d11a2d..ab02d9f6eb 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2019,7 +2019,12 @@ class TestAsync: ((0, slice(None)), np.array([1, 2])), ((slice(None), 0), np.array([1, 3])), ((slice(None), slice(None)), np.array([[1, 2], [3, 4]])), - # TODO array of ints + # array of ints + (([0]), np.array([[1, 2]])), + (([1]), np.array([[3, 4]])), + (([0], [1]), np.array(2)), + (([0, 1], [0]), np.array([[1], [3]])), + (([0, 1], [0, 1]), np.array([[1, 2], [3, 4]])), # TODO boolean array ], ) From b0af4a75f48049384fb082395c828932743073e9 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:24:59 +0100 Subject: [PATCH 22/30] all async oindex tests --- tests/test_indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index ab02d9f6eb..69608892ba 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2015,7 +2015,7 @@ class TestAsync: ((0, ...), np.array([1, 2])), ((..., 0), np.array([1, 3])), ((0, 1, ...), np.array(2)), - # TODO combined + # combined ((0, slice(None)), np.array([1, 2])), ((slice(None), 0), np.array([1, 3])), ((slice(None), slice(None)), np.array([[1, 2], [3, 4]])), @@ -2025,7 +2025,11 @@ class TestAsync: (([0], [1]), np.array(2)), (([0, 1], [0]), np.array([[1], [3]])), (([0, 1], [0, 1]), np.array([[1, 2], [3, 4]])), - # TODO boolean array + # boolean array + (np.array([True, True]), np.array([[1, 2], [3, 4]])), + (np.array([True, False]), np.array([[1, 
2]])), + (np.array([False, True]), np.array([[3, 4]])), + (np.array([False, False]), np.empty(shape=(0, 2), dtype="i8")), ], ) @pytest.mark.asyncio From 79f78cc388df6edb715e0ecf77927d2d76153824 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:30:42 +0100 Subject: [PATCH 23/30] add vindex test --- tests/test_indexing.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 69608892ba..aa9151c66f 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2041,6 +2041,18 @@ async def test_async_oindex(self, store, indexer, expected): result = await async_zarr.oindex.getitem(indexer) assert_array_equal(result, expected) + @pytest.mark.parametrize( + "indexer,expected", + [ + (([0], [0]), np.array(1)), + (([0, 1], [0, 1]), np.array([1, 4])), + ], + ) @pytest.mark.asyncio - async def test_async_vindex(self): - ... + async def test_async_vindex(self, store, indexer, expected): + z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") + z[...] 
= np.array([[1, 2], [3, 4]]) + async_zarr = z._async_array + + result = await async_zarr.vindex.getitem(indexer) + assert_array_equal(result, expected) From 3b62dfac9fe57396c0bac68e1bcfb73a70309317 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 14:40:10 +0100 Subject: [PATCH 24/30] satisfy mypy --- tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index aa9151c66f..683a982159 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1998,7 +1998,7 @@ def test_iter_chunk_regions(): class TestAsync: @pytest.mark.parametrize( - "indexer,expected", + ("indexer", "expected"), [ # int ((0,), np.array([1, 2])), From 6fa9f376a38748decff1f87fa6ed7b576b772a89 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 28 Jul 2025 16:39:33 +0100 Subject: [PATCH 25/30] linting --- tests/test_indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 683a982159..bbc4c9ce26 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1998,7 +1998,7 @@ def test_iter_chunk_regions(): class TestAsync: @pytest.mark.parametrize( - ("indexer", "expected"), + ("indexer", "expected"), [ # int ((0,), np.array([1, 2])), @@ -2022,7 +2022,7 @@ class TestAsync: # array of ints (([0]), np.array([[1, 2]])), (([1]), np.array([[3, 4]])), - (([0], [1]), np.array(2)), + (([0], [1]), np.array(2)), (([0, 1], [0]), np.array([[1], [3]])), (([0, 1], [0, 1]), np.array([[1, 2], [3, 4]])), # boolean array @@ -2042,7 +2042,7 @@ async def test_async_oindex(self, store, indexer, expected): assert_array_equal(result, expected) @pytest.mark.parametrize( - "indexer,expected", + ("indexer", "expected"), [ (([0], [0]), np.array(1)), (([0, 1], [0, 1]), np.array([1, 4])), From 01ac722429dbf85e0aaf622d5ec16ff26e13e2cf Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 29 Jul 2025 15:00:38 +0100 Subject: [PATCH 
26/30] add test for indexing with zarr array --- tests/test_indexing.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index bbc4c9ce26..84eddf206f 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2041,6 +2041,22 @@ async def test_async_oindex(self, store, indexer, expected): result = await async_zarr.oindex.getitem(indexer) assert_array_equal(result, expected) + @pytest.mark.asyncio + async def test_async_oindex_with_zarr_array(self, store): + z1 = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") + z1[...] = np.array([[1, 2], [3, 4]]) + async_zarr = z1._async_array + + # create boolean zarr array to index with + z2 = zarr.create_array( + store=store, name="z2", shape=(2,), chunks=(1,), zarr_format=3, dtype="?" + ) + z2[...] = np.array([True, False]) + + result = await async_zarr.oindex.getitem(z2) + expected = np.array([[1, 2]]) + assert_array_equal(result, expected) + @pytest.mark.parametrize( ("indexer", "expected"), [ From c7a10001de791ce2662d1577b3edd3e9f988ec84 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 30 Jul 2025 17:35:07 +0100 Subject: [PATCH 27/30] add test case for masked boolean vectorized indexing --- src/zarr/core/indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 00814a8863..15cf6f0f1a 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -1292,6 +1292,8 @@ class AsyncVIndex(Generic[T_ArrayMetadata]): async def getitem( self, selection: CoordinateSelection | MaskSelection | Array ) -> NDArrayLikeOrScalar: + # TODO deduplicate these internals with the sync version of getitem + # TODO requires solving this circular sync issue: https://github.com/zarr-developers/zarr-python/pull/3083#discussion_r2230737448 from zarr.core.array import Array # if input is a Zarr array, we materialize it now. 
From 7e9681dfab06d65318a10234babceee416945c2e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 30 Jul 2025 17:38:49 +0100 Subject: [PATCH 28/30] add test to cover invalid indexer passed to vindex --- tests/test_indexing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 84eddf206f..6d70a99a92 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2062,6 +2062,7 @@ async def test_async_oindex_with_zarr_array(self, store): [ (([0], [0]), np.array(1)), (([0, 1], [0, 1]), np.array([1, 4])), + (np.array([[False, True], [False, True]]), np.array([2, 4])), ], ) @pytest.mark.asyncio @@ -2072,3 +2073,12 @@ async def test_async_vindex(self, store, indexer, expected): result = await async_zarr.vindex.getitem(indexer) assert_array_equal(result, expected) + + @pytest.mark.asyncio + async def test_async_vindex_invalid_indexer(self, store): + z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") + z[...] = np.array([[1, 2], [3, 4]]) + async_zarr = z._async_array + + with pytest.raises(IndexError): + await async_zarr.vindex.getitem("invalid_indexer") From 1469093071fdcc7887a68154f4ad281343c54fc6 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 30 Jul 2025 17:40:46 +0100 Subject: [PATCH 29/30] also cover invalid indexer to oindex --- tests/test_indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 6d70a99a92..0719b9214f 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2075,10 +2075,13 @@ async def test_async_vindex(self, store, indexer, expected): assert_array_equal(result, expected) @pytest.mark.asyncio - async def test_async_vindex_invalid_indexer(self, store): + async def test_async_invalid_indexer(self, store): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] 
= np.array([[1, 2], [3, 4]]) async_zarr = z._async_array with pytest.raises(IndexError): await async_zarr.vindex.getitem("invalid_indexer") + + with pytest.raises(IndexError): + await async_zarr.oindex.getitem("invalid_indexer") From 6fbb6b115ecd198dbaeecdeec57c2db34c9b81ce Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 30 Jul 2025 17:43:09 +0100 Subject: [PATCH 30/30] test vindexing with zarr array --- tests/test_indexing.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 0719b9214f..24b4b65505 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -2074,6 +2074,22 @@ async def test_async_vindex(self, store, indexer, expected): result = await async_zarr.vindex.getitem(indexer) assert_array_equal(result, expected) + @pytest.mark.asyncio + async def test_async_vindex_with_zarr_array(self, store): + z1 = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") + z1[...] = np.array([[1, 2], [3, 4]]) + async_zarr = z1._async_array + + # create boolean zarr array to index with + z2 = zarr.create_array( + store=store, name="z2", shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="?" + ) + z2[...] = np.array([[False, True], [False, True]]) + + result = await async_zarr.vindex.getitem(z2) + expected = np.array([2, 4]) + assert_array_equal(result, expected) + @pytest.mark.asyncio async def test_async_invalid_indexer(self, store): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8")