From cc3698b0486db2458da5da563c8e8cd94946816e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 11:04:38 +0200 Subject: [PATCH 01/26] factor array element iteration routines into stand-alone functions, and add a failing test --- src/zarr/core/array.py | 93 ++++++++++++++++++++++++++++++++++++++++++ tests/test_array.py | 37 ++++++++++++++++- 2 files changed, 128 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 78dddf3669..1d5707cf5a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4928,3 +4928,96 @@ def _parse_data_params( raise ValueError(msg) dtype_out = data.dtype return data, shape_out, dtype_out + + +def iter_chunk_coords( + array: Array | AsyncArray[Any], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. 
+ """ + return _iter_grid(array.cdata_shape, origin=origin, selection_shape=selection_shape) + + +def iter_chunk_keys( + array: Array | AsyncArray[Any], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[str]: + """ + Iterate over the storage keys of each chunk, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + key: str + The storage key of each chunk in the selection. + """ + # Iterate over the coordinates of chunks in chunk grid space. + for k in iter_chunk_coords(array, origin=origin, selection_shape=selection_shape): + # Encode the chunk key from the chunk coordinates. + yield array.metadata.encode_chunk_key(k) + + +def iter_chunk_regions( + array: Array | AsyncArray[Any], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + for cgrid_position in iter_chunk_coords(array, origin=origin, selection_shape=selection_shape): + out: tuple[slice, ...] 
= () + for c_pos, c_shape in zip(cgrid_position, array.chunks, strict=False): + start = c_pos * c_shape + stop = start + c_shape + out += (slice(start, stop, 1),) + yield out diff --git a/tests/test_array.py b/tests/test_array.py index 42f4a1cbdd..170b2f5597 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -6,7 +6,7 @@ import pickle import re import sys -from itertools import accumulate +from itertools import accumulate, starmap from typing import TYPE_CHECKING, Any, Literal from unittest import mock @@ -37,6 +37,7 @@ create_array, default_filters_v2, default_serializer_v3, + iter_chunk_keys, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition @@ -59,7 +60,7 @@ from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.string import UTF8Base from zarr.core.group import AsyncGroup -from zarr.core.indexing import BasicIndexer, ceildiv +from zarr.core.indexing import BasicIndexer, _iter_grid, ceildiv from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -1835,3 +1836,35 @@ def test_unknown_object_codec_default_filters_v2() -> None: msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}." 
with pytest.raises(ValueError, match=re.escape(msg)): default_filters_v2(dtype) + + +@pytest.mark.parametrize( + ("shard_size", "chunk_size"), + [ + ((8,), (8,)), + ((8,), (2,)), + ( + ( + 8, + 10, + ), + (2, 2), + ), + ], +) +def test_iter_chunk_keys(shard_size: tuple[int, ...], chunk_size: tuple[int, ...]) -> None: + store = {} + arr = zarr.create_array( + store, + dtype="uint8", + shape=tuple(2 * x for x in shard_size), + chunks=chunk_size, + shards=shard_size, + zarr_format=3, + ) + shard_grid_shape = tuple(starmap(ceildiv, zip(arr.shape, arr.shards, strict=True))) + expected_keys = tuple( + arr.metadata.chunk_key_encoding.encode_chunk_key(region) + for region in _iter_grid(shard_grid_shape) + ) + assert tuple(iter_chunk_keys(arr)) == expected_keys From 39b5c07fbc420438676791a32a0653c01061fc62 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 11:20:01 +0200 Subject: [PATCH 02/26] add shard_grid_shape, chunk_grid_shape Co-authored-by: Bojidar Marinov --- src/zarr/core/array.py | 43 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 1d5707cf5a..592f0aaa02 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1192,7 +1192,31 @@ def cdata_shape(self) -> ChunkCoords: Tuple[int] The shape of the chunk grid for this array. """ - return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=False))) + return self.chunk_grid_shape + + @property + def chunk_grid_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. + + Returns + ------- + Tuple[int] + The shape of the chunk grid for this array. + """ + return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True))) + + @property + def shard_grid_shape(self) -> ChunkCoords: + """ + The shape of the shard grid for this array. + + Returns + ------- + Tuple[int] + The shape of the shard grid for this array. 
+ """ + return tuple(starmap(ceildiv, zip(self.shape, self.shards, strict=True))) @property def nchunks(self) -> int: @@ -2198,10 +2222,25 @@ def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec @property def cdata_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. This property exists for backwards compatibility. + See :func:`chunk_grid_shape` for the preferred method. + """ + return self._async_array.chunk_grid_shape + + @property + def chunk_grid_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. """ - return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=False))) + return self._async_array.chunk_grid_shape + + @property + def shard_grid_shape(self) -> ChunkCoords: + """ + The shape of the shard grid for this array. + """ + return self._async_array.shard_grid_shape @property def nchunks(self) -> int: From 70a48761bdb6680e51fd7492f0aabbe7736a25d5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 11:31:16 +0200 Subject: [PATCH 03/26] docstrings --- src/zarr/core/array.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 592f0aaa02..3edb6c43cd 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1185,11 +1185,12 @@ def basename(self) -> str: @property def cdata_shape(self) -> ChunkCoords: """ - The shape of the chunk grid for this array. + The shape of the chunk grid for this array. This property exists for backwards compatibility. + See :func:`chunk_grid_shape` for the preferred method. Returns ------- - Tuple[int] + tuple[int, ...] The shape of the chunk grid for this array. """ return self.chunk_grid_shape @@ -1201,7 +1202,7 @@ def chunk_grid_shape(self) -> ChunkCoords: Returns ------- - Tuple[int] + tuple[int, ...] The shape of the chunk grid for this array. 
""" return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True))) @@ -1213,7 +1214,7 @@ def shard_grid_shape(self) -> ChunkCoords: Returns ------- - Tuple[int] + tuple[int, ...] The shape of the shard grid for this array. """ return tuple(starmap(ceildiv, zip(self.shape, self.shards, strict=True))) From 35b489126d2f5d3d262a57bb07ea1fd04434d266 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 11:31:55 +0200 Subject: [PATCH 04/26] handle null shards --- src/zarr/core/array.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3edb6c43cd..f1af9a2d4f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1217,7 +1217,11 @@ def shard_grid_shape(self) -> ChunkCoords: tuple[int, ...] The shape of the shard grid for this array. """ - return tuple(starmap(ceildiv, zip(self.shape, self.shards, strict=True))) + if self.shards is None: + shard_shape = self.chunks + else: + shard_shape = self.shards + return tuple(starmap(ceildiv, zip(self.shape, shard_shape, strict=True))) @property def nchunks(self) -> int: From 8846c96073e7b7653b380d851186a4b82284d73f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 11:32:45 +0200 Subject: [PATCH 05/26] use shard_grid_shape instead of cdata_shape in nchunks --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f1af9a2d4f..bb073b25c6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1233,7 +1233,7 @@ def nchunks(self) -> int: int The total number of chunks in the array. 
""" - return product(self.cdata_shape) + return product(self.shard_grid_shape) async def nchunks_initialized(self) -> int: """ From 31698d4642978e248a8b75b7133a7ad6ef621148 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 18:35:08 +0200 Subject: [PATCH 06/26] add improved set of low-level iteration routines for arrays --- src/zarr/core/array.py | 356 ++++++++++++++++++++++++++++++-------- src/zarr/core/indexing.py | 76 +++++++- tests/test_array.py | 219 +++++++++++++++++++---- tests/test_indexing.py | 74 +++++++- 4 files changed, 602 insertions(+), 123 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index bb073b25c6..a847c02f94 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -91,13 +91,14 @@ OrthogonalSelection, Selection, VIndex, - _iter_grid, ceildiv, check_fields, check_no_multi_fields, is_pure_fancy_indexing, is_pure_orthogonal_indexing, is_scalar, + iter_grid, + iter_regions, pop_fields, ) from zarr.core.metadata import ( @@ -1268,8 +1269,9 @@ def _iter_chunk_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[ChunkCoords]: """ - Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` - keyword is used, iteration will start at the chunk index specified by `origin`. + Create an iterator over the coordinates of chunks in chunk grid space. + + If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as @@ -1287,21 +1289,78 @@ def _iter_chunk_coords( chunk_coords: ChunkCoords The coordinates of each chunk in the selection. 
""" - return _iter_grid(self.cdata_shape, origin=origin, selection_shape=selection_shape) + return iter_chunk_coords( + array=self, + origin=origin, + selection_shape=selection_shape, + ) + + def _iter_shard_coords( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of shards in shard grid space. + + Note that + + If the `origin` keyword is used, iteration will start at the shard index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. + + Yields + ------ + chunk_coords: tuple[int, ...] + The coordinates of each shard in the selection. + """ + return iter_shard_coords( + array=self, + origin=origin, + selection_shape=selection_shape, + ) + @deprecated("Use _iter_chunk_keys instead") def _iter_chunk_keys( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: """ - Iterate over the storage keys of each chunk, relative to an optional origin, and optionally - limited to a contiguous region in chunk grid coordinates. + Iterate over the keys of the stored objects supporting this array. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. 
+ + Yields + ------ + key: str + The storage key of each shard in the selection. + """ + # Iterate over the coordinates of chunks in chunk grid space. + return self._iter_shard_keys(origin=origin, selection_shape=selection_shape) + + def _iter_shard_keys( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the keys of the stored objects supporting this array. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. + The shape of the selection in shard grid coordinates. Yields ------ @@ -1309,9 +1368,11 @@ def _iter_chunk_keys( The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. - for k in self._iter_chunk_coords(origin=origin, selection_shape=selection_shape): - # Encode the chunk key from the chunk coordinates. - yield self.metadata.encode_chunk_key(k) + return iter_shard_keys( + array=self, + origin=origin, + selection_shape=selection_shape, + ) def _iter_chunk_regions( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -1331,15 +1392,31 @@ def _iter_chunk_regions( region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each chunk in the selection. """ - for cgrid_position in self._iter_chunk_coords( - origin=origin, selection_shape=selection_shape - ): - out: tuple[slice, ...] 
= () - for c_pos, c_shape in zip(cgrid_position, self.chunks, strict=False): - start = c_pos * c_shape - stop = start + c_shape - out += (slice(start, stop, 1),) - yield out + return iter_chunk_regions( + array=self, + origin=origin, + selection_shape=selection_shape, + ) + + def _iter_shard_regions( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each shard. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each shard in the selection. + """ + return iter_shard_regions(array=self, origin=origin, selection_shape=selection_shape) @property def nbytes(self) -> int: @@ -2254,33 +2331,6 @@ def nchunks(self) -> int: """ return self._async_array.nchunks - def _iter_chunk_coords( - self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[ChunkCoords]: - """ - Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` - keyword is used, iteration will start at the chunk index specified by `origin`. - The default behavior is to start at the origin of the grid coordinate space. - If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region - ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as - per python indexing conventions. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. 
- - Yields - ------ - chunk_coords: ChunkCoords - The coordinates of each chunk in the selection. - """ - yield from self._async_array._iter_chunk_coords( - origin=origin, selection_shape=selection_shape - ) - @property def nbytes(self) -> int: """ @@ -2333,6 +2383,7 @@ def nbytes_stored(self) -> int: """ return sync(self._async_array.nbytes_stored()) + @deprecated("Use _iter_shard_keys instead.") def _iter_chunk_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: @@ -2352,9 +2403,80 @@ def _iter_chunk_keys( key: str The storage key of each chunk in the selection. """ - yield from self._async_array._iter_chunk_keys( - origin=origin, selection_shape=selection_shape - ) + return self._async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) + + def _iter_shard_keys( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the storage keys of each shard, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. + + Yields + ------ + key: str + The storage key of each shard in the selection. + """ + return self._async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) + + def _iter_chunk_coords( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. + + If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. 
+ If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. + """ + return self._async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape) + + def _iter_shard_coords( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[int, ...]]: + """ + Create an iterator over the coordinates of shards in shard grid space. + + If the `origin` keyword is used, iteration will start at the shard index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. + + Yields + ------ + chunk_coords: tuple[int, ...] + The coordinates of each shard in the selection. + """ + return self._async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape) def _iter_chunk_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2374,9 +2496,27 @@ def _iter_chunk_regions( region: tuple[slice, ...] 
A tuple of slice objects representing the region spanned by each chunk in the selection. """ - yield from self._async_array._iter_chunk_regions( - origin=origin, selection_shape=selection_shape - ) + return self._async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape) + + def _iter_shard_regions( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each shard. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + return self._async_array._iter_shard_regions(origin=origin, selection_shape=selection_shape) def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None @@ -3841,7 +3981,7 @@ async def chunks_initialized( _relativize_path(path=key, prefix=array.store_path.path) for key in store_contents ] return tuple( - chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative + chunk_key for chunk_key in array._iter_shard_keys() if chunk_key in store_contents_relative ) @@ -4153,7 +4293,7 @@ async def _copy_array_region(chunk_coords: ChunkCoords | slice, _data: Array) -> # Stream data from the source array to the new array await concurrent_map( - [(region, data) for region in result._iter_chunk_regions()], + [(region, data) for region in result._iter_shard_regions()], _copy_array_region, zarr.core.config.config.get("async.concurrency"), ) @@ -4993,36 +5133,67 @@ def iter_chunk_coords( array : Array | AsyncArray The array to iterate over. 
+ origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. + The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. + The shape of the selection in grid coordinates. Yields ------ chunk_coords: ChunkCoords The coordinates of each chunk in the selection. """ - return _iter_grid(array.cdata_shape, origin=origin, selection_shape=selection_shape) + return iter_grid(array.chunk_grid_shape, origin=origin, selection_shape=selection_shape) -def iter_chunk_keys( +def iter_shard_coords( + array: Array | AsyncArray[Any], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of shards in shard grid space. If the `origin` + keyword is used, iteration will start at the shard index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection in grid coordinates. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each shard in the selection. 
+ """ + return iter_grid(array.shard_grid_shape, origin=origin, selection_shape=selection_shape) + + +def iter_shard_keys( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[str]: """ - Iterate over the storage keys of each chunk, relative to an optional origin, and optionally - limited to a contiguous region in chunk grid coordinates. + Iterate over the storage keys of each shard, relative to an optional origin, and optionally + limited to a contiguous region in shard grid coordinates. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. + The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. + The shape of the selection in grid coordinates. Yields ------ @@ -5030,9 +5201,43 @@ def iter_chunk_keys( The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. - for k in iter_chunk_coords(array, origin=origin, selection_shape=selection_shape): - # Encode the chunk key from the chunk coordinates. - yield array.metadata.encode_chunk_key(k) + _iter = iter_grid(array.shard_grid_shape, origin=origin, selection_shape=selection_shape) + return (array.metadata.encode_chunk_key(k) for k in _iter) + + +def iter_shard_regions( + array: Array | AsyncArray[Any], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each shard. + + These are the smallest regions of the array that are safe to write concurrently. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. 
+ origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's shard grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in shard grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each shard in the selection. + """ + if array.shards is None: + shard_shape = array.chunks + else: + shard_shape = array.shards + + return iter_regions( + array.shape, shard_shape, origin=origin, selection_shape=selection_shape, trim_excess=True + ) def iter_chunk_regions( @@ -5042,26 +5247,25 @@ def iter_chunk_regions( selection_shape: Sequence[int] | None = None, ) -> Iterator[tuple[slice, ...]]: """ - Iterate over the regions spanned by each chunk. + Iterate over the regions spanned by each chunk. + + These are the smallest regions of the array that are efficient to read concurrently. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. + The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. + The shape of the selection in grid coordinates. Yields ------ region: tuple[slice, ...] - A tuple of slice objects representing the region spanned by each chunk in the selection. + A tuple of slice objects representing the region spanned by each chunk in the selection. """ - for cgrid_position in iter_chunk_coords(array, origin=origin, selection_shape=selection_shape): - out: tuple[slice, ...] 
= () - for c_pos, c_shape in zip(cgrid_position, array.chunks, strict=False): - start = c_pos * c_shape - stop = start + c_shape - out += (slice(start, stop, 1),) - yield out + + return iter_regions( + array.shape, array.chunks, origin=origin, selection_shape=selection_shape, trim_excess=True + ) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index c11889f7f4..4498788b6f 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -102,7 +102,7 @@ def ceildiv(a: float, b: float) -> int: _ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] -def _iter_grid( +def iter_grid( grid_shape: Sequence[int], *, origin: Sequence[int] | None = None, @@ -127,7 +127,7 @@ def _iter_grid( Returns ------- - itertools.product object + Iterator[tuple[int, ...]] An iterator over tuples of integers Examples @@ -138,11 +138,11 @@ def _iter_grid( >>> tuple(iter_grid((2,3))) ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)) - >>> tuple(iter_grid((2,3)), origin=(1,1)) - ((1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3)) + >>> tuple(iter_grid((2,3), origin=(1,1))) + ((1, 1), (1, 2)) - >>> tuple(iter_grid((2,3)), origin=(1,1), selection_shape=(2,2)) - ((1, 1), (1, 2), (1, 3), (2, 1)) + >>> tuple(iter_grid((2,3), origin=(0,0), selection_shape=(2,2))) + ((0, 0), (0, 1), (1, 0), (1, 1)) """ if origin is None: origin_parsed = (0,) * len(grid_shape) @@ -167,16 +167,76 @@ def _iter_grid( ): if o + ss > gs: raise IndexError( - f"Invalid selection shape ({selection_shape}) for origin ({origin}) and grid shape ({grid_shape}) at axis {idx}." + f"Invalid selection shape ({ss}) for origin ({o}) and grid shape ({gs}) at axis {idx}." ) dimensions += (range(o, o + ss),) - yield from itertools.product(*(dimensions)) + return itertools.product(*(dimensions)) else: msg = f"Indexing order {order} is not supported at this time." 
# type: ignore[unreachable] raise NotImplementedError(msg) +def iter_regions( + domain_shape: Sequence[int], + region_shape: Sequence[int], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, + order: _ArrayIndexingOrder = "lexicographic", + trim_excess: bool = True, +) -> Iterator[tuple[slice, ...]]: + """ + Iterate over contiguous regions on a grid of integers, with the option to restrict the + domain of iteration to a contiguous subregion of that grid. + + Parameters + ---------- + domain_shape : Sequence[int] + The size of the domain to iterate over. + region_shape : Sequence[int] + The shape of the region to iterate over. + origin : Sequence[int] | None, default=None + The location, in grid coordinates, of the first region to return. + selection_shape : Sequence[int] | None, default=None + The shape of the selection, in grid coordinates. + order : Literal["lexicographic"], default="lexicographic" + The linear indexing order to use. + + Returns + ------- + + Iterator[tuple[slice, ...]] + An iterator over tuples of slices, where each slice spans a separate contiguous region + + Examples + -------- + >>> tuple(iter_regions((1,), (1,))) + ((slice(0, 1, 1),),) + + >>> tuple(iter_regions((2, 3), (1, 2))) + ((slice(0, 1, 1), slice(0, 2, 1)), (slice(0, 1, 1), slice(2, 3, 1)), (slice(1, 2, 1), slice(0, 2, 1)), (slice(1, 2, 1), slice(2, 3, 1))) + + >>> tuple(iter_regions((2,3), (1,2), origin=(1,1))) + ((slice(1, 2, 1), slice(2, 3, 1)),) + + >>> tuple(iter_regions((2,3), (1,2), origin=(1,0), selection_shape=(1,2))) + ((slice(1, 2, 1), slice(0, 2, 1)), (slice(1, 2, 1), slice(2, 3, 1))) + """ + grid_shape = tuple(ceildiv(d, s) for d, s in zip(domain_shape, region_shape, strict=True)) + for grid_position in iter_grid( + grid_shape=grid_shape, origin=origin, selection_shape=selection_shape, order=order + ): + out: list[slice] = [] + for g_pos, r_shape, d_shape in zip(grid_position, region_shape, domain_shape, strict=True): + start = g_pos * r_shape + stop = start + 
r_shape + if trim_excess: + stop = min(stop, d_shape) + out.append(slice(start, stop, 1)) + yield tuple(out) + + def is_integer(x: Any) -> TypeGuard[int]: """True if x is an integer (both pure Python or NumPy).""" return isinstance(x, numbers.Integral) and not is_bool(x) diff --git a/tests/test_array.py b/tests/test_array.py index 170b2f5597..6a28051c99 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -6,7 +6,7 @@ import pickle import re import sys -from itertools import accumulate, starmap +from itertools import accumulate from typing import TYPE_CHECKING, Any, Literal from unittest import mock @@ -37,7 +37,11 @@ create_array, default_filters_v2, default_serializer_v3, - iter_chunk_keys, + iter_chunk_coords, + iter_chunk_regions, + iter_shard_coords, + iter_shard_keys, + iter_shard_regions, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition @@ -60,7 +64,7 @@ from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.string import UTF8Base from zarr.core.group import AsyncGroup -from zarr.core.indexing import BasicIndexer, _iter_grid, ceildiv +from zarr.core.indexing import BasicIndexer, ceildiv, iter_grid, iter_regions from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -371,15 +375,23 @@ def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +@pytest.mark.parametrize( + ("shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((40,), (20,), (5,))] +) +async def test_nchunks_initialized( + test_cls: type[Array] | type[AsyncArray[Any]], + shape: tuple[int, ...], + shard_shape: tuple[int, ...], + chunk_shape: tuple[int, ...], +) -> None: """ - Test 
that nchunks_initialized accurately returns the number of stored chunks. + Test that nchunks_initialized accurately returns the number of stored partitions. """ store = MemoryStore() - arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4") + arr = zarr.create_array(store, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1") # write chunks one at a time - for idx, region in enumerate(arr._iter_chunk_regions()): + for idx, region in enumerate(arr._iter_shard_regions()): arr[region] = 1 expected = idx + 1 if test_cls == Array: @@ -389,7 +401,7 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]] assert observed == expected # delete chunks - for idx, key in enumerate(arr._iter_chunk_keys()): + for idx, key in enumerate(arr._iter_shard_keys()): sync(arr.store_path.store.delete(key)) if test_cls == Array: observed = arr.nchunks_initialized @@ -400,17 +412,24 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]] @pytest.mark.parametrize("path", ["", "foo"]) -async def test_chunks_initialized(path: str) -> None: +@pytest.mark.parametrize( + ("shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((40,), (20,), (5,))] +) +async def test_chunks_initialized( + path: str, shape: tuple[int, ...], shard_shape: tuple[int, ...], chunk_shape: tuple[int, ...] +) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. 
""" store = MemoryStore() - arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4") + arr = zarr.create_array( + store, name=path, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1" + ) chunks_accumulated = tuple( - accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys())) + accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_shard_keys())) ) - for keys, region in zip(chunks_accumulated, arr._iter_chunk_regions(), strict=False): + for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False): arr[region] = 1 observed = sorted(await chunks_initialized(arr._async_array)) expected = sorted(keys) @@ -1839,32 +1858,160 @@ def test_unknown_object_codec_default_filters_v2() -> None: @pytest.mark.parametrize( - ("shard_size", "chunk_size"), - [ - ((8,), (8,)), - ((8,), (2,)), - ( - ( - 8, - 10, - ), - (2, 2), - ), - ], + ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] ) -def test_iter_chunk_keys(shard_size: tuple[int, ...], chunk_size: tuple[int, ...]) -> None: - store = {} +def test_iter_chunk_coords( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that we can use the various invocations of iter_chunk_coords to iterate over the coordinates + of the origin of each chunk. 
+ """ + arr = zarr.create_array( - store, + {}, dtype="uint8", - shape=tuple(2 * x for x in shard_size), - chunks=chunk_size, - shards=shard_size, - zarr_format=3, + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, ) - shard_grid_shape = tuple(starmap(ceildiv, zip(arr.shape, arr.shards, strict=True))) - expected_keys = tuple( - arr.metadata.chunk_key_encoding.encode_chunk_key(region) - for region in _iter_grid(shard_grid_shape) + expected = tuple(iter_grid(arr.shard_grid_shape)) + observed = tuple(iter_chunk_coords(arr)) + assert observed == expected + assert observed == tuple(arr._iter_chunk_coords()) + assert observed == tuple(arr._async_array._iter_chunk_coords()) + + +@pytest.mark.parametrize( + ("array_shape", "shard_shape", "chunk_shape"), + [((10,), (1,), (1,)), ((10,), None, (1,)), ((30, 10), (10, 5), (2, 5))], +) +def test_iter_shard_coords( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that we can use the various invocations of iter_shard_coords to iterate over the coordinates + of the origin of each shard. + """ + + if zarr_format == 2 and shard_shape is not None: + pytest.skip("Zarr format 2 does not support shard shape.") + + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, ) - assert tuple(iter_chunk_keys(arr)) == expected_keys + expected = tuple(iter_grid(arr.shard_grid_shape)) + observed = tuple(iter_shard_coords(arr)) + assert observed == expected + assert observed == tuple(arr._iter_shard_coords()) + assert observed == tuple(arr._async_array._iter_shard_coords()) + + +@pytest.mark.parametrize( + ("array_shape", "shard_shape", "chunk_shape"), + [((10,), (1,), (1,)), ((10,), None, (1,)), ((30, 10), (10, 5), (2, 5))], +) +def test_iter_shard_keys( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] 
| None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that we can use the various invocations of iter_shard_keys to iterate over the stored + keys of the shards of an array. + """ + + if zarr_format == 2 and shard_shape is not None: + pytest.skip("Zarr format 2 does not support shard shape.") + + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + expected = tuple(arr.metadata.encode_chunk_key(key) for key in iter_grid(arr.shard_grid_shape)) + observed = tuple(iter_shard_keys(arr)) + assert observed == expected + assert observed == tuple(arr._iter_shard_keys()) + assert observed == tuple(arr._async_array._iter_shard_keys()) + + +@pytest.mark.parametrize( + ("array_shape", "shard_shape", "chunk_shape"), + [((10,), None, (1,)), ((10,), (1,), (1,)), ((30, 10), (10, 5), (2, 5))], +) +def test_iter_shard_regions( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that we can use the various invocations of iter_shard_regions to iterate over the regions + spanned by the shards of an array. 
+ """ + if zarr_format == 2 and shard_shape is not None: + pytest.skip("Zarr format 2 does not support shard shape.") + + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + if shard_shape is None: + _shard_shape = chunk_shape + else: + _shard_shape = shard_shape + expected = tuple(iter_regions(arr.shape, _shard_shape)) + observed = tuple(iter_shard_regions(arr)) + assert observed == expected + assert observed == tuple(arr._iter_shard_regions()) + assert observed == tuple(arr._async_array._iter_shard_regions()) + + +@pytest.mark.parametrize( + ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] +) +def test_iter_chunk_regions( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that we can use the various invocations of iter_chunk_regions to iterate over the regions + spanned by the chunks of an array. 
+ """ + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + + expected = tuple(iter_regions(arr.shape, chunk_shape)) + observed = tuple(iter_chunk_regions(arr)) + assert observed == expected + assert observed == tuple(arr._iter_chunk_regions()) + assert observed == tuple(arr._async_array._iter_chunk_regions()) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index fd4bb13bb1..63f43c21b0 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -18,7 +18,10 @@ CoordinateSelection, OrthogonalSelection, Selection, - _iter_grid, + _ArrayIndexingOrder, + ceildiv, + iter_grid, + iter_regions, make_slice_selection, normalize_integer_selection, oindex, @@ -1919,7 +1922,7 @@ def test_iter_grid( selection_shape = tuple(gs - o for gs, o in zip(grid_shape, origin, strict=False)) observed = tuple( - _iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) + iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) ) # generate a numpy array of indices, and index it @@ -1940,7 +1943,7 @@ def test_iter_grid_invalid() -> None: Ensure that a selection_shape that exceeds the grid_shape + origin produces an indexing error. 
""" with pytest.raises(IndexError): - list(_iter_grid((5,), origin=(0,), selection_shape=(10,))) + list(iter_grid((5,), origin=(0,), selection_shape=(10,))) def test_indexing_with_zarr_array(store: StorePath) -> None: @@ -1994,3 +1997,68 @@ def test_iter_chunk_regions(): assert_array_equal(a[region], np.ones_like(a[region])) a[region] = 0 assert_array_equal(a[region], np.zeros_like(a[region])) + + +@pytest.mark.parametrize( + ("domain_shape", "region_shape", "origin", "selection_shape"), + [ + ((9,), (1,), None, (9,)), + ((9,), (1,), (0,), (9,)), + ((3,), (2,), (0,), (1,)), + ((9,), (2,), (2,), (2,)), + ((9, 9), (2, 1), None, None), + ((9, 9), (4, 1), None, None), + ], +) +@pytest.mark.parametrize("order", ["lexicographic"]) +@pytest.mark.parametrize("trim_excess", [True, False]) +def test_iter_regions( + domain_shape: tuple[int, ...], + region_shape: tuple[int, ...], + origin: tuple[int, ...] | None, + selection_shape: tuple[int, ...] | None, + order: _ArrayIndexingOrder, + trim_excess: bool, +) -> None: + """ + Test that iter_regions properly iterates over contiguous regions of a gridded domain. + """ + expected_slices_by_dim: list[list[slice]] = [] + origin_parsed: tuple[int, ...] + selection_shape_parsed: tuple[int, ...] 
+ if origin is None: + origin_parsed = (0,) * len(domain_shape) + else: + origin_parsed = origin + if selection_shape is None: + selection_shape_parsed = tuple( + ceildiv(ds, rs) - o + for ds, o, rs in zip(domain_shape, origin_parsed, region_shape, strict=True) + ) + else: + selection_shape_parsed = selection_shape + for d_s, r_s, o, ss in zip( + domain_shape, region_shape, origin_parsed, selection_shape_parsed, strict=True + ): + _expected_slices: list[slice] = [] + start = o * r_s + for incr in range(start, start + ss * r_s, r_s): + if trim_excess: + term = min(incr + r_s, d_s) + else: + term = incr + r_s + _expected_slices.append(slice(incr, term, 1)) + expected_slices_by_dim.append(_expected_slices) + + expected = tuple(itertools.product(*expected_slices_by_dim)) + observed = tuple( + iter_regions( + domain_shape, + region_shape, + origin=origin, + selection_shape=selection_shape, + order=order, + trim_excess=trim_excess, + ) + ) + assert observed == expected From 5c34e4afce7c646bd80afdb8ab381d236fadec2c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 18:44:41 +0200 Subject: [PATCH 07/26] correct deprecation message --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a847c02f94..4faa8d20ce 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1327,7 +1327,7 @@ def _iter_shard_coords( selection_shape=selection_shape, ) - @deprecated("Use _iter_chunk_keys instead") + @deprecated("Use _iter_shard_keys instead") def _iter_chunk_keys( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: From 54d7fd7f921d45cd8fd4c773987d1197f332aea7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 19:56:28 +0200 Subject: [PATCH 08/26] check for deprecation warnings around iter_chunk_keys --- tests/test_array.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/tests/test_array.py b/tests/test_array.py index 6a28051c99..9ea0ab9cd8 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1950,6 +1950,10 @@ def test_iter_shard_keys( assert observed == expected assert observed == tuple(arr._iter_shard_keys()) assert observed == tuple(arr._async_array._iter_shard_keys()) + with pytest.warns(DeprecationWarning, match="Use _iter_shard_keys instead"): + assert observed == tuple(arr._iter_chunk_keys()) + with pytest.warns(DeprecationWarning, match="Use _iter_shard_keys instead"): + assert observed == tuple(arr._async_array._iter_chunk_keys()) @pytest.mark.parametrize( From 82d9bdcb3f73847607ea36e66d96d32365da9e70 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 20:09:38 +0200 Subject: [PATCH 09/26] plug tiny coverage holes --- src/zarr/core/array.py | 2 +- src/zarr/core/indexing.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4faa8d20ce..d75f5cfea6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -698,7 +698,7 @@ async def _create( overwrite=overwrite, ) else: - raise ValueError(f"Insupported zarr_format. Got: {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover if data is not None: # insert user-provided data diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 4498788b6f..08ddffe26e 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -173,8 +173,8 @@ def iter_grid( return itertools.product(*(dimensions)) else: - msg = f"Indexing order {order} is not supported at this time." # type: ignore[unreachable] - raise NotImplementedError(msg) + msg = f"Indexing order {order} is not supported at this time." 
# type: ignore[unreachable] # pragma: no cover + raise NotImplementedError(msg) # pragma: no cover def iter_regions( From ec5e0f1d19c99588893489b7662dbab502f95b6a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 25 Jul 2025 23:18:48 +0200 Subject: [PATCH 10/26] lint --- src/zarr/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 08ddffe26e..75046616a2 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -174,7 +174,7 @@ def iter_grid( else: msg = f"Indexing order {order} is not supported at this time." # type: ignore[unreachable] # pragma: no cover - raise NotImplementedError(msg) # pragma: no cover + raise NotImplementedError(msg) # pragma: no cover def iter_regions( From 83a3ea54aa0efc086d6bffa41705974827cb8455 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 28 Jul 2025 18:24:23 +0200 Subject: [PATCH 11/26] rename chunks_initialized to shards_initialized --- src/zarr/core/array.py | 4 ++-- tests/test_array.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index d75f5cfea6..f143e08e77 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1260,7 +1260,7 @@ async def nchunks_initialized(self) -> int: >>> await arr.nchunks_initialized() 3 """ - return len(await chunks_initialized(self)) + return len(await shards_initialized(self)) async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) @@ -3953,7 +3953,7 @@ def info_complete(self) -> Any: return sync(self._async_array.info_complete()) -async def chunks_initialized( +async def shards_initialized( array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], ) -> tuple[str, ...]: """ diff --git a/tests/test_array.py b/tests/test_array.py index 9ea0ab9cd8..63c93b4f6f 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -33,7 +33,6 @@ 
FiltersLike, _parse_chunk_encoding_v2, _parse_chunk_encoding_v3, - chunks_initialized, create_array, default_filters_v2, default_serializer_v3, @@ -42,6 +41,7 @@ iter_shard_coords, iter_shard_keys, iter_shard_regions, + shards_initialized, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition @@ -431,7 +431,7 @@ async def test_chunks_initialized( ) for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False): arr[region] = 1 - observed = sorted(await chunks_initialized(arr._async_array)) + observed = sorted(await shards_initialized(arr._async_array)) expected = sorted(keys) assert observed == expected From 29c1cddee28b026e59e4745d559f6e0bd9136618 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 28 Jul 2025 19:01:06 +0200 Subject: [PATCH 12/26] add nshards, nshards_initialized --- src/zarr/core/array.py | 83 ++++++++++++++++++++++++++++++++++++++++-- tests/test_array.py | 24 ++++++++---- 2 files changed, 96 insertions(+), 11 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f143e08e77..86dce1d249 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1227,15 +1227,32 @@ def shard_grid_shape(self) -> ChunkCoords: @property def nchunks(self) -> int: """ - The number of chunks in the stored representation of this array. + The number of chunks in this array. + + Note that if a sharding codec is used, then the number of chunks may exceed the number of + stored objects supporting this array. To find out the number of stored objects that support + this array, see :func:`nshards`. Returns ------- int The total number of chunks in the array. """ + return product(self.chunk_grid_shape) + + @property + def nshards(self) -> int: + """ + The number of shards in this array. + + Returns + ------- + int + The total number of shards in the array. 
+ """ return product(self.shard_grid_shape) + @deprecated("Use nshards_initialized instead") async def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized, i.e. the number of chunks that have @@ -1262,6 +1279,32 @@ async def nchunks_initialized(self) -> int: """ return len(await shards_initialized(self)) + async def nshards_initialized(self) -> int: + """ + Calculate the number of shards that have been initialized, i.e. the number of shards that have + been persisted to the storage backend. + + Returns + ------- + nshards_initialized : int + The number of shards that have been initialized. + + Notes + ----- + On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) + property :attr:`Array.nshards_initialized`. + + Examples + -------- + >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,)) + >>> await arr.nshards_initialized() + 0 + >>> await arr.setitem(slice(5), 1) + >>> await arr.nshards_initialized() + 3 + """ + return len(await shards_initialized(self)) + async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) @@ -1860,7 +1903,7 @@ async def info_complete(self) -> Any: A property giving just the statically known information about an array. """ return self._info( - await self.nchunks_initialized(), + await self.nshards_initialized(), await self.store_path.store.getsize_prefix(self.store_path.path), ) @@ -2327,10 +2370,21 @@ def shard_grid_shape(self) -> ChunkCoords: @property def nchunks(self) -> int: """ - The number of chunks in the stored representation of this array. + The number of chunks in this array. + + Note that if a sharding codec is used, then the number of chunks may exceed the number of + stored objects supporting this array. To find out the number of stored objects that support + this array, see :func:`nshards`. 
""" return self._async_array.nchunks + @property + def nshards(self) -> int: + """ + The number of shards in the stored representation of this array. + """ + return self._async_array.nshards + @property def nbytes(self) -> int: """ @@ -2347,6 +2401,7 @@ def nbytes(self) -> int: return self._async_array.nbytes @property + @deprecated("Use nshards_initialized instead.") def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized, i.e. the number of chunks that have @@ -2373,6 +2428,28 @@ def nchunks_initialized(self) -> int: """ return sync(self._async_array.nchunks_initialized()) + @property + def nshards_initialized(self) -> int: + """ + Calculate the number of shards that have been initialized, i.e. the number of shards that have + been persisted to the storage backend. + + Returns + ------- + nshards_initialized : int + The number of shards that have been initialized. + + Examples + -------- + >>> arr = await zarr.create(shape=(10,), chunks=(2,)) + >>> arr.nshards_initialized + 0 + >>> arr[:5] = 1 + >>> arr.nshard_initialized + 3 + """ + return sync(self._async_array.nshards_initialized()) + def nbytes_stored(self) -> int: """ Determine the size, in bytes, of the array actually written to the store. 
diff --git a/tests/test_array.py b/tests/test_array.py index 63c93b4f6f..3db5e65265 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -395,19 +395,27 @@ async def test_nchunks_initialized( arr[region] = 1 expected = idx + 1 if test_cls == Array: - observed = arr.nchunks_initialized + with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): + observed = arr.nchunks_initialized + assert observed == arr.nshards_initialized else: - observed = await arr._async_array.nchunks_initialized() + with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): + observed = await arr._async_array.nchunks_initialized() + assert observed == await arr._async_array.nshards_initialized() assert observed == expected # delete chunks for idx, key in enumerate(arr._iter_shard_keys()): sync(arr.store_path.store.delete(key)) if test_cls == Array: - observed = arr.nchunks_initialized + with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): + observed = arr.nchunks_initialized + assert observed == arr.nshards_initialized else: - observed = await arr._async_array.nchunks_initialized() - expected = arr.nchunks - idx - 1 + with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): + observed = await arr._async_array.nchunks_initialized() + assert observed == await arr._async_array.nshards_initialized() + expected = arr.nshards - idx - 1 assert observed == expected @@ -876,14 +884,14 @@ def test_write_empty_chunks_behavior( # initialize the store with some non-fill value chunks arr[:] = fill_value + 1 - assert arr.nchunks_initialized == arr.nchunks + assert arr.nshards_initialized == arr.nshards arr[:] = fill_value if not write_empty_chunks: - assert arr.nchunks_initialized == 0 + assert arr.nshards_initialized == 0 else: - assert arr.nchunks_initialized == arr.nchunks + assert arr.nshards_initialized == arr.nshards @pytest.mark.parametrize( From 2d7daca1f26c3a1da79a49727dc8e2751c50f32a Mon Sep 17 
00:00:00 2001 From: Davis Vann Bennett Date: Mon, 28 Jul 2025 22:41:57 +0200 Subject: [PATCH 13/26] fix doctests --- docs/user-guide/performance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst index 0f31e5d7be..f254c5bcb5 100644 --- a/docs/user-guide/performance.rst +++ b/docs/user-guide/performance.rst @@ -211,7 +211,7 @@ the time required to write an array with different values.:: ... start = time.time() ... arr[:] = value ... elapsed = time.time() - start - ... result.append((elapsed, arr.nchunks_initialized)) + ... result.append((elapsed, arr.nshards_initialized)) ... return result ... # log results >>> for write_empty_chunks in (True, False): From 1dad7b3b926541d78de61531b647aff771b39124 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 29 Jul 2025 18:09:57 +0200 Subject: [PATCH 14/26] remove iter_chunk_keys --- src/zarr/core/array.py | 44 ------------------------------------------ tests/test_array.py | 4 ---- 2 files changed, 48 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 86dce1d249..ad5b754d6e 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1370,28 +1370,6 @@ def _iter_shard_coords( selection_shape=selection_shape, ) - @deprecated("Use _iter_shard_keys instead") - def _iter_chunk_keys( - self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[str]: - """ - Iterate over the keys of the stored objects supporting this array. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's shard grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in shard grid coordinates. - - Yields - ------ - key: str - The storage key of each shard in the selection. - """ - # Iterate over the coordinates of chunks in chunk grid space. 
- return self._iter_shard_keys(origin=origin, selection_shape=selection_shape) - def _iter_shard_keys( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: @@ -2460,28 +2438,6 @@ def nbytes_stored(self) -> int: """ return sync(self._async_array.nbytes_stored()) - @deprecated("Use _iter_shard_keys instead.") - def _iter_chunk_keys( - self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[str]: - """ - Iterate over the storage keys of each chunk, relative to an optional origin, and optionally - limited to a contiguous region in chunk grid coordinates. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. - - Yields - ------ - key: str - The storage key of each chunk in the selection. - """ - return self._async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) - def _iter_shard_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: diff --git a/tests/test_array.py b/tests/test_array.py index 3db5e65265..452ebc3337 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1958,10 +1958,6 @@ def test_iter_shard_keys( assert observed == expected assert observed == tuple(arr._iter_shard_keys()) assert observed == tuple(arr._async_array._iter_shard_keys()) - with pytest.warns(DeprecationWarning, match="Use _iter_shard_keys instead"): - assert observed == tuple(arr._iter_chunk_keys()) - with pytest.warns(DeprecationWarning, match="Use _iter_shard_keys instead"): - assert observed == tuple(arr._async_array._iter_chunk_keys()) @pytest.mark.parametrize( From 0b9916443d555d9e762f5501314383dc828c26bf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 29 Jul 2025 20:10:50 +0200 Subject: 
[PATCH 15/26] make nchunks_initialized report the product of the number of shards and the number of chunks per shard --- src/zarr/core/array.py | 48 ++++++++++++++++++++++++++---------------- tests/test_array.py | 21 ++++++++---------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index ad5b754d6e..87e159be21 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1252,11 +1252,14 @@ def nshards(self) -> int: """ return product(self.shard_grid_shape) - @deprecated("Use nshards_initialized instead") async def nchunks_initialized(self) -> int: """ - Calculate the number of chunks that have been initialized, i.e. the number of chunks that have - been persisted to the storage backend. + Calculate the number of chunks that have been initialized in storage. + + This value is calculated as the product of the number of initialized shards and the number of + chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1, + and in that case the number of chunks initialized is the same as the number of stored objects associated with an + array. For a direct count of the number of initialized stored objects, see ``nshards_initialized``. 
Returns ------- @@ -1270,19 +1273,28 @@ async def nchunks_initialized(self) -> int: Examples -------- - >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,)) + >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(1,), shards=(2,)) >>> await arr.nchunks_initialized() 0 >>> await arr.setitem(slice(5), 1) - >>> await arr.nchunks_initialized() + >>> await arr.nshards_initialized() 3 + >>> await arr.nchunks_initialized() + 6 """ - return len(await shards_initialized(self)) + if self.shards is None: + chunks_per_shard = 1 + else: + chunks_per_shard = product( + tuple(a // b for a, b in zip(self.shards, self.chunks, strict=True)) + ) + return (await self.nshards_initialized()) * chunks_per_shard async def nshards_initialized(self) -> int: """ - Calculate the number of shards that have been initialized, i.e. the number of shards that have - been persisted to the storage backend. + Calculate the number of shards that have been initialized in storage. + + This is the number of shards that have been persisted to the storage backend. Returns ------- @@ -2379,30 +2391,30 @@ def nbytes(self) -> int: return self._async_array.nbytes @property - @deprecated("Use nshards_initialized instead.") def nchunks_initialized(self) -> int: """ - Calculate the number of chunks that have been initialized, i.e. the number of chunks that have - been persisted to the storage backend. + Calculate the number of chunks that have been initialized in storage. + + This value is calculated as the product of the number of initialized shards and the number of + chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1, + and in that case the number of chunks initialized is the same as the number of stored objects associated with an + array. For a direct count of the number of initialized stored objects, see ``nshards_initialized``. Returns ------- nchunks_initialized : int The number of chunks that have been initialized. 
- Notes - ----- - On :class:`Array` this is a (synchronous) property, unlike asynchronous function - :meth:`AsyncArray.nchunks_initialized`. - Examples -------- - >>> arr = await zarr.create(shape=(10,), chunks=(2,)) + >>> arr = zarr.create_array(store={}, shape=(10,), chunks=(1,), shards=(2,)) >>> arr.nchunks_initialized 0 >>> arr[:5] = 1 - >>> arr.nchunks_initialized + >>> arr.nshards_initialized 3 + >>> arr.nchunks_initialized + 6 """ return sync(self._async_array.nchunks_initialized()) diff --git a/tests/test_array.py b/tests/test_array.py index 452ebc3337..227d616528 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -387,6 +387,7 @@ async def test_nchunks_initialized( """ Test that nchunks_initialized accurately returns the number of stored partitions. """ + chunks_per_shard = np.prod(np.array(shard_shape) // np.array(chunk_shape)) store = MemoryStore() arr = zarr.create_array(store, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1") @@ -395,26 +396,22 @@ async def test_nchunks_initialized( arr[region] = 1 expected = idx + 1 if test_cls == Array: - with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): - observed = arr.nchunks_initialized - assert observed == arr.nshards_initialized + observed = arr.nshards_initialized + assert observed == arr.nchunks_initialized // chunks_per_shard else: - with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): - observed = await arr._async_array.nchunks_initialized() - assert observed == await arr._async_array.nshards_initialized() + observed = await arr._async_array.nshards_initialized() + assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard assert observed == expected # delete chunks for idx, key in enumerate(arr._iter_shard_keys()): sync(arr.store_path.store.delete(key)) if test_cls == Array: - with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): - observed = arr.nchunks_initialized - assert 
observed == arr.nshards_initialized + observed = arr.nshards_initialized + assert observed == arr.nchunks_initialized // chunks_per_shard else: - with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"): - observed = await arr._async_array.nchunks_initialized() - assert observed == await arr._async_array.nshards_initialized() + observed = await arr._async_array.nshards_initialized() + assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard expected = arr.nshards - idx - 1 assert observed == expected From 96e5564618c6059c34dc0b2130a663c40d92b5ca Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 31 Jul 2025 23:41:14 +0200 Subject: [PATCH 16/26] changelog --- changes/3299.fix.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changes/3299.fix.rst diff --git a/changes/3299.fix.rst b/changes/3299.fix.rst new file mode 100644 index 0000000000..29accdaa30 --- /dev/null +++ b/changes/3299.fix.rst @@ -0,0 +1,6 @@ +Fix a bug in ``create_array`` caused by iterating over chunk-aligned regions instead of +shard-aligned regions when writing data. To make the distinction between chunks and shards more +obvious in the ``Array`` API, new properties ``chunk_grid_shape``, +``shard_grid_shape``, ``nshards``, ``nshards_initialized`` were added to the ``Array`` class. +Additionally, the behavior of ``nchunks_initialized`` has been adjusted. This function consistently +reports the number of chunks present in stored objects, even when the array uses the sharding codec. 
\ No newline at end of file From 66fb79515b970a1c83cbd7103d712c0af827962f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 31 Jul 2025 23:46:19 +0200 Subject: [PATCH 17/26] correct name of changelog entry --- changes/{3299.fix.rst => 3299.bugfix.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changes/{3299.fix.rst => 3299.bugfix.rst} (100%) diff --git a/changes/3299.fix.rst b/changes/3299.bugfix.rst similarity index 100% rename from changes/3299.fix.rst rename to changes/3299.bugfix.rst From b9d6ea3673a21529c62a94d9e4ffbc3c3a7f7b77 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 1 Aug 2025 11:33:34 +0200 Subject: [PATCH 18/26] add test for unsharded data --- tests/test_array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index da91336d2b..ec5161fe24 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -418,7 +418,8 @@ async def test_nchunks_initialized( @pytest.mark.parametrize("path", ["", "foo"]) @pytest.mark.parametrize( - ("shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((40,), (20,), (5,))] + ("shape", "shard_shape", "chunk_shape"), + [((10,), None, (1,)), ((10,), (1,), (1,)), ((40,), (20,), (5,))], ) async def test_chunks_initialized( path: str, shape: tuple[int, ...], shard_shape: tuple[int, ...], chunk_shape: tuple[int, ...] 
From 8b33dc350d0a267a69b408cc2483bbbabc540c4e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 1 Aug 2025 18:10:42 +0200 Subject: [PATCH 19/26] add test for cdata_shape, shard_grid_shape, chunk_grid_shape --- tests/test_array.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_array.py b/tests/test_array.py index ec5161fe24..93020efeb3 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1863,6 +1863,37 @@ def test_unknown_object_codec_default_filters_v2() -> None: default_filters_v2(dtype) +@pytest.mark.parametrize( + ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] +) +def test_chunk_grid_shape( + array_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, + chunk_shape: tuple[int, ...], + zarr_format: ZarrFormat, +) -> None: + """ + Test that the shape of the chunk grid and the shard grid are correctly indicated + """ + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + chunk_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, chunk_shape, strict=True)) + if shard_shape is None: + _shard_shape = chunk_shape + else: + _shard_shape = shard_shape + shard_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, _shard_shape, strict=True)) + assert arr.chunk_grid_shape == chunk_grid_shape + assert arr.cdata_shape == chunk_grid_shape + assert arr.shard_grid_shape == shard_grid_shape + + @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] ) From 80f6ac4d284faa0d7a035f806370280b905054c5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 1 Aug 2025 18:12:23 +0200 Subject: [PATCH 20/26] add nshards --- tests/test_array.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_array.py b/tests/test_array.py index 93020efeb3..0de23d106f 100644 --- 
a/tests/test_array.py +++ b/tests/test_array.py @@ -1892,6 +1892,7 @@ def test_chunk_grid_shape( assert arr.chunk_grid_shape == chunk_grid_shape assert arr.cdata_shape == chunk_grid_shape assert arr.shard_grid_shape == shard_grid_shape + assert arr.nshards == np.prod(shard_grid_shape) @pytest.mark.parametrize( From 8db5fa72b5a4de786ca0be94ad61f7487814760c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 1 Aug 2025 18:15:37 +0200 Subject: [PATCH 21/26] expand nchunks_initialized test conditions --- tests/test_array.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index 0de23d106f..b47d25122e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -376,19 +376,24 @@ def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) @pytest.mark.parametrize( - ("shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((40,), (20,), (5,))] + ("shape", "shard_shape", "chunk_shape"), + [((10,), None, (1,)), ((10,), (1,), (1,)), ((40,), (20,), (5,))], ) async def test_nchunks_initialized( test_cls: type[Array] | type[AsyncArray[Any]], shape: tuple[int, ...], - shard_shape: tuple[int, ...], + shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], ) -> None: """ Test that nchunks_initialized accurately returns the number of stored partitions. 
""" - chunks_per_shard = np.prod(np.array(shard_shape) // np.array(chunk_shape)) - store = MemoryStore() + store = {} + if shard_shape is None: + chunks_per_shard = 1 + else: + chunks_per_shard = np.prod(np.array(shard_shape) // np.array(chunk_shape)) + arr = zarr.create_array(store, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1") # write chunks one at a time From 8805c35b1340523f40b6cf67bffcbe7d8b0ead11 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 1 Aug 2025 18:18:01 +0200 Subject: [PATCH 22/26] restore memorystore --- tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index b47d25122e..247dfc9309 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -388,7 +388,7 @@ async def test_nchunks_initialized( """ Test that nchunks_initialized accurately returns the number of stored partitions. """ - store = {} + store = MemoryStore() if shard_shape is None: chunks_per_shard = 1 else: From ac27ea796347843be5e9e9ac3ac115d2e4963555 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 4 Aug 2025 12:11:22 +0200 Subject: [PATCH 23/26] expand test parametrization --- tests/test_array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index 247dfc9309..92af697ffb 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1869,7 +1869,8 @@ def test_unknown_object_codec_default_filters_v2() -> None: @pytest.mark.parametrize( - ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] + ("array_shape", "shard_shape", "chunk_shape"), + [((10,), None, (1,)), ((10,), 1, (1,)), ((30, 10), None, (2, 5)), ((30, 10), (4, 10), (2, 5))], ) def test_chunk_grid_shape( array_shape: tuple[int, ...], From 932340023c5624a30fe73d6841dccd3356e84e97 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 4 Aug 2025 13:34:48 +0200 Subject: [PATCH 24/26] unbreak tests --- 
tests/test_array.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index 92af697ffb..9b4d8b6553 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1870,7 +1870,12 @@ def test_unknown_object_codec_default_filters_v2() -> None: @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), - [((10,), None, (1,)), ((10,), 1, (1,)), ((30, 10), None, (2, 5)), ((30, 10), (4, 10), (2, 5))], + [ + ((10,), None, (1,)), + ((10,), (1,), (1,)), + ((30, 10), None, (2, 5)), + ((30, 10), (4, 10), (2, 5)), + ], ) def test_chunk_grid_shape( array_shape: tuple[int, ...], @@ -1881,14 +1886,30 @@ def test_chunk_grid_shape( """ Test that the shape of the chunk grid and the shard grid are correctly indicated """ - arr = zarr.create_array( - {}, - dtype="uint8", - shape=array_shape, - chunks=chunk_shape, - shards=shard_shape, - zarr_format=zarr_format, - ) + if zarr_format == 2 and shard_shape is not None: + with pytest.raises( + ValueError, + match="Zarr format 2 arrays can only be created with `shard_shape` set to `None`.", + ): + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + pytest.skip("Zarr format 2 arrays can only be created with `shard_shape` set to `None`.") + else: + arr = zarr.create_array( + {}, + dtype="uint8", + shape=array_shape, + chunks=chunk_shape, + shards=shard_shape, + zarr_format=zarr_format, + ) + chunk_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, chunk_shape, strict=True)) if shard_shape is None: _shard_shape = chunk_shape From fb5312148ae595187b61dc2269186e17f1244f0d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 4 Aug 2025 16:01:25 +0200 Subject: [PATCH 25/26] include asyncarray cdata_shape in test --- tests/test_array.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_array.py b/tests/test_array.py 
index 9b4d8b6553..a99eea6a3f 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1918,6 +1918,7 @@ def test_chunk_grid_shape( shard_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, _shard_shape, strict=True)) assert arr.chunk_grid_shape == chunk_grid_shape assert arr.cdata_shape == chunk_grid_shape + assert arr._async_array.cdata_shape == chunk_grid_shape assert arr.shard_grid_shape == shard_grid_shape assert arr.nshards == np.prod(shard_grid_shape) From aa9d0b7f6e6861d54b5f1c5583676aafdf2fbf77 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 13 Aug 2025 21:53:41 +0200 Subject: [PATCH 26/26] make new API private --- src/zarr/core/array.py | 86 +++++++++++++++++++-------------------- src/zarr/core/indexing.py | 6 +-- tests/test_array.py | 54 ++++++++++++------------ tests/test_indexing.py | 10 ++--- 4 files changed, 78 insertions(+), 78 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 77946cae13..840be87241 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -93,13 +93,13 @@ OrthogonalSelection, Selection, VIndex, + _iter_grid, + _iter_regions, check_fields, check_no_multi_fields, is_pure_fancy_indexing, is_pure_orthogonal_indexing, is_scalar, - iter_grid, - iter_regions, pop_fields, ) from zarr.core.metadata import ( @@ -1187,18 +1187,17 @@ def basename(self) -> str: @property def cdata_shape(self) -> ChunkCoords: """ - The shape of the chunk grid for this array. This property exists for backwards compatibility. - See :func:`chunk_grid_shape` for the preferred method. + The shape of the chunk grid for this array. Returns ------- tuple[int, ...] The shape of the chunk grid for this array. """ - return self.chunk_grid_shape + return self._chunk_grid_shape @property - def chunk_grid_shape(self) -> ChunkCoords: + def _chunk_grid_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. 
@@ -1210,7 +1209,7 @@ def chunk_grid_shape(self) -> ChunkCoords: return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True))) @property - def shard_grid_shape(self) -> ChunkCoords: + def _shard_grid_shape(self) -> ChunkCoords: """ The shape of the shard grid for this array. @@ -1239,10 +1238,10 @@ def nchunks(self) -> int: int The total number of chunks in the array. """ - return product(self.chunk_grid_shape) + return product(self._chunk_grid_shape) @property - def nshards(self) -> int: + def _nshards(self) -> int: """ The number of shards in this array. @@ -1251,16 +1250,16 @@ def nshards(self) -> int: int The total number of shards in the array. """ - return product(self.shard_grid_shape) + return product(self._shard_grid_shape) async def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized in storage. - This value is calculated as the product of the number of initialized shards and the number of - chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1, - and in that case the number of chunks initialized is the same as the number of stored objects associated with an - array. For a direct count of the number of initialized stored objects, see ``nshards_initialized``. + This value is calculated as the product of the number of initialized shards and the number + of chunks per shard. For arrays that do not use sharding, the number of chunks per shard is + effectively 1, and in that case the number of chunks initialized is the same as the number + of stored objects associated with an array. 
Returns ------- @@ -1289,9 +1288,9 @@ async def nchunks_initialized(self) -> int: chunks_per_shard = product( tuple(a // b for a, b in zip(self.shards, self.chunks, strict=True)) ) - return (await self.nshards_initialized()) * chunks_per_shard + return (await self._nshards_initialized()) * chunks_per_shard - async def nshards_initialized(self) -> int: + async def _nshards_initialized(self) -> int: """ Calculate the number of shards that have been initialized in storage. @@ -1316,7 +1315,7 @@ async def nshards_initialized(self) -> int: >>> await arr.nshards_initialized() 3 """ - return len(await shards_initialized(self)) + return len(await _shards_initialized(self)) async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) @@ -1345,7 +1344,7 @@ def _iter_chunk_coords( chunk_coords: ChunkCoords The coordinates of each chunk in the selection. """ - return iter_chunk_coords( + return _iter_chunk_coords( array=self, origin=origin, selection_shape=selection_shape, @@ -1377,7 +1376,7 @@ def _iter_shard_coords( chunk_coords: tuple[int, ...] The coordinates of each shard in the selection. """ - return iter_shard_coords( + return _iter_shard_coords( array=self, origin=origin, selection_shape=selection_shape, @@ -1402,7 +1401,7 @@ def _iter_shard_keys( The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. - return iter_shard_keys( + return _iter_shard_keys( array=self, origin=origin, selection_shape=selection_shape, @@ -1426,7 +1425,7 @@ def _iter_chunk_regions( region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each chunk in the selection. """ - return iter_chunk_regions( + return _iter_chunk_regions( array=self, origin=origin, selection_shape=selection_shape, @@ -1450,7 +1449,7 @@ def _iter_shard_regions( region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each shard in the selection. 
""" - return iter_shard_regions(array=self, origin=origin, selection_shape=selection_shape) + return _iter_shard_regions(array=self, origin=origin, selection_shape=selection_shape) @property def nbytes(self) -> int: @@ -1957,7 +1956,7 @@ async def info_complete(self) -> Any: A property giving just the statically known information about an array. """ return self._info( - await self.nshards_initialized(), + await self._nshards_initialized(), await self.store_path.store.getsize_prefix(self.store_path.path), ) @@ -2402,24 +2401,23 @@ def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: @property def cdata_shape(self) -> ChunkCoords: """ - The shape of the chunk grid for this array. This property exists for backwards compatibility. - See :func:`chunk_grid_shape` for the preferred method. + The shape of the chunk grid for this array. """ - return self._async_array.chunk_grid_shape + return self._async_array._chunk_grid_shape @property - def chunk_grid_shape(self) -> ChunkCoords: + def _chunk_grid_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. """ - return self._async_array.chunk_grid_shape + return self._async_array._chunk_grid_shape @property - def shard_grid_shape(self) -> ChunkCoords: + def _shard_grid_shape(self) -> ChunkCoords: """ The shape of the shard grid for this array. """ - return self._async_array.shard_grid_shape + return self._async_array._shard_grid_shape @property def nchunks(self) -> int: @@ -2433,11 +2431,11 @@ def nchunks(self) -> int: return self._async_array.nchunks @property - def nshards(self) -> int: + def _nshards(self) -> int: """ The number of shards in the stored representation of this array. 
""" - return self._async_array.nshards + return self._async_array._nshards @property def nbytes(self) -> int: @@ -2502,7 +2500,7 @@ def nshards_initialized(self) -> int: >>> arr.nshard_initialized 3 """ - return sync(self._async_array.nshards_initialized()) + return sync(self._async_array._nshards_initialized()) def nbytes_stored(self) -> int: """ @@ -4062,7 +4060,7 @@ def info_complete(self) -> Any: return sync(self._async_array.info_complete()) -async def shards_initialized( +async def _shards_initialized( array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], ) -> tuple[str, ...]: """ @@ -5221,7 +5219,7 @@ def _parse_data_params( return data, shape_out, dtype_out -def iter_chunk_coords( +def _iter_chunk_coords( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, @@ -5249,10 +5247,10 @@ def iter_chunk_coords( chunk_coords: ChunkCoords The coordinates of each chunk in the selection. """ - return iter_grid(array.chunk_grid_shape, origin=origin, selection_shape=selection_shape) + return _iter_grid(array._chunk_grid_shape, origin=origin, selection_shape=selection_shape) -def iter_shard_coords( +def _iter_shard_coords( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, @@ -5280,10 +5278,10 @@ def iter_shard_coords( chunk_coords: ChunkCoords The coordinates of each shard in the selection. """ - return iter_grid(array.shard_grid_shape, origin=origin, selection_shape=selection_shape) + return _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) -def iter_shard_keys( +def _iter_shard_keys( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, @@ -5308,11 +5306,11 @@ def iter_shard_keys( The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. 
- _iter = iter_grid(array.shard_grid_shape, origin=origin, selection_shape=selection_shape) + _iter = _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) return (array.metadata.encode_chunk_key(k) for k in _iter) -def iter_shard_regions( +def _iter_shard_regions( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, @@ -5342,12 +5340,12 @@ def iter_shard_regions( else: shard_shape = array.shards - return iter_regions( + return _iter_regions( array.shape, shard_shape, origin=origin, selection_shape=selection_shape, trim_excess=True ) -def iter_chunk_regions( +def _iter_chunk_regions( array: Array | AsyncArray[Any], *, origin: Sequence[int] | None = None, @@ -5373,6 +5371,6 @@ def iter_chunk_regions( A tuple of slice objects representing the region spanned by each shard in the selection. """ - return iter_regions( + return _iter_regions( array.shape, array.chunks, origin=origin, selection_shape=selection_shape, trim_excess=True ) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 7fcf3aa500..bc2bd3b9f2 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -99,7 +99,7 @@ def __iter__(self) -> Iterator[ChunkProjection]: ... 
_ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] -def iter_grid( +def _iter_grid( grid_shape: Sequence[int], *, origin: Sequence[int] | None = None, @@ -174,7 +174,7 @@ def iter_grid( raise NotImplementedError(msg) # pragma: no cover -def iter_regions( +def _iter_regions( domain_shape: Sequence[int], region_shape: Sequence[int], *, @@ -221,7 +221,7 @@ def iter_regions( ((slice(1, 2, 1), slice(1, 3, 1)), (slice(2, 3, 1), slice(1, 3, 1))) """ grid_shape = tuple(ceildiv(d, s) for d, s in zip(domain_shape, region_shape, strict=True)) - for grid_position in iter_grid( + for grid_position in _iter_grid( grid_shape=grid_shape, origin=origin, selection_shape=selection_shape, order=order ): out: list[slice] = [] diff --git a/tests/test_array.py b/tests/test_array.py index 04c876c28a..2087013b1a 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -31,17 +31,17 @@ from zarr.core.array import ( CompressorsLike, FiltersLike, + _iter_chunk_coords, + _iter_chunk_regions, + _iter_shard_coords, + _iter_shard_keys, + _iter_shard_regions, _parse_chunk_encoding_v2, _parse_chunk_encoding_v3, + _shards_initialized, create_array, default_filters_v2, default_serializer_v3, - iter_chunk_coords, - iter_chunk_regions, - iter_shard_coords, - iter_shard_keys, - iter_shard_regions, - shards_initialized, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition @@ -64,7 +64,7 @@ from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.string import UTF8Base from zarr.core.group import AsyncGroup -from zarr.core.indexing import BasicIndexer, iter_grid, iter_regions +from zarr.core.indexing import BasicIndexer, _iter_grid, _iter_regions from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -409,7 +409,7 @@ async def test_nchunks_initialized( observed = 
arr.nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: - observed = await arr._async_array.nshards_initialized() + observed = await arr._async_array._nshards_initialized() assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard assert observed == expected @@ -420,9 +420,9 @@ async def test_nchunks_initialized( observed = arr.nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: - observed = await arr._async_array.nshards_initialized() + observed = await arr._async_array._nshards_initialized() assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard - expected = arr.nshards - idx - 1 + expected = arr._nshards - idx - 1 assert observed == expected @@ -447,7 +447,7 @@ async def test_chunks_initialized( ) for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False): arr[region] = 1 - observed = sorted(await shards_initialized(arr._async_array)) + observed = sorted(await _shards_initialized(arr._async_array)) expected = sorted(keys) assert observed == expected @@ -892,14 +892,14 @@ def test_write_empty_chunks_behavior( # initialize the store with some non-fill value chunks arr[:] = fill_value + 1 - assert arr.nshards_initialized == arr.nshards + assert arr.nshards_initialized == arr._nshards arr[:] = fill_value if not write_empty_chunks: assert arr.nshards_initialized == 0 else: - assert arr.nshards_initialized == arr.nshards + assert arr.nshards_initialized == arr._nshards @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -1952,11 +1952,11 @@ def test_chunk_grid_shape( else: _shard_shape = shard_shape shard_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, _shard_shape, strict=True)) - assert arr.chunk_grid_shape == chunk_grid_shape + assert arr._chunk_grid_shape == chunk_grid_shape assert arr.cdata_shape == chunk_grid_shape assert arr._async_array.cdata_shape == chunk_grid_shape - assert 
arr.shard_grid_shape == shard_grid_shape - assert arr.nshards == np.prod(shard_grid_shape) + assert arr._shard_grid_shape == shard_grid_shape + assert arr._nshards == np.prod(shard_grid_shape) @pytest.mark.parametrize( @@ -1981,8 +1981,8 @@ def test_iter_chunk_coords( shards=shard_shape, zarr_format=zarr_format, ) - expected = tuple(iter_grid(arr.shard_grid_shape)) - observed = tuple(iter_chunk_coords(arr)) + expected = tuple(_iter_grid(arr._shard_grid_shape)) + observed = tuple(_iter_chunk_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_coords()) assert observed == tuple(arr._async_array._iter_chunk_coords()) @@ -2014,8 +2014,8 @@ def test_iter_shard_coords( shards=shard_shape, zarr_format=zarr_format, ) - expected = tuple(iter_grid(arr.shard_grid_shape)) - observed = tuple(iter_shard_coords(arr)) + expected = tuple(_iter_grid(arr._shard_grid_shape)) + observed = tuple(_iter_shard_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_coords()) assert observed == tuple(arr._async_array._iter_shard_coords()) @@ -2047,8 +2047,10 @@ def test_iter_shard_keys( shards=shard_shape, zarr_format=zarr_format, ) - expected = tuple(arr.metadata.encode_chunk_key(key) for key in iter_grid(arr.shard_grid_shape)) - observed = tuple(iter_shard_keys(arr)) + expected = tuple( + arr.metadata.encode_chunk_key(key) for key in _iter_grid(arr._shard_grid_shape) + ) + observed = tuple(_iter_shard_keys(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_keys()) assert observed == tuple(arr._async_array._iter_shard_keys()) @@ -2083,8 +2085,8 @@ def test_iter_shard_regions( _shard_shape = chunk_shape else: _shard_shape = shard_shape - expected = tuple(iter_regions(arr.shape, _shard_shape)) - observed = tuple(iter_shard_regions(arr)) + expected = tuple(_iter_regions(arr.shape, _shard_shape)) + observed = tuple(_iter_shard_regions(arr)) assert observed == expected assert observed == 
tuple(arr._iter_shard_regions()) assert observed == tuple(arr._async_array._iter_shard_regions()) @@ -2112,8 +2114,8 @@ def test_iter_chunk_regions( zarr_format=zarr_format, ) - expected = tuple(iter_regions(arr.shape, chunk_shape)) - observed = tuple(iter_chunk_regions(arr)) + expected = tuple(_iter_regions(arr.shape, chunk_shape)) + observed = tuple(_iter_chunk_regions(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_regions()) assert observed == tuple(arr._async_array._iter_chunk_regions()) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index ce1dd9832b..56f17ad46b 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -19,9 +19,9 @@ OrthogonalSelection, Selection, _ArrayIndexingOrder, + _iter_grid, + _iter_regions, ceildiv, - iter_grid, - iter_regions, make_slice_selection, normalize_integer_selection, oindex, @@ -1922,7 +1922,7 @@ def test_iter_grid( selection_shape = tuple(gs - o for gs, o in zip(grid_shape, origin, strict=False)) observed = tuple( - iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) + _iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) ) # generate a numpy array of indices, and index it @@ -1943,7 +1943,7 @@ def test_iter_grid_invalid() -> None: Ensure that a selection_shape that exceeds the grid_shape + origin produces an indexing error. """ with pytest.raises(IndexError): - list(iter_grid((5,), origin=(0,), selection_shape=(10,))) + list(_iter_grid((5,), origin=(0,), selection_shape=(10,))) def test_indexing_with_zarr_array(store: StorePath) -> None: @@ -2052,7 +2052,7 @@ def test_iter_regions( expected = tuple(itertools.product(*expected_slices_by_dim)) observed = tuple( - iter_regions( + _iter_regions( domain_shape, region_shape, origin=origin,