From 09d7b7684625e66ac559f969d03476a6beed49d6 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:15:02 +0100 Subject: [PATCH 1/9] Test with multiple zarr versions --- .github/workflows/ci.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b1a1a7d9..a672eff3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,8 +13,10 @@ jobs: fail-fast: false matrix: python-version: ["3.11", "3.12", "3.13"] + zarr-version: ["3.0.*", "3.1.*"] # macos-13 is an intel runner, macos-14 is an arm64 runner - platform: [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14] + platform: + [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14] defaults: run: @@ -51,6 +53,7 @@ jobs: run: | export DISABLE_NUMCODECS_AVX2="" python -m pip install -v -e .[test,test_extras,msgpack,crc32c,pcodec,zfpy] + python -m pip install zarr==${{ matrix.zarr-version }} - name: Install zarr-python # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out From f2386f876e04c8cf43e166e7ca032bba8576b763 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:18:14 +0100 Subject: [PATCH 2/9] Only run one 3.0.x test --- .github/workflows/ci.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a672eff3..884f12ac 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,10 +13,16 @@ jobs: fail-fast: false matrix: python-version: ["3.11", "3.12", "3.13"] - zarr-version: ["3.0.*", "3.1.*"] + # Run full test matrix on latest version of zarr + zarr-version: ["3.1.*"] # macos-13 is an intel runner, macos-14 is an arm64 runner platform: [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14] + include: + # Add one test run for zarr 3.0.x + - zarr-version: "3.0.*" + python-version: "3.11" + platform: "ubuntu-latest" defaults: run: From a59dce30b1f78b0ab09f1fb6b0c1be2ed6359b48 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:21:39 +0100 Subject: [PATCH 3/9] Try using fetch tags --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 884f12ac..13923e58 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - fetch-depth: 0 # required for version resolution + fetch-tags: true # required for version resolution - name: Set up Conda uses: conda-incubator/setup-miniconda@v3.1.1 From f33297978d624802ed938e99d326b3f47793eebc Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:27:50 +0100 Subject: [PATCH 4/9] Use existing zarr-python install --- .github/workflows/ci.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 13923e58..2547f2b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -20,8 +20,8 @@ jobs: [ubuntu-latest, ubuntu-22.04-arm, windows-latest, macos-13, macos-14] include: # Add one test run for zarr 3.0.x - - zarr-version: "3.0.*" - python-version: "3.11" + - python-version: "3.12" + zarr-version: "3.0.*" platform: "ubuntu-latest" defaults: @@ -59,20 +59,19 @@ jobs: run: | export DISABLE_NUMCODECS_AVX2="" python -m pip install -v -e .[test,test_extras,msgpack,crc32c,pcodec,zfpy] - python -m pip install zarr==${{ matrix.zarr-version }} - name: Install zarr-python # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out # so we can have some tests of our minimum version of numpy (1.24) if: matrix.python-version != '3.11' - run: python -m pip install zarr>=3 + run: python -m pip install zarr==${{ matrix.zarr-version }} - name: List installed packages run: python -m pip list - name: Run tests shell: "bash -l {0}" - run: pytest -v + run: pytest -v numcodecs/tests - uses: codecov/codecov-action@v5 with: From b644e9a88e76b5ef5dca8e734cbe436d739963dc Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:44:59 +0100 Subject: [PATCH 5/9] Use pyargs with pytest --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2547f2b1..39216aff 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -71,7 +71,7 @@ jobs: - name: Run tests shell: "bash -l {0}" - run: pytest -v numcodecs/tests + run: pytest -v --pyargs numcodecs.tests - uses: codecov/codecov-action@v5 with: From e3798cdf59aa11937a10c6e1469e2bd2761b104f Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 13:51:39 +0100 Subject: [PATCH 6/9] Fix to work with zarr 3.1.0 --- numcodecs/zarr3.py | 53 +++++++++++++++++++++++++++++++++++----------- pyproject.toml | 2 +- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 3ace9814..1604e424 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -29,17 +29,19 @@ import math from dataclasses import dataclass, replace from functools import cached_property +from importlib.metadata import version from typing import Any, Self from warnings import warn import numpy as np +from packaging.version import Version import numcodecs try: - import zarr + import zarr # noqa: F401 - if zarr.__version__ < "3.0.0": # pragma: no cover + if Version(version('zarr')) < Version("3.0.0"): # pragma: no cover raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.") except ImportError as e: # pragma: no cover raise ImportError( @@ -52,10 +54,28 @@ from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration, product +from zarr.dtype import ZDType CODEC_PREFIX = "numcodecs." +def _from_zarr_dtype(dtype: Any) -> np.dtype: + """ + Get a numpy data type from an array spec, depending on the zarr version. + """ + if Version(version('zarr')) >= Version("3.1.0"): + return dtype.to_native_dtype() + return dtype # pragma: no cover + + +def _to_zarr_dtype(dtype: np.dtype) -> Any: + if Version(version('zarr')) >= Version("3.1.0"): + from zarr.dtype import parse_data_type + + return parse_data_type(dtype, zarr_format=3) + return dtype # pragma: no cover + + def _expect_name_prefix(codec_name: str) -> str: if not codec_name.startswith(CODEC_PREFIX): raise ValueError( @@ -224,7 +244,8 @@ class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: if self.codec_config.get("elementsize") is None: - return Shuffle(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize}) + dtype = _from_zarr_dtype(array_spec.dtype) + return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize}) return self # pragma: no cover @@ -232,7 +253,8 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): - return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] + dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload] + return replace(chunk_spec, dtype=dtype) return chunk_spec @@ -243,12 +265,14 @@ class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): - return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] + dtype = _to_zarr_dtype(np.dtype(astype)) # type: ignore[call-overload] + return replace(chunk_spec, dtype=dtype) return chunk_spec def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: if self.codec_config.get("dtype") is None: - return FixedScaleOffset(**{**self.codec_config, "dtype": str(array_spec.dtype)}) + dtype = _from_zarr_dtype(array_spec.dtype) + return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)}) return self @@ -258,7 +282,8 @@ def __init__(self, **codec_config: JSON) -> None: def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: if self.codec_config.get("dtype") is None: - return Quantize(**{**self.codec_config, "dtype": str(array_spec.dtype)}) + dtype = _from_zarr_dtype(array_spec.dtype) + return Quantize(**{**self.codec_config, "dtype": str(dtype)}) return self @@ -267,21 +292,25 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace( chunk_spec, shape=(1 + math.ceil(product(chunk_spec.shape) / 8),), - dtype=np.dtype("uint8"), + dtype=_to_zarr_dtype(np.dtype("uint8")), ) - def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None: - if dtype != np.dtype("bool"): + def validate(self, *, shape: tuple[int, ...], dtype: ZDType[Any, Any], **_kwargs) -> None: + _dtype = _from_zarr_dtype(dtype) + if _dtype != np.dtype("bool"): raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.") class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] + dtype = _to_zarr_dtype(np.dtype(self.codec_config["encode_dtype"])) # type: ignore[arg-type] + return replace(chunk_spec, dtype=dtype) def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: if self.codec_config.get("decode_dtype") is None: - return AsType(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)}) + # TODO: remove these coverage exemptions the correct way, i.e. with tests + dtype = _from_zarr_dtype(array_spec.dtype) # pragma: no cover + return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover return self diff --git a/pyproject.toml b/pyproject.toml index 387603f3..7ed4aefe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ description = """ A Python package providing buffer compression and transformation codecs \ for use in data storage and communication applications.""" readme = "README.rst" -dependencies = ["numpy>=1.24", "typing_extensions"] +dependencies = ["numpy>=1.24", "typing_extensions", "packaging"] requires-python = ">=3.11" dynamic = [ "version", From ad72b734952152df07a2a455b929e2da69a10143 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 14:03:11 +0100 Subject: [PATCH 7/9] Add back fetch depth --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 39216aff..3989946b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,7 +33,8 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - fetch-tags: true # required for version resolution + fetch-depth: 0 # required for version resolution + fetch-tags: true - name: Set up Conda uses: conda-incubator/setup-miniconda@v3.1.1 From 3b57729b99c075099b57bd496123ba39928d0d97 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 14:18:09 +0100 Subject: [PATCH 8/9] Move ZDtype to type checking block --- .pre-commit-config.yaml | 2 +- numcodecs/zarr3.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c08b8f8a..c6b0ab6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,4 +30,4 @@ repos: hooks: - id: mypy args: [--config-file, pyproject.toml] - additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3'] + additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.1'] diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 1604e424..5447c368 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -30,7 +30,7 @@ from dataclasses import dataclass, replace from functools import cached_property from importlib.metadata import version -from typing import Any, Self +from typing import TYPE_CHECKING, Any, Self from warnings import warn import numpy as np @@ -54,7 +54,9 @@ from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration, product -from zarr.dtype import ZDType + +if TYPE_CHECKING: + from zarr.dtype import ZDType CODEC_PREFIX = "numcodecs." @@ -295,7 +297,7 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: dtype=_to_zarr_dtype(np.dtype("uint8")), ) - def validate(self, *, shape: tuple[int, ...], dtype: ZDType[Any, Any], **_kwargs) -> None: + def validate(self, *, shape: tuple[int, ...], dtype: "ZDType[Any, Any]", **_kwargs) -> None: # noqa: UP037 _dtype = _from_zarr_dtype(dtype) if _dtype != np.dtype("bool"): raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.") From 216f80a92e5714366a218703b73a123a7905e282 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 12 Aug 2025 14:20:15 +0100 Subject: [PATCH 9/9] Remove no cover statements --- numcodecs/zarr3.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 5447c368..3384c3e6 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -41,9 +41,9 @@ try: import zarr # noqa: F401 - if Version(version('zarr')) < Version("3.0.0"): # pragma: no cover + if Version(version('zarr')) < Version("3.0.0"): raise ImportError("zarr 3.0.0 or later is required to use the numcodecs zarr integration.") -except ImportError as e: # pragma: no cover +except ImportError as e: raise ImportError( "zarr 3.0.0 or later is required to use the numcodecs zarr integration." ) from e @@ -67,7 +67,7 @@ def _from_zarr_dtype(dtype: Any) -> np.dtype: """ if Version(version('zarr')) >= Version("3.1.0"): return dtype.to_native_dtype() - return dtype # pragma: no cover + return dtype def _to_zarr_dtype(dtype: np.dtype) -> Any: @@ -75,14 +75,12 @@ def _to_zarr_dtype(dtype: np.dtype) -> Any: from zarr.dtype import parse_data_type return parse_data_type(dtype, zarr_format=3) - return dtype # pragma: no cover + return dtype def _expect_name_prefix(codec_name: str) -> str: if not codec_name.startswith(CODEC_PREFIX): - raise ValueError( - f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead." - ) # pragma: no cover + raise ValueError(f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead.") return codec_name.removeprefix(CODEC_PREFIX) @@ -91,7 +89,7 @@ def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: if not parsed_name.startswith(CODEC_PREFIX): raise ValueError( f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead." - ) # pragma: no cover + ) id = _expect_name_prefix(parsed_name) return {"id": id, **parsed_configuration} @@ -117,7 +115,7 @@ def __init__(self, **codec_config: JSON) -> None: if not self.codec_name: raise ValueError( "The codec name needs to be supplied through the `codec_name` attribute." - ) # pragma: no cover + ) unprefixed_codec_name = _expect_name_prefix(self.codec_name) if "id" not in codec_config: @@ -125,7 +123,7 @@ def __init__(self, **codec_config: JSON) -> None: elif codec_config["id"] != unprefixed_codec_name: raise ValueError( f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}." - ) # pragma: no cover + ) object.__setattr__(self, "codec_config", codec_config) warn( @@ -310,16 +308,15 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: if self.codec_config.get("decode_dtype") is None: - # TODO: remove these coverage exemptions the correct way, i.e. with tests - dtype = _from_zarr_dtype(array_spec.dtype) # pragma: no cover - return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover + dtype = _from_zarr_dtype(array_spec.dtype) + return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) return self # bytes-to-bytes checksum codecs class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: - return input_byte_length + 4 # pragma: no cover + return input_byte_length + 4 class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"):