Skip to content

Unexpected zarr numcodecs userwarning when using to_icechunk #447

@ianhi

Description

@ianhi

Issue

When using to_icechunk with zarr 3, I get an unexpected UserWarning from numcodecs saying that its codecs are not part of the Zarr version 3 specification.

Minimal example:

import xarray as xr
from virtualizarr import open_virtual_dataset
from icechunk import Repository, local_filesystem_storage
import warnings
import tempfile

# create an example pre-existing netCDF4 file
ds = xr.tutorial.open_dataset('air_temperature')
ds.to_netcdf('air.nc')
vds = open_virtual_dataset('air.nc')

# icechunk
repo = Repository.create(local_filesystem_storage(tempfile.mkdtemp()))
session = repo.writable_session("main")
# uncomment to get a full stack trace
# warnings.filterwarnings('error')
vds.virtualize.to_icechunk(session.store)

Running this gives me:

[/Users/ian/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py:155]: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.
  super().__init__(**codec_config)

If I uncomment the warnings.filterwarnings('error') line to turn the warning into an exception, we get the full stack trace:

Full Stack Trace
---------------------------------------------------------------------------
UserWarning                               Traceback (most recent call last)
Cell In[15], line 15
     13 # uncomment to get a full stack trace
     14 warnings.filterwarnings('error')
---> 15 vds.virtualize.to_icechunk(session.store)

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/accessor.py:83](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/accessor.py#line=82), in VirtualiZarrDatasetAccessor.to_icechunk(self, store, group, append_dim, last_updated_at)
     34 """
     35 Write an xarray dataset to an Icechunk store.
     36 
   (...)
     79 ... )
     80 """
     81 from virtualizarr.writers.icechunk import dataset_to_icechunk
---> 83 dataset_to_icechunk(
     84     self.ds,
     85     store,
     86     group=group,
     87     append_dim=append_dim,
     88     last_updated_at=last_updated_at,
     89 )

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py:112](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py#line=111), in dataset_to_icechunk(ds, store, group, append_dim, last_updated_at)
    106     group_object = Group.from_store(store=store_path, zarr_format=3)
    108 group_object.update_attributes(
    109     {k: encode_zarr_attr_value(v) for k, v in ds.attrs.items()}
    110 )
--> 112 return write_variables_to_icechunk_group(
    113     ds.variables,
    114     ds.attrs,
    115     store=store,
    116     group=group_object,
    117     append_dim=append_dim,
    118     last_updated_at=last_updated_at,
    119 )

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py:156](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py#line=155), in write_variables_to_icechunk_group(variables, attrs, store, group, append_dim, last_updated_at)
    154 # Then finish by writing the virtual variables to the same group
    155 for name, var in virtual_variables.items():
--> 156     write_virtual_variable_to_icechunk(
    157         store=store,
    158         group=group,
    159         name=name,
    160         var=var,
    161         append_dim=append_dim,
    162         last_updated_at=last_updated_at,
    163     )

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py:247](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/writers/icechunk.py#line=246), in write_virtual_variable_to_icechunk(store, group, name, var, append_dim, last_updated_at)
    244 append_axis = None
    246 # Get the codecs and convert them to zarr v3 format
--> 247 codecs = zarray._v3_codecs()
    249 # create array if it doesn't already exist
    250 arr = group.require_array(
    251     name=name,
    252     shape=zarray.shape,
   (...)
    259     fill_value=zarray.fill_value,
    260 )

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/zarr.py:216](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/virtualizarr/zarr.py#line=215), in ZArray._v3_codecs(self)
    213     codec_configs.append(_num_codec_config_to_configurable(self.compressor))
    215 # convert the pipeline repr into actual v3 codec objects
--> 216 codecs = parse_codecs(codec_configs)
    217 filters = v3_codecs_to_filters(codecs)
    218 compressors = v3_codecs_to_compressors(codecs)

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/zarr/core/metadata/v3.py:80](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/zarr/core/metadata/v3.py#line=79), in parse_codecs(data)
     78     else:
     79         name_parsed, _ = parse_named_configuration(c, require_configuration=False)
---> 80         out += (get_codec_class(name_parsed).from_dict(c),)
     82 return out

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py:111](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py#line=110), in _NumcodecsCodec.from_dict(cls, data)
    108 @classmethod
    109 def from_dict(cls, data: dict[str, JSON]) -> Self:
    110     codec_config = _parse_codec_configuration(data)
--> 111     return cls(**codec_config)

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py:303](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py#line=302), in FixedScaleOffset.__init__(self, **codec_config)
    302 def __init__(self, **codec_config: JSON) -> None:
--> 303     super().__init__(**codec_config)

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py:155](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py#line=154), in _NumcodecsArrayArrayCodec.__init__(self, **codec_config)
    154 def __init__(self, **codec_config: JSON) -> None:
--> 155     super().__init__(**codec_config)

File [~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py:97](http://localhost:8888/lab/workspaces/auto-p/tree/icechunk-learning/~/miniforge3/envs/icechunk/lib/python3.12/site-packages/numcodecs/zarr3.py#line=96), in _NumcodecsCodec.__init__(self, **codec_config)
     92     raise ValueError(
     93         f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
     94     )  # pragma: no cover
     96 object.__setattr__(self, "codec_config", codec_config)
---> 97 warn(
     98     "Numcodecs codecs are not in the Zarr version 3 specification and "
     99     "may not be supported by other zarr implementations.",
    100     category=UserWarning,
    101     stacklevel=2,
    102 )

UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.

Metadata

Metadata

Assignees

No one assigned

    Labels

    documentation: Improvements or additions to documentation

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions