diff --git a/src/anemoi/datasets/.gitignore b/src/anemoi/datasets/.gitignore new file mode 100644 index 000000000..0aba28e9b --- /dev/null +++ b/src/anemoi/datasets/.gitignore @@ -0,0 +1 @@ +!build/ diff --git a/src/anemoi/datasets/__init__.py b/src/anemoi/datasets/__init__.py index fe6ca61f1..84264bc23 100644 --- a/src/anemoi/datasets/__init__.py +++ b/src/anemoi/datasets/__init__.py @@ -8,16 +8,16 @@ # nor does it submit to any jurisdiction. -from .data import MissingDateError -from .data import add_dataset_path -from .data import add_named_dataset -from .data import list_dataset_names -from .data import open_dataset +from anemoi.datasets.use.gridded import MissingDateError +from anemoi.datasets.use.gridded import add_dataset_path +from anemoi.datasets.use.gridded import add_named_dataset +from anemoi.datasets.use.gridded import list_dataset_names +from anemoi.datasets.use.gridded import open_dataset try: # NOTE: the `_version.py` file must not be present in the git repository # as it is generated by setuptools at install time - from ._version import __version__ # type: ignore + from anemoi.datasets._version import __version__ # type: ignore except ImportError: # pragma: no cover # Local copy or not installed with setuptools __version__ = "999" diff --git a/src/anemoi/datasets/__main__.py b/src/anemoi/datasets/__main__.py index 62b7d7c73..f47c46050 100644 --- a/src/anemoi/datasets/__main__.py +++ b/src/anemoi/datasets/__main__.py @@ -12,8 +12,8 @@ from anemoi.utils.cli import cli_main from anemoi.utils.cli import make_parser -from . import __version__ -from .commands import COMMANDS +from anemoi.datasets import __version__ +from anemoi.datasets.commands import COMMANDS # For read-the-docs diff --git a/src/anemoi/datasets/build/__init__.py b/src/anemoi/datasets/build/__init__.py new file mode 100644 index 000000000..9fc775e54 --- /dev/null +++ b/src/anemoi/datasets/build/__init__.py @@ -0,0 +1,8 @@ +# (C) Copyright 2025 Anemoi contributors. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/src/anemoi/datasets/create/__init__.py b/src/anemoi/datasets/build/gridded/__init__.py similarity index 97% rename from src/anemoi/datasets/create/__init__.py rename to src/anemoi/datasets/build/gridded/__init__.py index 5600cb254..696fc118b 100644 --- a/src/anemoi/datasets/create/__init__.py +++ b/src/anemoi/datasets/build/gridded/__init__.py @@ -31,26 +31,25 @@ from anemoi.datasets import MissingDateError from anemoi.datasets import open_dataset -from anemoi.datasets.create.input.trace import enable_trace -from anemoi.datasets.create.persistent import build_storage -from anemoi.datasets.data.misc import as_first_date -from anemoi.datasets.data.misc import as_last_date +from anemoi.datasets.build.gridded.check import DatasetName +from anemoi.datasets.build.gridded.check import check_data_values +from anemoi.datasets.build.gridded.chunks import ChunkFilter +from anemoi.datasets.build.gridded.config import build_output +from anemoi.datasets.build.gridded.config import loader_config +from anemoi.datasets.build.gridded.persistent import build_storage +from anemoi.datasets.build.gridded.statistics import Summary +from anemoi.datasets.build.gridded.statistics import TmpStatistics +from anemoi.datasets.build.gridded.statistics import check_variance +from anemoi.datasets.build.gridded.statistics import compute_statistics +from anemoi.datasets.build.gridded.statistics import default_statistics_dates +from anemoi.datasets.build.gridded.statistics import fix_variance +from anemoi.datasets.build.gridded.utils import normalize_and_check_dates +from anemoi.datasets.build.gridded.writer import ViewCacheArray +from anemoi.datasets.build.input import InputBuilder +from anemoi.datasets.build.input.trace import enable_trace from anemoi.datasets.dates.groups import Groups - -from .check import DatasetName -from .check import check_data_values -from .chunks import ChunkFilter -from .config import build_output -from .config import loader_config -from .input import InputBuilder -from .statistics import Summary -from .statistics import TmpStatistics -from .statistics import check_variance -from .statistics import compute_statistics -from .statistics import default_statistics_dates -from .statistics import fix_variance -from .utils import normalize_and_check_dates -from .writer import ViewCacheArray +from anemoi.datasets.use.gridded.misc import as_first_date +from anemoi.datasets.use.gridded.misc import as_last_date LOG = logging.getLogger(__name__) @@ -193,7 +192,7 @@ def add_dataset(self, mode: str = "r+", **kwargs: Any) -> zarr.Array: import zarr z = zarr.open(self.path, mode=mode) - from .zarr import add_zarr_dataset + from anemoi.datasets.build.gridded.zarr import add_zarr_dataset return add_zarr_dataset(zarr_root=z, **kwargs) @@ -397,7 +396,7 @@ def _cache_context(self) -> Any: Any The cache context. """ - from .utils import cache_context + from anemoi.datasets.build.gridded.utils import cache_context return cache_context(self.cache) @@ -473,7 +472,7 @@ def __init__(self, path: str, options: dict = None, **kwargs: Any): def run(self) -> None: """Run the patch.""" - from .patch import apply_patch + from anemoi.datasets.build.gridded.patch import apply_patch apply_patch(self.path, **self.options) @@ -493,7 +492,7 @@ def __init__(self, path: str, **kwargs: Any): def run(self) -> None: """Run the size computation.""" - from .size import compute_directory_sizes + from anemoi.datasets.build.gridded.size import compute_directory_sizes metadata = compute_directory_sizes(self.path) self.update_metadata(**metadata) @@ -515,7 +514,7 @@ class HasRegistryMixin: @cached_property def registry(self) -> Any: """Get the registry.""" - from .zarr import ZarrBuiltRegistry + from anemoi.datasets.build.gridded.zarr import ZarrBuiltRegistry return ZarrBuiltRegistry(self.path, use_threads=self.use_threads) diff --git a/src/anemoi/datasets/create/check.py b/src/anemoi/datasets/build/gridded/check.py similarity index 100% rename from src/anemoi/datasets/create/check.py rename to src/anemoi/datasets/build/gridded/check.py diff --git a/src/anemoi/datasets/create/chunks.py b/src/anemoi/datasets/build/gridded/chunks.py similarity index 100% rename from src/anemoi/datasets/create/chunks.py rename to src/anemoi/datasets/build/gridded/chunks.py diff --git a/src/anemoi/datasets/create/config.py b/src/anemoi/datasets/build/gridded/config.py similarity index 100% rename from src/anemoi/datasets/create/config.py rename to src/anemoi/datasets/build/gridded/config.py diff --git a/src/anemoi/datasets/create/input/context/field.py b/src/anemoi/datasets/build/gridded/context.py similarity index 88% rename from src/anemoi/datasets/create/input/context/field.py rename to src/anemoi/datasets/build/gridded/context.py index 1dd01340e..91ea80c07 100644 --- a/src/anemoi/datasets/create/input/context/field.py +++ b/src/anemoi/datasets/build/gridded/context.py @@ -12,11 +12,11 @@ from earthkit.data.core.order import build_remapping -from ..result.field import FieldResult -from . import Context +from anemoi.datasets.build.gridded.result import GriddedResult +from anemoi.datasets.build.input.context import Context -class FieldContext(Context): +class GriddedContext(Context): def __init__( self, @@ -46,7 +46,7 @@ def filter_argument(self, argument: Any) -> Any: return argument def create_result(self, data): - return FieldResult(self, data) + return GriddedResult(self, data) def matching_dates(self, filtering_dates, group_of_dates: Any) -> Any: from anemoi.datasets.dates.groups import GroupOfDates diff --git a/src/anemoi/datasets/create/patch.py b/src/anemoi/datasets/build/gridded/patch.py similarity index 100% rename from src/anemoi/datasets/create/patch.py rename to src/anemoi/datasets/build/gridded/patch.py diff --git a/src/anemoi/datasets/create/persistent.py b/src/anemoi/datasets/build/gridded/persistent.py similarity index 100% rename from src/anemoi/datasets/create/persistent.py rename to src/anemoi/datasets/build/gridded/persistent.py diff --git a/src/anemoi/datasets/create/input/result/field.py b/src/anemoi/datasets/build/gridded/result.py similarity index 99% rename from src/anemoi/datasets/create/input/result/field.py rename to src/anemoi/datasets/build/gridded/result.py index 083d2ffd7..69c560969 100644 --- a/src/anemoi/datasets/create/input/result/field.py +++ b/src/anemoi/datasets/build/gridded/result.py @@ -22,7 +22,7 @@ from anemoi.utils.humanize import shorten_list from earthkit.data.core.order import build_remapping -from . import Result +from anemoi.datasets.build.input.result import Result LOG = logging.getLogger(__name__) @@ -276,7 +276,7 @@ def sort(old_dic: DefaultDict[str, set]) -> dict[str, list[Any]]: return dict(param_level=params_levels, param_step=params_steps, area=area, grid=grid) -class FieldResult(Result): +class GriddedResult(Result): """Class to represent the result of an action in the dataset creation process.""" empty: bool = False diff --git a/src/anemoi/datasets/create/size.py b/src/anemoi/datasets/build/gridded/size.py similarity index 100% rename from src/anemoi/datasets/create/size.py rename to src/anemoi/datasets/build/gridded/size.py diff --git a/src/anemoi/datasets/create/source.py b/src/anemoi/datasets/build/gridded/source.py similarity index 95% rename from src/anemoi/datasets/create/source.py rename to src/anemoi/datasets/build/gridded/source.py index f79b0e9dd..494b29b92 100644 --- a/src/anemoi/datasets/create/source.py +++ b/src/anemoi/datasets/build/gridded/source.py @@ -12,7 +12,7 @@ import earthkit.data as ekd -from anemoi.datasets.create.typing import DateList +from anemoi.datasets.build.gridded.typing import DateList class Source(ABC): diff --git a/src/anemoi/datasets/create/sources/__init__.py b/src/anemoi/datasets/build/gridded/sources/__init__.py similarity index 100% rename from src/anemoi/datasets/create/sources/__init__.py rename to src/anemoi/datasets/build/gridded/sources/__init__.py diff --git a/src/anemoi/datasets/create/sources/accumulations.py b/src/anemoi/datasets/build/gridded/sources/accumulations.py similarity index 99% rename from src/anemoi/datasets/create/sources/accumulations.py rename to src/anemoi/datasets/build/gridded/sources/accumulations.py index ce4ff6266..86adea4d1 100644 --- a/src/anemoi/datasets/create/sources/accumulations.py +++ b/src/anemoi/datasets/build/gridded/sources/accumulations.py @@ -20,7 +20,7 @@ from earthkit.data.readers.grib.output import new_grib_output from numpy.typing import NDArray -from anemoi.datasets.create.sources import source_registry +from anemoi.datasets.build.gridded.sources import source_registry from .legacy import LegacySource from .mars import mars diff --git a/src/anemoi/datasets/create/sources/accumulations2.py b/src/anemoi/datasets/build/gridded/sources/accumulations2.py similarity index 99% rename from src/anemoi/datasets/create/sources/accumulations2.py rename to src/anemoi/datasets/build/gridded/sources/accumulations2.py index c6bf98843..64410164f 100644 --- a/src/anemoi/datasets/create/sources/accumulations2.py +++ b/src/anemoi/datasets/build/gridded/sources/accumulations2.py @@ -18,10 +18,10 @@ from earthkit.data.core.temporary import temp_file from earthkit.data.readers.grib.output import new_grib_output -from anemoi.datasets.create.sources import source_registry -from anemoi.datasets.create.sources.mars import mars +from anemoi.datasets.build.gridded.sources import source_registry from .legacy import LegacySource +from .mars import mars LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/sources/anemoi_dataset.py b/src/anemoi/datasets/build/gridded/sources/anemoi_dataset.py similarity index 100% rename from src/anemoi/datasets/create/sources/anemoi_dataset.py rename to src/anemoi/datasets/build/gridded/sources/anemoi_dataset.py diff --git a/src/anemoi/datasets/create/sources/constants.py b/src/anemoi/datasets/build/gridded/sources/constants.py similarity index 100% rename from src/anemoi/datasets/create/sources/constants.py rename to src/anemoi/datasets/build/gridded/sources/constants.py diff --git a/src/anemoi/datasets/create/sources/eccc_fstd.py b/src/anemoi/datasets/build/gridded/sources/eccc_fstd.py similarity index 100% rename from src/anemoi/datasets/create/sources/eccc_fstd.py rename to src/anemoi/datasets/build/gridded/sources/eccc_fstd.py diff --git a/src/anemoi/datasets/create/sources/empty.py b/src/anemoi/datasets/build/gridded/sources/empty.py similarity index 100% rename from src/anemoi/datasets/create/sources/empty.py rename to src/anemoi/datasets/build/gridded/sources/empty.py diff --git a/src/anemoi/datasets/create/sources/fdb.py b/src/anemoi/datasets/build/gridded/sources/fdb.py similarity index 97% rename from src/anemoi/datasets/create/sources/fdb.py rename to src/anemoi/datasets/build/gridded/sources/fdb.py index bb33f7d50..5d678fca7 100644 --- a/src/anemoi/datasets/create/sources/fdb.py +++ b/src/anemoi/datasets/build/gridded/sources/fdb.py @@ -16,7 +16,7 @@ from anemoi.transform.flavour import RuleBasedFlavour from anemoi.transform.grids import grid_registry -from anemoi.datasets.create.typing import DateList +from anemoi.datasets.build.gridded.typing import DateList from ..source import Source from . import source_registry @@ -125,7 +125,7 @@ def _time_request_keys(dt: datetime, offset_from_date: bool | None = None) -> st def _shortname_to_paramid(shortname: list[str], param_id_map: dict[str, int] | None = None) -> list[int]: - from anemoi.datasets.create.sources.mars import use_grib_paramid + from .mars import use_grib_paramid """Convert a shortname to a parameter ID.""" if param_id_map is None: diff --git a/src/anemoi/datasets/create/sources/forcings.py b/src/anemoi/datasets/build/gridded/sources/forcings.py similarity index 100% rename from src/anemoi/datasets/create/sources/forcings.py rename to src/anemoi/datasets/build/gridded/sources/forcings.py diff --git a/src/anemoi/datasets/create/sources/grib.py b/src/anemoi/datasets/build/gridded/sources/grib.py similarity index 100% rename from src/anemoi/datasets/create/sources/grib.py rename to src/anemoi/datasets/build/gridded/sources/grib.py diff --git a/src/anemoi/datasets/create/sources/grib_index.py b/src/anemoi/datasets/build/gridded/sources/grib_index.py similarity index 100% rename from src/anemoi/datasets/create/sources/grib_index.py rename to src/anemoi/datasets/build/gridded/sources/grib_index.py diff --git a/src/anemoi/datasets/create/sources/hindcasts.py b/src/anemoi/datasets/build/gridded/sources/hindcasts.py similarity index 96% rename from src/anemoi/datasets/create/sources/hindcasts.py rename to src/anemoi/datasets/build/gridded/sources/hindcasts.py index ad1df38a5..a61a00d12 100644 --- a/src/anemoi/datasets/create/sources/hindcasts.py +++ b/src/anemoi/datasets/build/gridded/sources/hindcasts.py @@ -12,10 +12,10 @@ from earthkit.data.core.fieldlist import MultiFieldList -from anemoi.datasets.create.sources.mars import mars +from anemoi.datasets.build.gridded.sources import source_registry -from . import source_registry from .legacy import LegacySource +from .mars import mars LOGGER = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/sources/legacy.py b/src/anemoi/datasets/build/gridded/sources/legacy.py similarity index 95% rename from src/anemoi/datasets/create/sources/legacy.py rename to src/anemoi/datasets/build/gridded/sources/legacy.py index f9a0288a0..d4110cf5b 100644 --- a/src/anemoi/datasets/create/sources/legacy.py +++ b/src/anemoi/datasets/build/gridded/sources/legacy.py @@ -12,7 +12,7 @@ from abc import abstractmethod from typing import Any -from anemoi.datasets.create.input.context import Context +from anemoi.datasets.build.input.context import Context from ..source import Source diff --git a/src/anemoi/datasets/create/sources/mars.py b/src/anemoi/datasets/build/gridded/sources/mars.py similarity index 99% rename from src/anemoi/datasets/create/sources/mars.py rename to src/anemoi/datasets/build/gridded/sources/mars.py index 25e223cb4..a2804e77a 100644 --- a/src/anemoi/datasets/create/sources/mars.py +++ b/src/anemoi/datasets/build/gridded/sources/mars.py @@ -16,7 +16,7 @@ from earthkit.data import from_source from earthkit.data.utils.availability import Availability -from anemoi.datasets.create.sources import source_registry +from anemoi.datasets.build.gridded.sources import source_registry from .legacy import LegacySource diff --git a/src/anemoi/datasets/create/sources/netcdf.py b/src/anemoi/datasets/build/gridded/sources/netcdf.py similarity index 100% rename from src/anemoi/datasets/create/sources/netcdf.py rename to src/anemoi/datasets/build/gridded/sources/netcdf.py diff --git a/src/anemoi/datasets/create/sources/opendap.py b/src/anemoi/datasets/build/gridded/sources/opendap.py similarity index 100% rename from src/anemoi/datasets/create/sources/opendap.py rename to src/anemoi/datasets/build/gridded/sources/opendap.py diff --git a/src/anemoi/datasets/create/sources/patterns.py b/src/anemoi/datasets/build/gridded/sources/patterns.py similarity index 100% rename from src/anemoi/datasets/create/sources/patterns.py rename to src/anemoi/datasets/build/gridded/sources/patterns.py diff --git a/src/anemoi/datasets/create/sources/planetary_computer.py b/src/anemoi/datasets/build/gridded/sources/planetary_computer.py similarity index 100% rename from src/anemoi/datasets/create/sources/planetary_computer.py rename to src/anemoi/datasets/build/gridded/sources/planetary_computer.py diff --git a/src/anemoi/datasets/create/sources/recentre.py b/src/anemoi/datasets/build/gridded/sources/recentre.py similarity index 100% rename from src/anemoi/datasets/create/sources/recentre.py rename to src/anemoi/datasets/build/gridded/sources/recentre.py diff --git a/src/anemoi/datasets/create/sources/repeated_dates.py b/src/anemoi/datasets/build/gridded/sources/repeated_dates.py similarity index 89% rename from src/anemoi/datasets/create/sources/repeated_dates.py rename to src/anemoi/datasets/build/gridded/sources/repeated_dates.py index f1f86eb78..509ee4966 100644 --- a/src/anemoi/datasets/create/sources/repeated_dates.py +++ b/src/anemoi/datasets/build/gridded/sources/repeated_dates.py @@ -14,9 +14,10 @@ from anemoi.transform.fields import new_field_with_valid_datetime from anemoi.transform.fields import new_fieldlist_from_list -from anemoi.datasets.create.input.repeated_dates import DateMapper -from anemoi.datasets.create.source import Source -from anemoi.datasets.create.sources import source_registry +from anemoi.datasets.build.input.repeated_dates import DateMapper + +from ..source import Source +from ..sources import source_registry LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/sources/source.py b/src/anemoi/datasets/build/gridded/sources/source.py similarity index 96% rename from src/anemoi/datasets/create/sources/source.py rename to src/anemoi/datasets/build/gridded/sources/source.py index 0c6b23853..1ad5850a7 100644 --- a/src/anemoi/datasets/create/sources/source.py +++ b/src/anemoi/datasets/build/gridded/sources/source.py @@ -12,7 +12,7 @@ from earthkit.data import from_source -from anemoi.datasets.create.sources import source_registry +from anemoi.datasets.build.gridded.sources import source_registry from .legacy import LegacySource diff --git a/src/anemoi/datasets/create/sources/tendencies.py b/src/anemoi/datasets/build/gridded/sources/tendencies.py similarity index 98% rename from src/anemoi/datasets/create/sources/tendencies.py rename to src/anemoi/datasets/build/gridded/sources/tendencies.py index cdf4ce291..69c06a78c 100644 --- a/src/anemoi/datasets/create/sources/tendencies.py +++ b/src/anemoi/datasets/build/gridded/sources/tendencies.py @@ -14,7 +14,7 @@ from earthkit.data.core.temporary import temp_file from earthkit.data.readers.grib.output import new_grib_output -from anemoi.datasets.create.sources import source_registry +from anemoi.datasets.build.gridded.sources import source_registry from .legacy import LegacySource diff --git a/src/anemoi/datasets/create/sources/xarray.py b/src/anemoi/datasets/build/gridded/sources/xarray.py similarity index 97% rename from src/anemoi/datasets/create/sources/xarray.py rename to src/anemoi/datasets/build/gridded/sources/xarray.py index d63b708d6..fb10dab8e 100644 --- a/src/anemoi/datasets/create/sources/xarray.py +++ b/src/anemoi/datasets/build/gridded/sources/xarray.py @@ -11,7 +11,7 @@ import earthkit.data as ekd -from anemoi.datasets.create.typing import DateList +from anemoi.datasets.build.gridded.typing import DateList from ..source import Source from .xarray_support import XarrayFieldList diff --git a/src/anemoi/datasets/create/sources/xarray_kerchunk.py b/src/anemoi/datasets/build/gridded/sources/xarray_kerchunk.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_kerchunk.py rename to src/anemoi/datasets/build/gridded/sources/xarray_kerchunk.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/README.md b/src/anemoi/datasets/build/gridded/sources/xarray_support/README.md similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/README.md rename to src/anemoi/datasets/build/gridded/sources/xarray_support/README.md diff --git a/src/anemoi/datasets/create/sources/xarray_support/__init__.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/__init__.py similarity index 98% rename from src/anemoi/datasets/create/sources/xarray_support/__init__.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/__init__.py index 8e3cebc08..e0f4a7e75 100644 --- a/src/anemoi/datasets/create/sources/xarray_support/__init__.py +++ b/src/anemoi/datasets/build/gridded/sources/xarray_support/__init__.py @@ -15,7 +15,7 @@ import xarray as xr from earthkit.data.core.fieldlist import MultiFieldList -from anemoi.datasets.create.sources.patterns import iterate_patterns +from anemoi.datasets.build.gridded.sources.patterns import iterate_patterns from .. import source_registry from ..legacy import LegacySource diff --git a/src/anemoi/datasets/create/sources/xarray_support/coordinates.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/coordinates.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/coordinates.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/coordinates.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/field.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/field.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/field.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/field.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/fieldlist.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/fieldlist.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/fieldlist.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/fieldlist.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/flavour.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/flavour.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/flavour.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/flavour.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/grid.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/grid.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/grid.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/grid.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/metadata.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/metadata.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/metadata.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/metadata.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/patch.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/patch.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/patch.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/patch.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/time.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/time.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/time.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/time.py diff --git a/src/anemoi/datasets/create/sources/xarray_support/variable.py b/src/anemoi/datasets/build/gridded/sources/xarray_support/variable.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_support/variable.py rename to src/anemoi/datasets/build/gridded/sources/xarray_support/variable.py diff --git a/src/anemoi/datasets/create/sources/xarray_zarr.py b/src/anemoi/datasets/build/gridded/sources/xarray_zarr.py similarity index 100% rename from src/anemoi/datasets/create/sources/xarray_zarr.py rename to src/anemoi/datasets/build/gridded/sources/xarray_zarr.py diff --git a/src/anemoi/datasets/create/sources/zenodo.py b/src/anemoi/datasets/build/gridded/sources/zenodo.py similarity index 100% rename from src/anemoi/datasets/create/sources/zenodo.py rename to src/anemoi/datasets/build/gridded/sources/zenodo.py diff --git a/src/anemoi/datasets/create/statistics/__init__.py b/src/anemoi/datasets/build/gridded/statistics/__init__.py similarity index 99% rename from src/anemoi/datasets/create/statistics/__init__.py rename to src/anemoi/datasets/build/gridded/statistics/__init__.py index f74cbf364..e9835bfe2 100644 --- a/src/anemoi/datasets/create/statistics/__init__.py +++ b/src/anemoi/datasets/build/gridded/statistics/__init__.py @@ -23,8 +23,8 @@ from anemoi.utils.provenance import gather_provenance_info from numpy.typing import NDArray -from ..check import check_data_values -from .summary import Summary +from anemoi.datasets.build.gridded.check import check_data_values +from anemoi.datasets.build.gridded.statistics.summary import Summary LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/statistics/summary.py b/src/anemoi/datasets/build/gridded/statistics/summary.py similarity index 95% rename from src/anemoi/datasets/create/statistics/summary.py rename to src/anemoi/datasets/build/gridded/statistics/summary.py index 6c7bbb433..2f81f4e5b 100644 --- a/src/anemoi/datasets/create/statistics/summary.py +++ b/src/anemoi/datasets/build/gridded/statistics/summary.py @@ -13,9 +13,9 @@ import numpy as np -from ..check import StatisticsValueError -from ..check import check_data_values -from ..check import check_stats +from anemoi.datasets.build.gridded.check import StatisticsValueError +from anemoi.datasets.build.gridded.check import check_data_values +from anemoi.datasets.build.gridded.check import check_stats class Summary(dict): diff --git a/src/anemoi/datasets/create/testing.py b/src/anemoi/datasets/build/gridded/testing.py similarity index 100% rename from src/anemoi/datasets/create/testing.py rename to src/anemoi/datasets/build/gridded/testing.py diff --git a/src/anemoi/datasets/create/typing.py b/src/anemoi/datasets/build/gridded/typing.py similarity index 100% rename from src/anemoi/datasets/create/typing.py rename to src/anemoi/datasets/build/gridded/typing.py diff --git a/src/anemoi/datasets/create/utils.py b/src/anemoi/datasets/build/gridded/utils.py similarity index 100% rename from src/anemoi/datasets/create/utils.py rename to src/anemoi/datasets/build/gridded/utils.py diff --git a/src/anemoi/datasets/create/writer.py b/src/anemoi/datasets/build/gridded/writer.py similarity index 100% rename from src/anemoi/datasets/create/writer.py rename to src/anemoi/datasets/build/gridded/writer.py diff --git a/src/anemoi/datasets/create/zarr.py b/src/anemoi/datasets/build/gridded/zarr.py similarity index 100% rename from src/anemoi/datasets/create/zarr.py rename to src/anemoi/datasets/build/gridded/zarr.py diff --git a/src/anemoi/datasets/create/input/__init__.py b/src/anemoi/datasets/build/input/__init__.py similarity index 87% rename from src/anemoi/datasets/create/input/__init__.py rename to src/anemoi/datasets/build/input/__init__.py index e30ecefb5..c3d601fd1 100644 --- a/src/anemoi/datasets/create/input/__init__.py +++ b/src/anemoi/datasets/build/input/__init__.py @@ -12,10 +12,8 @@ from typing import TYPE_CHECKING from typing import Any -from anemoi.datasets.create.input.context.field import FieldContext - if TYPE_CHECKING: - from anemoi.datasets.create.input.action import Recipe + from anemoi.datasets.build.input.action import Recipe class InputBuilder: @@ -40,8 +38,8 @@ def __init__(self, config: dict, data_sources: dict | list, **kwargs: Any) -> No @cached_property def action(self) -> "Recipe": """Returns the action object based on the configuration.""" - from .action import Recipe - from .action import action_factory + from anemoi.datasets.build.input.action import Recipe + from anemoi.datasets.build.input.action import action_factory sources = action_factory(self.data_sources, "data_sources") input = action_factory(self.config, "input") @@ -61,7 +59,9 @@ def select(self, argument) -> Any: Any Selected data. """ - context = FieldContext(argument, **self.kwargs) + from anemoi.datasets.build.gridded.context import GriddedContext + + context = GriddedContext(argument, **self.kwargs) return context.create_result(self.action(context, argument)) diff --git a/src/anemoi/datasets/create/input/action.py b/src/anemoi/datasets/build/input/action.py similarity index 97% rename from src/anemoi/datasets/create/input/action.py rename to src/anemoi/datasets/build/input/action.py index 7808ae717..1a37d2f99 100644 --- a/src/anemoi/datasets/create/input/action.py +++ b/src/anemoi/datasets/build/input/action.py @@ -181,7 +181,7 @@ class DatasetSourceMixin: """Mixin class for sources defined in anemoi-datasets""" def create_object(self, context, config): - from anemoi.datasets.create.sources import create_source as create_datasets_source + from anemoi.datasets.build.gridded.sources import create_source as create_datasets_source return create_datasets_source(context, config) @@ -286,7 +286,7 @@ def make(key, config, *path): from anemoi.transform.filters import filter_registry as transform_filter_registry from anemoi.transform.sources import source_registry as transform_source_registry - from anemoi.datasets.create.sources import source_registry as dataset_source_registry + from anemoi.datasets.build.gridded.sources import source_registry as dataset_source_registry # Register sources, local first for name in dataset_source_registry.registered: diff --git a/src/anemoi/datasets/create/input/context/__init__.py b/src/anemoi/datasets/build/input/context.py similarity index 96% rename from src/anemoi/datasets/create/input/context/__init__.py rename to src/anemoi/datasets/build/input/context.py index 89df7a727..e8572ba78 100644 --- a/src/anemoi/datasets/create/input/context/__init__.py +++ b/src/anemoi/datasets/build/input/context.py @@ -55,7 +55,7 @@ def resolve(self, config): return config def create_source(self, config: Any, *path) -> Any: - from anemoi.datasets.create.input.action import action_factory + from anemoi.datasets.build.input.action import action_factory if not isinstance(config, dict): # It is already a result (e.g. ekd.FieldList), loaded from ${a.b.c} diff --git a/src/anemoi/datasets/create/input/data_sources.py b/src/anemoi/datasets/build/input/data_sources.py similarity index 93% rename from src/anemoi/datasets/create/input/data_sources.py rename to src/anemoi/datasets/build/input/data_sources.py index 31bf3d8cc..6e9bfaa6a 100644 --- a/src/anemoi/datasets/create/input/data_sources.py +++ b/src/anemoi/datasets/build/input/data_sources.py @@ -13,11 +13,11 @@ from earthkit.data import FieldList -from ...dates.groups import GroupOfDates -from .action import Action -from .action import action_factory -from .misc import _tidy -from .result.field import Result +from anemoi.datasets.build.gridded.result import Result +from anemoi.datasets.build.input.action import Action +from anemoi.datasets.build.input.action import action_factory +from anemoi.datasets.build.input.misc import _tidy +from anemoi.datasets.dates.groups import GroupOfDates LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/input/misc.py b/src/anemoi/datasets/build/input/misc.py similarity index 100% rename from src/anemoi/datasets/create/input/misc.py rename to src/anemoi/datasets/build/input/misc.py diff --git a/src/anemoi/datasets/create/input/repeated_dates.py b/src/anemoi/datasets/build/input/repeated_dates.py similarity index 97% rename from src/anemoi/datasets/create/input/repeated_dates.py rename to src/anemoi/datasets/build/input/repeated_dates.py index ad46fe208..f20d764ec 100644 --- a/src/anemoi/datasets/create/input/repeated_dates.py +++ b/src/anemoi/datasets/build/input/repeated_dates.py @@ -19,11 +19,11 @@ from anemoi.utils.dates import as_datetime from anemoi.utils.dates import frequency_to_timedelta -from .action import Action -from .action import action_factory -from .join import JoinResult -from .result.field import Result -from .trace import trace_select +from anemoi.datasets.build.gridded.result import Result +from anemoi.datasets.build.input.action import Action +from anemoi.datasets.build.input.action import action_factory +from anemoi.datasets.build.input.join import JoinResult +from anemoi.datasets.build.input.trace import trace_select LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/input/result/__init__.py b/src/anemoi/datasets/build/input/result.py similarity index 100% rename from src/anemoi/datasets/create/input/result/__init__.py rename to src/anemoi/datasets/build/input/result.py diff --git a/src/anemoi/datasets/create/input/trace.py b/src/anemoi/datasets/build/input/trace.py similarity index 100% rename from src/anemoi/datasets/create/input/trace.py rename to src/anemoi/datasets/build/input/trace.py diff --git a/src/anemoi/datasets/commands/check.py b/src/anemoi/datasets/commands/check.py index 61b29bf23..212987839 100644 --- a/src/anemoi/datasets/commands/check.py +++ b/src/anemoi/datasets/commands/check.py @@ -13,7 +13,7 @@ import yaml -from anemoi.datasets.create.check import DatasetName +from anemoi.datasets.build.gridded.check import DatasetName from . import Command @@ -90,7 +90,7 @@ def _check_name(self, name: str) -> None: def _check_zarr(self, zarr: str) -> None: - from anemoi.datasets.check import check_zarr + from anemoi.datasets.misc.check import check_zarr check_zarr(zarr) diff --git a/src/anemoi/datasets/commands/copy.py b/src/anemoi/datasets/commands/copy.py index 5020a208d..886726d99 100644 --- a/src/anemoi/datasets/commands/copy.py +++ b/src/anemoi/datasets/commands/copy.py @@ -19,7 +19,7 @@ from anemoi.utils.remote import Transfer from anemoi.utils.remote import TransferMethodNotImplementedError -from anemoi.datasets.check import check_zarr +from anemoi.datasets.misc.check import check_zarr from . import Command diff --git a/src/anemoi/datasets/commands/create.py b/src/anemoi/datasets/commands/create.py index 3f6bbe7dd..601468d5c 100644 --- a/src/anemoi/datasets/commands/create.py +++ b/src/anemoi/datasets/commands/create.py @@ -45,7 +45,7 @@ def task(what: str, options: dict, *args: Any, **kwargs: Any) -> Any: now = datetime.datetime.now() LOG.info(f"🎬 Task {what}({args},{kwargs}) starting") - from anemoi.datasets.create import creator_factory + from anemoi.datasets.build.gridded import creator_factory options = {k: v for k, v in options.items() if v is not None} diff --git a/src/anemoi/datasets/commands/grib-index.py b/src/anemoi/datasets/commands/grib-index.py index cfd7a08e8..59c2fba89 100644 --- a/src/anemoi/datasets/commands/grib-index.py +++ b/src/anemoi/datasets/commands/grib-index.py @@ -83,7 +83,7 @@ def match(path: str) -> bool: """ return fnmatch.fnmatch(os.path.basename(path), args.match) - from anemoi.datasets.create.sources.grib_index import GribIndex + from anemoi.datasets.build.gridded.sources.grib_index import GribIndex index = GribIndex( args.index, diff --git a/src/anemoi/datasets/commands/inspect.py b/src/anemoi/datasets/commands/inspect.py index 384ee7d34..50840ccbe 100644 --- a/src/anemoi/datasets/commands/inspect.py +++ b/src/anemoi/datasets/commands/inspect.py @@ -27,8 +27,8 @@ from numpy.typing import NDArray from anemoi.datasets import open_dataset -from anemoi.datasets.data.stores import open_zarr -from anemoi.datasets.data.stores import zarr_lookup +from anemoi.datasets.use.gridded.stores import open_zarr +from anemoi.datasets.use.gridded.stores import zarr_lookup from . import Command diff --git a/src/anemoi/datasets/commands/recipe/__init__.py b/src/anemoi/datasets/commands/recipe/__init__.py index 45400806c..85fd574e3 100644 --- a/src/anemoi/datasets/commands/recipe/__init__.py +++ b/src/anemoi/datasets/commands/recipe/__init__.py @@ -15,7 +15,7 @@ import yaml -from anemoi.datasets.create import validate_config +from anemoi.datasets.build.gridded import validate_config from .. import Command from .format import format_recipe diff --git a/src/anemoi/datasets/commands/recipe/format.py b/src/anemoi/datasets/commands/recipe/format.py index 872060981..b6993a49a 100644 --- a/src/anemoi/datasets/commands/recipe/format.py +++ b/src/anemoi/datasets/commands/recipe/format.py @@ -11,7 +11,7 @@ import datetime import logging -from ...dumper import yaml_dump +from anemoi.datasets.misc.dumper import yaml_dump LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/commands/recipe/migrate.py b/src/anemoi/datasets/commands/recipe/migrate.py index 03da61fbc..8ca2ddd5d 100644 --- a/src/anemoi/datasets/commands/recipe/migrate.py +++ b/src/anemoi/datasets/commands/recipe/migrate.py @@ -17,8 +17,8 @@ from glom import delete from glom import glom -from anemoi.datasets.create import validate_config -from anemoi.datasets.dumper import yaml_dump +from anemoi.datasets.build.gridded import validate_config +from anemoi.datasets.misc.dumper import yaml_dump LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/commands/validate.py b/src/anemoi/datasets/commands/validate.py index 1382814a7..dfc2d297b 100644 --- a/src/anemoi/datasets/commands/validate.py +++ b/src/anemoi/datasets/commands/validate.py @@ -10,7 +10,7 @@ import logging from typing import Any -from anemoi.datasets.validate import validate_dataset +from anemoi.datasets.misc.validate import validate_dataset from . import Command diff --git a/src/anemoi/datasets/misc/__init__.py b/src/anemoi/datasets/misc/__init__.py new file mode 100644 index 000000000..9fc775e54 --- /dev/null +++ b/src/anemoi/datasets/misc/__init__.py @@ -0,0 +1,8 @@ +# (C) Copyright 2025 Anemoi contributors. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/src/anemoi/datasets/check.py b/src/anemoi/datasets/misc/check.py similarity index 100% rename from src/anemoi/datasets/check.py rename to src/anemoi/datasets/misc/check.py diff --git a/src/anemoi/datasets/dumper.py b/src/anemoi/datasets/misc/dumper.py similarity index 100% rename from src/anemoi/datasets/dumper.py rename to src/anemoi/datasets/misc/dumper.py diff --git a/src/anemoi/datasets/grids.py b/src/anemoi/datasets/misc/grids.py similarity index 100% rename from src/anemoi/datasets/grids.py rename to src/anemoi/datasets/misc/grids.py diff --git a/src/anemoi/datasets/testing.py b/src/anemoi/datasets/misc/testing.py similarity index 100% rename from src/anemoi/datasets/testing.py rename to src/anemoi/datasets/misc/testing.py diff --git a/src/anemoi/datasets/validate.py b/src/anemoi/datasets/misc/validate.py similarity index 99% rename from src/anemoi/datasets/validate.py rename to src/anemoi/datasets/misc/validate.py index 9e1c6c891..9c103f470 100644 --- a/src/anemoi/datasets/validate.py +++ b/src/anemoi/datasets/misc/validate.py @@ -14,8 +14,8 @@ import numpy as np -from anemoi.datasets.data.dataset import Dataset -from anemoi.datasets.testing import default_test_indexing +from anemoi.datasets.misc.testing import default_test_indexing +from anemoi.datasets.use.gridded.dataset import Dataset LOG = logging.getLogger(__name__) # List of methods called during training. To update the list, run training with ANEMOI_DATASETS_TRACE=1 diff --git a/src/anemoi/datasets/schemas/recipe.json b/src/anemoi/datasets/schemas/recipe.json deleted file mode 100644 index 3c02bfd64..000000000 --- a/src/anemoi/datasets/schemas/recipe.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "type": "object", - "$id": "https://ecmwf.int/anemoi-datasets-recipe.schema.json", - "title": "Product", - "description": "Anemoi datasets recipe configuration", - "additionalProperties": false, - "$defs": { - "source-or-filter": { - "type": "object", - "minProperties": 1, - "maxProperties": 1 - }, - "pipe": { - "type": "array", - "items": { - "$ref": "#/$defs/input-object" - } - }, - "join": { - "type": "array", - "items": { - "$ref": "#/$defs/input-object" - } - }, - "concat": { - "type": "array", - "items": { - "type": "object", - "minProperties": 2, - "maxProperties": 2, - "required": [ - "dates" - ] - } - }, - "input-object": { - "oneOf": [ - { - "$ref": "#/$defs/pipe" - }, - { - "$ref": "#/$defs/join" - }, - { - "$ref": "#/$defs/concat" - }, - { - "$ref": "#/$defs/source-or-filter" - } - ] - } - }, - "properties": { - "env": { - "type": "object" - }, - "description": { - "type": "string" - }, - "name": { - "type": "string" - }, - "licence": { - "type": "string" - }, - "attribution": { - "type": "string" - }, - "dates": { - "type": "object", - "required": [ - "start", - "end" - ], - "properties": { - "start": { - "type": "string", - "format": "date" - }, - "end": { - "type": "string", - "format": "date" - }, - "frequency": { - "type": [ - "integer", - "string" - ] - }, - "group_by": { - "type": [ - "integer", - "string" - ] - } - } - }, - "input": { - "$ref": "#/$defs/input-object" - }, - "data_sources": { - "type": "object", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "$ref": "#/$defs/input-object" - } - }, - "additionalProperties": false - }, - "output": { - "type": "object" - }, - "statistics": { - "type": "object" - }, - "build": { - "type": "object" - }, - "common": { - "type": "object" - }, - "platform": { - "type": "object" - } - }, - "required": [ - "dates", - "input" - ] -} diff --git a/src/anemoi/datasets/use/__init__.py b/src/anemoi/datasets/use/__init__.py new file mode 100644 index 000000000..9fc775e54 --- /dev/null +++ b/src/anemoi/datasets/use/__init__.py @@ -0,0 +1,8 @@ +# (C) Copyright 2025 Anemoi contributors. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/src/anemoi/datasets/data/__init__.py b/src/anemoi/datasets/use/gridded/__init__.py similarity index 89% rename from src/anemoi/datasets/data/__init__.py rename to src/anemoi/datasets/use/gridded/__init__.py index f32d83bb2..dbbfcd9a5 100644 --- a/src/anemoi/datasets/data/__init__.py +++ b/src/anemoi/datasets/use/gridded/__init__.py @@ -15,13 +15,13 @@ # from .dataset import FullIndex # from .dataset import Shape # from .dataset import TupleIndex -from .misc import _open_dataset -from .misc import _save_dataset -from .misc import add_dataset_path -from .misc import add_named_dataset +from anemoi.datasets.use.gridded.misc import _open_dataset +from anemoi.datasets.use.gridded.misc import _save_dataset +from anemoi.datasets.use.gridded.misc import add_dataset_path +from anemoi.datasets.use.gridded.misc import add_named_dataset if TYPE_CHECKING: - from .dataset import Dataset + from anemoi.datasets.use.gridded.dataset import Dataset LOG = logging.getLogger(__name__) @@ -95,7 +95,7 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset": ds._check() if trace: - from anemoi.datasets.testing import Trace + from anemoi.datasets.misc.testing import Trace ds = Trace(ds) diff --git a/src/anemoi/datasets/data/complement.py b/src/anemoi/datasets/use/gridded/complement.py similarity index 94% rename from src/anemoi/datasets/data/complement.py rename to src/anemoi/datasets/use/gridded/complement.py index be5f84409..1881a74fa 100644 --- a/src/anemoi/datasets/data/complement.py +++ b/src/anemoi/datasets/use/gridded/complement.py @@ -16,18 +16,18 @@ import numpy as np from numpy.typing import NDArray -from ..grids import nearest_grid_points -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .forwards import Combined -from .indexing import apply_index_to_slices_changes -from .indexing import index_to_slices -from .indexing import update_tuple -from .misc import _auto_adjust -from .misc import _open_dataset +from anemoi.datasets.misc.grids import nearest_grid_points +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open_dataset LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/concat.py b/src/anemoi/datasets/use/gridded/concat.py similarity index 90% rename from src/anemoi/datasets/data/concat.py rename to src/anemoi/datasets/use/gridded/concat.py index 234001c8c..2f3811995 100644 --- a/src/anemoi/datasets/data/concat.py +++ b/src/anemoi/datasets/use/gridded/concat.py @@ -16,20 +16,20 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Combined -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import length_to_slices -from .indexing import update_tuple -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import length_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) @@ -229,7 +229,7 @@ def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: boo s = ranges[i + 1] if r[1] + frequency != s[0]: if fill_missing_gaps: - from .missing import MissingDataset + from anemoi.datasets.use.gridded.missing import MissingDataset result.append(MissingDataset(datasets[i], r[1] + frequency, s[0] - frequency)) else: diff --git a/src/anemoi/datasets/data/dataset.py b/src/anemoi/datasets/use/gridded/dataset.py similarity index 94% rename from src/anemoi/datasets/data/dataset.py rename to src/anemoi/datasets/use/gridded/dataset.py index f463bca9f..d52a2753d 100644 --- a/src/anemoi/datasets/data/dataset.py +++ b/src/anemoi/datasets/use/gridded/dataset.py @@ -34,8 +34,8 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from .debug import Node -from .debug import Source +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import Source if TYPE_CHECKING: import matplotlib @@ -165,7 +165,7 @@ def __subset(self, **kwargs: Any) -> "Dataset": # This one must be first if "fill_missing_dates" in kwargs: - from .fill_missing import fill_missing_dates_factory + from anemoi.datasets.use.gridded.fill_missing import fill_missing_dates_factory fill_missing_dates = kwargs.pop("fill_missing_dates") ds = fill_missing_dates_factory(self, fill_missing_dates, kwargs) @@ -179,7 +179,7 @@ def __subset(self, **kwargs: Any) -> "Dataset": if padding: if padding != "empty": raise ValueError(f"Only 'empty' padding is supported, got {padding=}") - from .padded import Padded + from anemoi.datasets.use.gridded.padded import Padded frequency = kwargs.pop("frequency", self.frequency) return ( @@ -188,14 +188,14 @@ def __subset(self, **kwargs: Any) -> "Dataset": .mutate() ) - from .subset import Subset + from anemoi.datasets.use.gridded.subset import Subset return ( Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs).mutate() ) if "frequency" in kwargs: - from .subset import Subset + from anemoi.datasets.use.gridded.subset import Subset if "interpolate_frequency" in kwargs: raise ValueError("Cannot use both `frequency` and `interpolate_frequency`") @@ -208,38 +208,38 @@ def __subset(self, **kwargs: Any) -> "Dataset": ) if "select" in kwargs: - from .select import Select + from anemoi.datasets.use.gridded.select import Select select = kwargs.pop("select") return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs).mutate() if "drop" in kwargs: - from .select import Select + from anemoi.datasets.use.gridded.select import Select drop = kwargs.pop("drop") return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs).mutate() if "reorder" in kwargs: - from .select import Select + from anemoi.datasets.use.gridded.select import Select reorder = kwargs.pop("reorder") return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate() if "rename" in kwargs: - from .select import Rename + from anemoi.datasets.use.gridded.select import Rename rename = kwargs.pop("rename") return Rename(self, rename)._subset(**kwargs).mutate() if "rescale" in kwargs: - from .rescale import Rescale + from anemoi.datasets.use.gridded.rescale import Rescale rescale = kwargs.pop("rescale") return Rescale(self, rescale)._subset(**kwargs).mutate() if "statistics" in kwargs: - from ..data import open_dataset - from .statistics import Statistics + from anemoi.datasets.use.gridded import open_dataset + from anemoi.datasets.use.gridded.statistics import Statistics statistics = kwargs.pop("statistics") @@ -247,26 +247,26 @@ def __subset(self, **kwargs: Any) -> "Dataset": # Note: trim_edge should go before thinning if "trim_edge" in kwargs: - from .masked import TrimEdge + from anemoi.datasets.use.gridded.masked import TrimEdge edge = kwargs.pop("trim_edge") return TrimEdge(self, edge)._subset(**kwargs).mutate() if "thinning" in kwargs: - from .masked import Thinning + from anemoi.datasets.use.gridded.masked import Thinning thinning = kwargs.pop("thinning") method = kwargs.pop("method", "every-nth") return Thinning(self, thinning, method)._subset(**kwargs).mutate() if "area" in kwargs: - from .masked import Cropping + from anemoi.datasets.use.gridded.masked import Cropping bbox = kwargs.pop("area") return Cropping(self, bbox)._subset(**kwargs).mutate() if "number" in kwargs or "numbers" in kwargs or "member" in kwargs or "members" in kwargs: - from .ensemble import Number + from anemoi.datasets.use.gridded.ensemble import Number members = {} for key in ["number", "numbers", "member", "members"]: @@ -276,13 +276,13 @@ def __subset(self, **kwargs: Any) -> "Dataset": return Number(self, **members)._subset(**kwargs).mutate() if "set_missing_dates" in kwargs: - from .missing import MissingDates + from anemoi.datasets.use.gridded.missing import MissingDates set_missing_dates = kwargs.pop("set_missing_dates") return MissingDates(self, set_missing_dates)._subset(**kwargs).mutate() if "skip_missing_dates" in kwargs: - from .missing import SkipMissingDates + from anemoi.datasets.use.gridded.missing import SkipMissingDates if "expected_access" not in kwargs: raise ValueError("`expected_access` is required with `skip_missing_dates`") @@ -294,19 +294,19 @@ def __subset(self, **kwargs: Any) -> "Dataset": return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate() if "rolling_average" in kwargs: - from .rolling_average import RollingAverage + from anemoi.datasets.use.gridded.rolling_average import RollingAverage rolling_average = kwargs.pop("rolling_average") return RollingAverage(self, rolling_average)._subset(**kwargs).mutate() if "interpolate_frequency" in kwargs: - from .interpolate import InterpolateFrequency + from anemoi.datasets.use.gridded.interpolate import InterpolateFrequency interpolate_frequency = kwargs.pop("interpolate_frequency") return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate() if "interpolate_variables" in kwargs: - from .interpolate import InterpolateNearest + from anemoi.datasets.use.gridded.interpolate import InterpolateNearest interpolate_variables = kwargs.pop("interpolate_variables") max_distance = kwargs.pop("max_distance", None) @@ -314,7 +314,7 @@ def __subset(self, **kwargs: Any) -> "Dataset": # Keep last if "shuffle" in kwargs: - from .subset import Subset + from anemoi.datasets.use.gridded.subset import Subset shuffle = kwargs.pop("shuffle") @@ -378,8 +378,8 @@ def _dates_to_indices( list of int The list of indices. """ - from .misc import as_first_date - from .misc import as_last_date + from anemoi.datasets.use.gridded.misc import as_first_date + from anemoi.datasets.use.gridded.misc import as_last_date # TODO: optimize diff --git a/src/anemoi/datasets/data/debug.css b/src/anemoi/datasets/use/gridded/debug.css similarity index 100% rename from src/anemoi/datasets/data/debug.css rename to src/anemoi/datasets/use/gridded/debug.css diff --git a/src/anemoi/datasets/data/debug.py b/src/anemoi/datasets/use/gridded/debug.py similarity index 99% rename from src/anemoi/datasets/data/debug.py rename to src/anemoi/datasets/use/gridded/debug.py index 0c58dafa1..25b6649a6 100644 --- a/src/anemoi/datasets/data/debug.py +++ b/src/anemoi/datasets/use/gridded/debug.py @@ -20,7 +20,7 @@ from numpy.typing import NDArray if TYPE_CHECKING: - from .dataset import Dataset + from anemoi.datasets.use.gridded.dataset import Dataset LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/ensemble.py b/src/anemoi/datasets/use/gridded/ensemble.py similarity index 88% rename from src/anemoi/datasets/data/ensemble.py rename to src/anemoi/datasets/use/gridded/ensemble.py index 50725c2c1..0d1aa15b2 100644 --- a/src/anemoi/datasets/data/ensemble.py +++ b/src/anemoi/datasets/use/gridded/ensemble.py @@ -14,17 +14,17 @@ import numpy as np from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .debug import Node -from .forwards import Forwards -from .forwards import GivenAxis -from .indexing import apply_index_to_slices_changes -from .indexing import index_to_slices -from .indexing import update_tuple -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.forwards import GivenAxis +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/fill_missing.py b/src/anemoi/datasets/use/gridded/fill_missing.py similarity index 92% rename from src/anemoi/datasets/data/fill_missing.py rename to src/anemoi/datasets/use/gridded/fill_missing.py index d705b1d75..337549cfc 100644 --- a/src/anemoi/datasets/data/fill_missing.py +++ b/src/anemoi/datasets/use/gridded/fill_missing.py @@ -14,18 +14,17 @@ import numpy as np from numpy.typing import NDArray -from anemoi.datasets.data import MissingDateError - -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple +from anemoi.datasets.use.gridded import MissingDateError +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/forwards.py b/src/anemoi/datasets/use/gridded/forwards.py similarity index 96% rename from src/anemoi/datasets/data/forwards.py rename to src/anemoi/datasets/use/gridded/forwards.py index 4e2219b1c..d0b8dedcb 100644 --- a/src/anemoi/datasets/data/forwards.py +++ b/src/anemoi/datasets/use/gridded/forwards.py @@ -18,16 +18,16 @@ import numpy as np from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import debug_indexing -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import length_to_slices -from .indexing import update_tuple +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import length_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/grids.py b/src/anemoi/datasets/use/gridded/grids.py similarity index 96% rename from src/anemoi/datasets/data/grids.py rename to src/anemoi/datasets/use/gridded/grids.py index 3c350227a..8b399a820 100644 --- a/src/anemoi/datasets/data/grids.py +++ b/src/anemoi/datasets/use/gridded/grids.py @@ -16,16 +16,16 @@ from numpy.typing import NDArray from scipy.spatial import cKDTree -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .forwards import GivenAxis -from .indexing import apply_index_to_slices_changes -from .indexing import index_to_slices -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import GivenAxis +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) @@ -203,7 +203,7 @@ def _initialize_masks(self) -> None: ValueError If the global mask dimension does not match the global dataset grid points. """ - from anemoi.datasets.grids import cutout_mask + from anemoi.datasets.misc.grids import cutout_mask for i, lam in enumerate(self.lams): assert len(lam.shape) == len( diff --git a/src/anemoi/datasets/data/indexing.py b/src/anemoi/datasets/use/gridded/indexing.py similarity index 97% rename from src/anemoi/datasets/data/indexing.py rename to src/anemoi/datasets/use/gridded/indexing.py index 106023ccb..b333ae361 100644 --- a/src/anemoi/datasets/data/indexing.py +++ b/src/anemoi/datasets/use/gridded/indexing.py @@ -15,9 +15,9 @@ import numpy as np from numpy.typing import NDArray -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex def _tuple_with_slices(t: TupleIndex, shape: Shape) -> tuple[TupleIndex, tuple[int, ...]]: diff --git a/src/anemoi/datasets/data/interpolate.py b/src/anemoi/datasets/use/gridded/interpolate.py similarity index 92% rename from src/anemoi/datasets/data/interpolate.py rename to src/anemoi/datasets/use/gridded/interpolate.py index b03404645..f3c5155f9 100644 --- a/src/anemoi/datasets/data/interpolate.py +++ b/src/anemoi/datasets/use/gridded/interpolate.py @@ -17,17 +17,17 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) @@ -227,7 +227,7 @@ def __init__(self, dataset: Dataset, interpolate_variables: list[str], max_dista max_distance : Optional[float], optional The maximum distance for nearest neighbor search, by default None. """ - from ..grids import nearest_grid_points + from anemoi.datasets.misc.grids import nearest_grid_points super().__init__(dataset) self.vars = interpolate_variables diff --git a/src/anemoi/datasets/data/join.py b/src/anemoi/datasets/use/gridded/join.py similarity index 90% rename from src/anemoi/datasets/data/join.py rename to src/anemoi/datasets/use/gridded/join.py index 59aefd3a4..4c146a73d 100644 --- a/src/anemoi/datasets/data/join.py +++ b/src/anemoi/datasets/use/gridded/join.py @@ -16,20 +16,20 @@ import numpy as np from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import Source -from .debug import debug_indexing -from .forwards import Combined -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import Source +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) @@ -173,7 +173,7 @@ def _overlay(self) -> Dataset: if not ok: LOG.warning("Dataset %r completely overridden.", d) - from .select import Select + from anemoi.datasets.use.gridded.select import Select return Select(self, indices, {"overlay": variables}) diff --git a/src/anemoi/datasets/data/masked.py b/src/anemoi/datasets/use/gridded/masked.py similarity index 92% rename from src/anemoi/datasets/data/masked.py rename to src/anemoi/datasets/use/gridded/masked.py index f7eeea03d..d12fc54d4 100644 --- a/src/anemoi/datasets/data/masked.py +++ b/src/anemoi/datasets/use/gridded/masked.py @@ -15,18 +15,18 @@ import numpy as np from numpy.typing import NDArray -from ..grids import cropping_mask -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple +from anemoi.datasets.misc.grids import cropping_mask +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) @@ -214,7 +214,7 @@ def __init__(self, forward: Dataset, area: Dataset | tuple[float, float, float, area : Union[Dataset, Tuple[float, float, float, float]] The cropping area. """ - from ..data import open_dataset + from anemoi.datasets.use.gridded import open_dataset area = area if isinstance(area, (list, tuple)) else open_dataset(area) diff --git a/src/anemoi/datasets/data/merge.py b/src/anemoi/datasets/use/gridded/merge.py similarity index 91% rename from src/anemoi/datasets/data/merge.py rename to src/anemoi/datasets/use/gridded/merge.py index ca2697dda..d6a1943e5 100644 --- a/src/anemoi/datasets/data/merge.py +++ b/src/anemoi/datasets/use/gridded/merge.py @@ -16,19 +16,19 @@ import numpy as np from numpy.typing import NDArray -from . import MissingDateError -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Combined -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded import MissingDateError +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/misc.py b/src/anemoi/datasets/use/gridded/misc.py similarity index 94% rename from src/anemoi/datasets/data/misc.py rename to src/anemoi/datasets/use/gridded/misc.py index 3252e345a..97549ac24 100644 --- a/src/anemoi/datasets/data/misc.py +++ b/src/anemoi/datasets/use/gridded/misc.py @@ -23,7 +23,7 @@ from numpy.typing import NDArray if TYPE_CHECKING: - from .dataset import Dataset + from anemoi.datasets.use.gridded.dataset import Dataset LOG = logging.getLogger(__name__) @@ -323,11 +323,11 @@ def _concat_or_join(datasets: list["Dataset"], kwargs: dict[str, Any]) -> tuple[ ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets] if len(set(ranges)) == 1: - from .join import Join + from anemoi.datasets.use.gridded.join import Join return Join(datasets)._overlay(), kwargs - from .concat import Concat + from anemoi.datasets.use.gridded.concat import Concat Concat.check_dataset_compatibility(datasets) @@ -347,9 +347,9 @@ def _open(a: str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]) -> " Dataset The opened dataset. """ - from .dataset import Dataset - from .stores import Zarr - from .stores import zarr_lookup + from anemoi.datasets.use.gridded.dataset import Dataset + from anemoi.datasets.use.gridded.stores import Zarr + from anemoi.datasets.use.gridded.stores import zarr_lookup if isinstance(a, str) and len(a.split(".")) in [2, 3]: @@ -359,7 +359,7 @@ def _open(a: str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]) -> " if "backend" not in metadata: raise ValueError(f"Metadata for {a} does not contain 'backend' key") - from anemoi.datasets.data.records import open_records_dataset + from anemoi.datasets.use.tabular.records import open_records_dataset return open_records_dataset(a, backend=metadata["backend"]) @@ -501,7 +501,7 @@ def _open_dataset(*args: Any, **kwargs: Any) -> "Dataset": sets.append(_open(a)) if "observations" in kwargs: - from .observations import observations_factory + from anemoi.datasets.use.tabular.observations import observations_factory assert not sets, sets @@ -509,70 +509,70 @@ def _open_dataset(*args: Any, **kwargs: Any) -> "Dataset": if "xy" in kwargs: # Experimental feature, may be removed - from .xy import xy_factory + from anemoi.datasets.use.gridded.xy import xy_factory assert not sets, sets return xy_factory(args, kwargs).mutate() if "x" in kwargs and "y" in kwargs: # Experimental feature, may be removed - from .xy import xy_factory + from anemoi.datasets.use.gridded.xy import xy_factory assert not sets, sets return xy_factory(args, kwargs).mutate() if "zip" in kwargs: # Experimental feature, may be removed - from .xy import zip_factory + from anemoi.datasets.use.gridded.xy import zip_factory assert not sets, sets return zip_factory(args, kwargs).mutate() if "chain" in kwargs: # Experimental feature, may be removed - from .unchecked import chain_factory + from anemoi.datasets.use.gridded.unchecked import chain_factory assert not sets, sets return chain_factory(args, kwargs).mutate() if "join" in kwargs: - from .join import join_factory + from anemoi.datasets.use.gridded.join import join_factory assert not sets, sets return join_factory(args, kwargs).mutate() if "concat" in kwargs: - from .concat import concat_factory + from anemoi.datasets.use.gridded.concat import concat_factory assert not sets, sets return concat_factory(args, kwargs).mutate() if "merge" in kwargs: - from .merge import merge_factory + from anemoi.datasets.use.gridded.merge import merge_factory assert not sets, sets return merge_factory(args, kwargs).mutate() if "ensemble" in kwargs: - from .ensemble import ensemble_factory + from anemoi.datasets.use.gridded.ensemble import ensemble_factory assert not sets, sets return ensemble_factory(args, kwargs).mutate() if "grids" in kwargs: - from .grids import grids_factory + from anemoi.datasets.use.gridded.grids import grids_factory assert not sets, sets return grids_factory(args, kwargs).mutate() if "cutout" in kwargs: - from .grids import cutout_factory + from anemoi.datasets.use.gridded.grids import cutout_factory assert not sets, sets return cutout_factory(args, kwargs).mutate() if "complement" in kwargs: - from .complement import complement_factory + from anemoi.datasets.use.gridded.complement import complement_factory assert not sets, sets return complement_factory(args, kwargs).mutate() diff --git a/src/anemoi/datasets/data/missing.py b/src/anemoi/datasets/use/gridded/missing.py similarity index 95% rename from src/anemoi/datasets/data/missing.py rename to src/anemoi/datasets/use/gridded/missing.py index 5e6530bda..b1e83638d 100644 --- a/src/anemoi/datasets/data/missing.py +++ b/src/anemoi/datasets/use/gridded/missing.py @@ -16,17 +16,16 @@ import numpy as np from numpy.typing import NDArray -from anemoi.datasets.create.utils import to_datetime -from anemoi.datasets.data import MissingDateError - -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import expand_list_indexing -from .indexing import update_tuple +from anemoi.datasets.build.gridded.utils import to_datetime +from anemoi.datasets.use.gridded import MissingDateError +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/padded.py b/src/anemoi/datasets/use/gridded/padded.py similarity index 92% rename from src/anemoi/datasets/data/padded.py rename to src/anemoi/datasets/use/gridded/padded.py index d0bebb6fc..1b23fb6fb 100644 --- a/src/anemoi/datasets/data/padded.py +++ b/src/anemoi/datasets/use/gridded/padded.py @@ -17,16 +17,16 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from anemoi.datasets.data.dataset import Dataset -from anemoi.datasets.data.dataset import FullIndex -from anemoi.datasets.data.dataset import Shape -from anemoi.datasets.data.dataset import TupleIndex -from anemoi.datasets.data.debug import Node -from anemoi.datasets.data.debug import debug_indexing -from anemoi.datasets.data.forwards import Forwards -from anemoi.datasets.data.indexing import expand_list_indexing -from anemoi.datasets.data.misc import as_first_date -from anemoi.datasets.data.misc import as_last_date +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.misc import as_first_date +from anemoi.datasets.use.gridded.misc import as_last_date LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/rescale.py b/src/anemoi/datasets/use/gridded/rescale.py similarity index 91% rename from src/anemoi/datasets/data/rescale.py rename to src/anemoi/datasets/use/gridded/rescale.py index 613bbe93e..8426bffbe 100644 --- a/src/anemoi/datasets/data/rescale.py +++ b/src/anemoi/datasets/use/gridded/rescale.py @@ -16,16 +16,16 @@ import numpy as np from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import TupleIndex -from .debug import Node -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/rolling_average.py b/src/anemoi/datasets/use/gridded/rolling_average.py similarity index 98% rename from src/anemoi/datasets/data/rolling_average.py rename to src/anemoi/datasets/use/gridded/rolling_average.py index a7b62e79b..4a0f1525c 100644 --- a/src/anemoi/datasets/data/rolling_average.py +++ b/src/anemoi/datasets/use/gridded/rolling_average.py @@ -15,7 +15,7 @@ import numpy as np from numpy.typing import NDArray -from anemoi.datasets.data.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import expand_list_indexing from .dataset import Dataset from .dataset import FullIndex diff --git a/src/anemoi/datasets/data/select.py b/src/anemoi/datasets/use/gridded/select.py similarity index 91% rename from src/anemoi/datasets/data/select.py rename to src/anemoi/datasets/use/gridded/select.py index 048802892..3cb813bae 100644 --- a/src/anemoi/datasets/data/select.py +++ b/src/anemoi/datasets/use/gridded/select.py @@ -15,18 +15,18 @@ from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import Source -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import update_tuple +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import Source +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/statistics.py b/src/anemoi/datasets/use/gridded/statistics.py similarity index 93% rename from src/anemoi/datasets/data/statistics.py rename to src/anemoi/datasets/use/gridded/statistics.py index af0d4bc6e..236ce1b7a 100644 --- a/src/anemoi/datasets/data/statistics.py +++ b/src/anemoi/datasets/use/gridded/statistics.py @@ -15,10 +15,10 @@ from numpy.typing import NDArray -from . import open_dataset -from .dataset import Dataset -from .debug import Node -from .forwards import Forwards +from anemoi.datasets.use.gridded import open_dataset +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import Forwards LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/stores.py b/src/anemoi/datasets/use/gridded/stores.py similarity index 96% rename from src/anemoi/datasets/data/stores.py rename to src/anemoi/datasets/use/gridded/stores.py index 78470fec6..f48ebedf5 100644 --- a/src/anemoi/datasets/data/stores.py +++ b/src/anemoi/datasets/use/gridded/stores.py @@ -22,17 +22,17 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from . import MissingDateError -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import DEBUG_ZARR_LOADING -from .debug import Node -from .debug import Source -from .debug import debug_indexing -from .indexing import expand_list_indexing -from .misc import load_config +from anemoi.datasets.use.gridded import MissingDateError +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import DEBUG_ZARR_LOADING +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import Source +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.misc import load_config LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/subset.py b/src/anemoi/datasets/use/gridded/subset.py similarity index 89% rename from src/anemoi/datasets/data/subset.py rename to src/anemoi/datasets/use/gridded/subset.py index 8954fa5bc..13b5d71e0 100644 --- a/src/anemoi/datasets/data/subset.py +++ b/src/anemoi/datasets/use/gridded/subset.py @@ -19,19 +19,19 @@ from anemoi.utils.dates import frequency_to_timedelta from numpy.typing import NDArray -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .dataset import TupleIndex -from .debug import Node -from .debug import Source -from .debug import debug_indexing -from .forwards import Forwards -from .indexing import apply_index_to_slices_changes -from .indexing import expand_list_indexing -from .indexing import index_to_slices -from .indexing import make_slice_or_index_from_list_or_tuple -from .indexing import update_tuple +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.dataset import TupleIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.debug import Source +from anemoi.datasets.use.gridded.debug import debug_indexing +from anemoi.datasets.use.gridded.forwards import Forwards +from anemoi.datasets.use.gridded.indexing import apply_index_to_slices_changes +from anemoi.datasets.use.gridded.indexing import expand_list_indexing +from anemoi.datasets.use.gridded.indexing import index_to_slices +from anemoi.datasets.use.gridded.indexing import make_slice_or_index_from_list_or_tuple +from anemoi.datasets.use.gridded.indexing import update_tuple LOG = logging.getLogger(__name__) @@ -61,7 +61,7 @@ def _start(a: int, b: int, dates: NDArray[np.datetime64]) -> int: Returns: int: The index of the start date. """ - from .misc import as_first_date + from anemoi.datasets.use.gridded.misc import as_first_date c = as_first_date(a, dates) d = as_first_date(b, dates) @@ -82,7 +82,7 @@ def _end(a: int, b: int, dates: NDArray[np.datetime64]) -> int: Returns: int: The index of the end date. """ - from .misc import as_last_date + from anemoi.datasets.use.gridded.misc import as_last_date c = as_last_date(a, dates) d = as_last_date(b, dates) diff --git a/src/anemoi/datasets/data/unchecked.py b/src/anemoi/datasets/use/gridded/unchecked.py similarity index 94% rename from src/anemoi/datasets/data/unchecked.py rename to src/anemoi/datasets/use/gridded/unchecked.py index cb4a1304c..96907a651 100644 --- a/src/anemoi/datasets/data/unchecked.py +++ b/src/anemoi/datasets/use/gridded/unchecked.py @@ -18,14 +18,14 @@ import numpy as np from numpy.typing import NDArray -from .concat import ConcatMixin -from .dataset import Dataset -from .dataset import FullIndex -from .dataset import Shape -from .debug import Node -from .forwards import Combined -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.concat import ConcatMixin +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.dataset import Shape +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/xy.py b/src/anemoi/datasets/use/gridded/xy.py similarity index 95% rename from src/anemoi/datasets/data/xy.py rename to src/anemoi/datasets/use/gridded/xy.py index d3ae622bb..da51bde61 100644 --- a/src/anemoi/datasets/data/xy.py +++ b/src/anemoi/datasets/use/gridded/xy.py @@ -12,12 +12,12 @@ from functools import cached_property from typing import Any -from .dataset import Dataset -from .dataset import FullIndex -from .debug import Node -from .forwards import Combined -from .misc import _auto_adjust -from .misc import _open +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.dataset import FullIndex +from anemoi.datasets.use.gridded.debug import Node +from anemoi.datasets.use.gridded.forwards import Combined +from anemoi.datasets.use.gridded.misc import _auto_adjust +from anemoi.datasets.use.gridded.misc import _open LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/use/tabular/__init__.py b/src/anemoi/datasets/use/tabular/__init__.py new file mode 100644 index 000000000..9fc775e54 --- /dev/null +++ b/src/anemoi/datasets/use/tabular/__init__.py @@ -0,0 +1,8 @@ +# (C) Copyright 2025 Anemoi contributors. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/src/anemoi/datasets/data/observations/__init__.py b/src/anemoi/datasets/use/tabular/observations/__init__.py similarity index 97% rename from src/anemoi/datasets/data/observations/__init__.py rename to src/anemoi/datasets/use/tabular/observations/__init__.py index bb9595da9..004d9299c 100644 --- a/src/anemoi/datasets/data/observations/__init__.py +++ b/src/anemoi/datasets/use/tabular/observations/__init__.py @@ -14,9 +14,8 @@ import numpy as np from anemoi.utils.dates import frequency_to_timedelta -from anemoi.datasets.data.dataset import Dataset - -from ..debug import Node +from anemoi.datasets.use.gridded.dataset import Dataset +from anemoi.datasets.use.gridded.debug import Node LOG = logging.getLogger(__name__) @@ -139,7 +138,7 @@ def __init__(self, dataset, frequency=None, window=None): if isinstance(dataset, zarr.hierarchy.Group): dataset = dataset._store.path - from ..stores import zarr_lookup + from anemoi.datasets.use.gridded.stores import zarr_lookup dataset = zarr_lookup(dataset) self.path = dataset @@ -177,7 +176,7 @@ def __init__(self, dataset, frequency=None, window=None): # last_window_end must be the end of the time window of the last item last_window_end = int(end.strftime("%Y%m%d%H%M%S")) - from .legacy_obs_dataset import ObsDataset + from anemoi.datasets.use.tabular.observations.legacy_obs_dataset import ObsDataset args = [self.path, first_window_begin, last_window_end] kwargs = dict( diff --git a/src/anemoi/datasets/data/observations/legacy_obs_dataset.py b/src/anemoi/datasets/use/tabular/observations/legacy_obs_dataset.py similarity index 100% rename from src/anemoi/datasets/data/observations/legacy_obs_dataset.py rename to src/anemoi/datasets/use/tabular/observations/legacy_obs_dataset.py diff --git a/src/anemoi/datasets/data/observations/multi.py b/src/anemoi/datasets/use/tabular/observations/multi.py similarity index 97% rename from src/anemoi/datasets/data/observations/multi.py rename to src/anemoi/datasets/use/tabular/observations/multi.py index af5c02e71..31fc4e1dd 100644 --- a/src/anemoi/datasets/data/observations/multi.py +++ b/src/anemoi/datasets/use/tabular/observations/multi.py @@ -10,7 +10,7 @@ import logging import os -from anemoi.datasets.data import open_dataset +from anemoi.datasets.use.gridded import open_dataset LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/data/records/__init__.py b/src/anemoi/datasets/use/tabular/records/__init__.py similarity index 98% rename from src/anemoi/datasets/data/records/__init__.py rename to src/anemoi/datasets/use/tabular/records/__init__.py index f569a4105..9093a5845 100644 --- a/src/anemoi/datasets/data/records/__init__.py +++ b/src/anemoi/datasets/use/tabular/records/__init__.py @@ -16,7 +16,7 @@ import numpy as np from anemoi.utils.dates import frequency_to_timedelta -from anemoi.datasets.data.records.backends import backend_factory +from anemoi.datasets.use.tabular.records.backends import backend_factory LOG = logging.getLogger(__name__) @@ -91,8 +91,8 @@ def _subset(self, **kwargs): if start is not None or end is not None: def _dates_to_indices(start, end): - from anemoi.datasets.data.misc import as_first_date - from anemoi.datasets.data.misc import as_last_date + from anemoi.datasets.use.gridded.misc import as_first_date + from anemoi.datasets.use.gridded.misc import as_last_date start = self.dates[0] if start is None else as_first_date(start, self.dates) end = self.dates[-1] if end is None else as_last_date(end, self.dates) diff --git a/src/anemoi/datasets/data/records/backends/__init__.py b/src/anemoi/datasets/use/tabular/records/backends/__init__.py similarity index 97% rename from src/anemoi/datasets/data/records/backends/__init__.py rename to src/anemoi/datasets/use/tabular/records/backends/__init__.py index 817d3fc88..786202908 100644 --- a/src/anemoi/datasets/data/records/backends/__init__.py +++ b/src/anemoi/datasets/use/tabular/records/backends/__init__.py @@ -100,7 +100,7 @@ def write(self, i, data, **kwargs): np.savez(out_path, **data) def write_metadata(self, metadata): - from anemoi.datasets.create import json_tidy + from anemoi.datasets.build.gridded import json_tidy os.makedirs(self.path, exist_ok=True) with open(os.path.join(self.path, "metadata.json"), "w") as f: @@ -128,7 +128,7 @@ def write(self, i, data, **kwargs): np.savez(out_path, **data) def write_metadata(self, metadata): - from anemoi.datasets.create import json_tidy + from anemoi.datasets.build.gridded import json_tidy os.makedirs(self.path, exist_ok=True) with open(os.path.join(self.path, "metadata.json"), "w") as f: diff --git a/tests/create/test_sources.py b/tests/create/test_sources.py index dbf0d746a..e841744ea 100644 --- a/tests/create/test_sources.py +++ b/tests/create/test_sources.py @@ -96,14 +96,17 @@ def test_grib_gridfile(get_test_data) -> None: ) @skip_if_offline @pytest.mark.parametrize( - "refinement_level_c,shape", + "input_refinement_level_c,output_refinement_level_c,shape", ( - (2, (2, 13, 1, 2880)), - (7, (2, 13, 1, 2949120)), + (7, 2, (2, 13, 1, 2880)), + (7, 7, (2, 13, 1, 2949120)), ), ) def test_grib_gridfile_with_refinement_level( - refinement_level_c: str, shape: tuple[int, int, int, int, int], get_test_data: callable + input_refinement_level_c: str, + output_refinement_level_c: str, + shape: tuple[int, int, int, int, int], + get_test_data: callable, ) -> None: """Test the creation of a dataset from GRIB files with an unstructured grid. @@ -129,11 +132,21 @@ def test_grib_gridfile_with_refinement_level( grib = { "path": os.path.join(path, "{date:strftimedelta(+3h;%Y%m%d%H)}+fc_R03B07_rea_ml.{date:strftime(%Y%m%d%H)}"), - "grid_definition": {"icon": {"path": gridfile, "refinement_level_c": refinement_level_c}}, + "grid_definition": { + "icon": { + "path": gridfile, + "refinement_level_c": input_refinement_level_c, + } + }, "param": param, "level": level, } - refinement_filter = {"icon_refinement_level": {"grid": gridfile, "refinement_level_c": refinement_level_c}} + refinement_filter = { + "icon_refinement_level": { + "grid": gridfile, + "refinement_level_c": output_refinement_level_c, + } + } config = { "dates": { diff --git a/tests/create/utils/compare.py b/tests/create/utils/compare.py index 56b7d0f82..a2793d445 100644 --- a/tests/create/utils/compare.py +++ b/tests/create/utils/compare.py @@ -13,7 +13,7 @@ from anemoi.utils.dates import frequency_to_timedelta from anemoi.datasets import open_dataset -from anemoi.datasets.data.stores import open_zarr +from anemoi.datasets.use.gridded.stores import open_zarr class Comparer: diff --git a/tests/create/utils/create.py b/tests/create/utils/create.py index a57022bab..a10c83132 100644 --- a/tests/create/utils/create.py +++ b/tests/create/utils/create.py @@ -12,7 +12,7 @@ import yaml -from anemoi.datasets.create import creator_factory +from anemoi.datasets.build.gridded import creator_factory class TestingContext: diff --git a/tests/test_chunks.py b/tests/test_chunks.py index 18337b689..529c1f0cd 100644 --- a/tests/test_chunks.py +++ b/tests/test_chunks.py @@ -7,11 +7,11 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -"""Test suite for the ChunkFilter class in the anemoi.datasets.create.chunks module.""" +"""Test suite for the ChunkFilter class in the anemoi.datasets.build.gridded.chunks module.""" import pytest -from anemoi.datasets.create.chunks import ChunkFilter +from anemoi.datasets.build.gridded.chunks import ChunkFilter def test_chunk_filter(): diff --git a/tests/test_data.py b/tests/test_data.py index 50fb96859..a29b4930b 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -25,20 +25,20 @@ from anemoi.datasets import open_dataset from anemoi.datasets.commands.inspect import InspectZarr from anemoi.datasets.commands.inspect import NoVersion -from anemoi.datasets.data import save_dataset -from anemoi.datasets.data.concat import Concat -from anemoi.datasets.data.ensemble import Ensemble -from anemoi.datasets.data.grids import GridsBase -from anemoi.datasets.data.join import Join -from anemoi.datasets.data.misc import as_first_date -from anemoi.datasets.data.misc import as_last_date -from anemoi.datasets.data.padded import Padded -from anemoi.datasets.data.select import Rename -from anemoi.datasets.data.select import Select -from anemoi.datasets.data.statistics import Statistics -from anemoi.datasets.data.stores import Zarr -from anemoi.datasets.data.subset import Subset -from anemoi.datasets.testing import default_test_indexing +from anemoi.datasets.misc.testing import default_test_indexing +from anemoi.datasets.use.gridded import save_dataset +from anemoi.datasets.use.gridded.concat import Concat +from anemoi.datasets.use.gridded.ensemble import Ensemble +from anemoi.datasets.use.gridded.grids import GridsBase +from anemoi.datasets.use.gridded.join import Join +from anemoi.datasets.use.gridded.misc import as_first_date +from anemoi.datasets.use.gridded.misc import as_last_date +from anemoi.datasets.use.gridded.padded import Padded +from anemoi.datasets.use.gridded.select import Rename +from anemoi.datasets.use.gridded.select import Select +from anemoi.datasets.use.gridded.statistics import Statistics +from anemoi.datasets.use.gridded.stores import Zarr +from anemoi.datasets.use.gridded.subset import Subset VALUES = 10 @@ -60,7 +60,7 @@ def mockup_open_zarr(func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): with patch("zarr.convenience.open", zarr_from_str): - with patch("anemoi.datasets.data.stores.zarr_lookup", lambda name: name): + with patch("anemoi.datasets.use.gridded.stores.zarr_lookup", lambda name: name): return func(*args, **kwargs) return wrapper diff --git a/tests/test_data_gridded.py b/tests/test_data_gridded.py index d493a50e7..6e655f601 100644 --- a/tests/test_data_gridded.py +++ b/tests/test_data_gridded.py @@ -42,7 +42,7 @@ def mockup_open_zarr(func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): with patch("zarr.convenience.open", zarr_from_str): - with patch("anemoi.datasets.data.stores.zarr_lookup", lambda name: name): + with patch("anemoi.datasets.use.gridded.stores.zarr_lookup", lambda name: name): return func(*args, **kwargs) return wrapper diff --git a/tests/test_dates.py b/tests/test_dates.py index 7d7613506..abc746d8e 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -14,7 +14,7 @@ import numpy as np import pytest -from anemoi.datasets.create.statistics import default_statistics_dates +from anemoi.datasets.build.gridded.statistics import default_statistics_dates _ = datetime.datetime diff --git a/tests/test_indexing.py b/tests/test_indexing.py index bc53462ac..494376aa9 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -10,7 +10,7 @@ import numpy as np -from anemoi.datasets.data.indexing import length_to_slices +from anemoi.datasets.use.gridded.indexing import length_to_slices def test_length_to_slices() -> None: diff --git a/tests/test_records.py b/tests/test_records.py index 896081f9a..6fadaf26e 100644 --- a/tests/test_records.py +++ b/tests/test_records.py @@ -11,9 +11,9 @@ import numpy as np import pytest -from anemoi.datasets.data import open_dataset -from anemoi.datasets.data.records import Record -from anemoi.datasets.data.records import Tabular +from anemoi.datasets.use.gridded import open_dataset +from anemoi.datasets.use.tabular.records import Record +from anemoi.datasets.use.tabular.records import Tabular def check_numpy(x, y): diff --git a/tests/test_validate.py b/tests/test_validate.py index 21fd250e1..4cd590ac9 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -17,7 +17,7 @@ from anemoi.utils.testing import GetTestData from anemoi.utils.testing import skip_if_offline -from anemoi.datasets.validate import validate_dataset +from anemoi.datasets.misc.validate import validate_dataset @pytest.fixture diff --git a/tests/xarray/test_flavour.py b/tests/xarray/test_flavour.py index 7b2bb33e5..ab058839e 100644 --- a/tests/xarray/test_flavour.py +++ b/tests/xarray/test_flavour.py @@ -11,18 +11,18 @@ import pytest import xarray as xr -from anemoi.datasets.create.sources.xarray_support.coordinates import DateCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import EnsembleCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LatitudeCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LevelCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LongitudeCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import ScalarCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import StepCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import TimeCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import UnsupportedCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import XCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import YCoordinate -from anemoi.datasets.create.sources.xarray_support.flavour import DefaultCoordinateGuesser +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import DateCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import EnsembleCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LatitudeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LevelCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LongitudeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import ScalarCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import StepCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import TimeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import UnsupportedCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import XCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import YCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.flavour import DefaultCoordinateGuesser def create_ds(var_name, standard_name, long_name, units, coord_length=5): diff --git a/tests/xarray/test_netcdf.py b/tests/xarray/test_netcdf.py index f25d8c4d7..7994789f6 100644 --- a/tests/xarray/test_netcdf.py +++ b/tests/xarray/test_netcdf.py @@ -12,7 +12,7 @@ import xarray as xr from multiurl import download -from anemoi.datasets.create.sources.xarray import XarrayFieldList +from anemoi.datasets.build.gridded.sources.xarray import XarrayFieldList URLS = { "https://get.ecmwf.int/repository/test-data/earthkit-data/examples/efas.nc": dict(length=3), diff --git a/tests/xarray/test_opendap.py b/tests/xarray/test_opendap.py index fb855ca94..538630a23 100644 --- a/tests/xarray/test_opendap.py +++ b/tests/xarray/test_opendap.py @@ -12,8 +12,8 @@ import xarray as xr from anemoi.utils.testing import skip_if_offline -from anemoi.datasets.create.sources.xarray import XarrayFieldList -from anemoi.datasets.testing import assert_field_list +from anemoi.datasets.build.gridded.sources.xarray import XarrayFieldList +from anemoi.datasets.misc.testing import assert_field_list @skip_if_offline diff --git a/tests/xarray/test_variable.py b/tests/xarray/test_variable.py index ff43da389..0f060a32e 100644 --- a/tests/xarray/test_variable.py +++ b/tests/xarray/test_variable.py @@ -13,14 +13,14 @@ import pytest import xarray as xr -from anemoi.datasets.create.sources.xarray_support.coordinates import DateCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LatitudeCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LevelCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import LongitudeCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import StepCoordinate -from anemoi.datasets.create.sources.xarray_support.coordinates import TimeCoordinate -from anemoi.datasets.create.sources.xarray_support.time import ForecastFromValidTimeAndStep -from anemoi.datasets.create.sources.xarray_support.variable import Variable +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import DateCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LatitudeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LevelCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import LongitudeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import StepCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.coordinates import TimeCoordinate +from anemoi.datasets.build.gridded.sources.xarray_support.time import ForecastFromValidTimeAndStep +from anemoi.datasets.build.gridded.sources.xarray_support.variable import Variable @pytest.fixture diff --git a/tests/xarray/test_zarr.py b/tests/xarray/test_zarr.py index 742bfae80..1c35361c7 100644 --- a/tests/xarray/test_zarr.py +++ b/tests/xarray/test_zarr.py @@ -12,8 +12,8 @@ from anemoi.utils.testing import skip_if_offline from anemoi.utils.testing import skip_missing_packages -from anemoi.datasets.create.sources.xarray import XarrayFieldList -from anemoi.datasets.testing import assert_field_list +from anemoi.datasets.build.gridded.sources.xarray import XarrayFieldList +from anemoi.datasets.misc.testing import assert_field_list @skip_if_offline diff --git a/tools/build-obs.py b/tools/build-obs.py index e3caff9f9..d29339cda 100755 --- a/tools/build-obs.py +++ b/tools/build-obs.py @@ -28,7 +28,7 @@ def build(input, output, backend, overwrite=False): print(f"Dataset has {len(ds)} records, from {ds.start_date} to {ds.end_date}") print(f"Converting dataset to {output} using new backend '{backend}'") - from anemoi.datasets.data.records.backends import writer_backend_factory + from anemoi.datasets.use.tabular.records.backends import writer_backend_factory if os.path.exists(output): if overwrite: diff --git a/tools/grids/grids_multilam.ipynb b/tools/grids/grids_multilam.ipynb index bb212bc4a..f6b6f5355 100644 --- a/tools/grids/grids_multilam.ipynb +++ b/tools/grids/grids_multilam.ipynb @@ -8,7 +8,7 @@ "source": [ "import numpy as np\n", "from anemoi.datasets import open_dataset\n", - "from anemoi.datasets.data.grids import Cutout" + "from anemoi.datasets.use.gridded.grids import Cutout" ] }, {