Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/anemoi/datasets/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!build/
12 changes: 6 additions & 6 deletions src/anemoi/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
# nor does it submit to any jurisdiction.


from .data import MissingDateError
from .data import add_dataset_path
from .data import add_named_dataset
from .data import list_dataset_names
from .data import open_dataset
from anemoi.datasets.use.gridded import MissingDateError
from anemoi.datasets.use.gridded import add_dataset_path
from anemoi.datasets.use.gridded import add_named_dataset
from anemoi.datasets.use.gridded import list_dataset_names
from anemoi.datasets.use.gridded import open_dataset

try:
# NOTE: the `_version.py` file must not be present in the git repository
# as it is generated by setuptools at install time
from ._version import __version__ # type: ignore
from anemoi.datasets._version import __version__ # type: ignore
except ImportError: # pragma: no cover
# Local copy or not installed with setuptools
__version__ = "999"
Expand Down
4 changes: 2 additions & 2 deletions src/anemoi/datasets/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from anemoi.utils.cli import cli_main
from anemoi.utils.cli import make_parser

from . import __version__
from .commands import COMMANDS
from anemoi.datasets import __version__
from anemoi.datasets.commands import COMMANDS


# For read-the-docs
Expand Down
8 changes: 8 additions & 0 deletions src/anemoi/datasets/build/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# (C) Copyright 2025 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,25 @@

from anemoi.datasets import MissingDateError
from anemoi.datasets import open_dataset
from anemoi.datasets.create.input.trace import enable_trace
from anemoi.datasets.create.persistent import build_storage
from anemoi.datasets.data.misc import as_first_date
from anemoi.datasets.data.misc import as_last_date
from anemoi.datasets.build.gridded.check import DatasetName
from anemoi.datasets.build.gridded.check import check_data_values
from anemoi.datasets.build.gridded.chunks import ChunkFilter
from anemoi.datasets.build.gridded.config import build_output
from anemoi.datasets.build.gridded.config import loader_config
from anemoi.datasets.build.gridded.persistent import build_storage
from anemoi.datasets.build.gridded.statistics import Summary
from anemoi.datasets.build.gridded.statistics import TmpStatistics
from anemoi.datasets.build.gridded.statistics import check_variance
from anemoi.datasets.build.gridded.statistics import compute_statistics
from anemoi.datasets.build.gridded.statistics import default_statistics_dates
from anemoi.datasets.build.gridded.statistics import fix_variance
from anemoi.datasets.build.gridded.utils import normalize_and_check_dates
from anemoi.datasets.build.gridded.writer import ViewCacheArray
from anemoi.datasets.build.input import InputBuilder
from anemoi.datasets.build.input.trace import enable_trace
from anemoi.datasets.dates.groups import Groups

from .check import DatasetName
from .check import check_data_values
from .chunks import ChunkFilter
from .config import build_output
from .config import loader_config
from .input import InputBuilder
from .statistics import Summary
from .statistics import TmpStatistics
from .statistics import check_variance
from .statistics import compute_statistics
from .statistics import default_statistics_dates
from .statistics import fix_variance
from .utils import normalize_and_check_dates
from .writer import ViewCacheArray
from anemoi.datasets.use.gridded.misc import as_first_date
from anemoi.datasets.use.gridded.misc import as_last_date

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -193,7 +192,7 @@ def add_dataset(self, mode: str = "r+", **kwargs: Any) -> zarr.Array:
import zarr

z = zarr.open(self.path, mode=mode)
from .zarr import add_zarr_dataset
from anemoi.datasets.build.gridded.zarr import add_zarr_dataset

return add_zarr_dataset(zarr_root=z, **kwargs)

Expand Down Expand Up @@ -397,7 +396,7 @@ def _cache_context(self) -> Any:
Any
The cache context.
"""
from .utils import cache_context
from anemoi.datasets.build.gridded.utils import cache_context

return cache_context(self.cache)

Expand Down Expand Up @@ -473,7 +472,7 @@ def __init__(self, path: str, options: dict = None, **kwargs: Any):

def run(self) -> None:
"""Run the patch."""
from .patch import apply_patch
from anemoi.datasets.build.gridded.patch import apply_patch

apply_patch(self.path, **self.options)

Expand All @@ -493,7 +492,7 @@ def __init__(self, path: str, **kwargs: Any):

def run(self) -> None:
"""Run the size computation."""
from .size import compute_directory_sizes
from anemoi.datasets.build.gridded.size import compute_directory_sizes

metadata = compute_directory_sizes(self.path)
self.update_metadata(**metadata)
Expand All @@ -515,7 +514,7 @@ class HasRegistryMixin:
@cached_property
def registry(self) -> Any:
"""Get the registry."""
from .zarr import ZarrBuiltRegistry
from anemoi.datasets.build.gridded.zarr import ZarrBuiltRegistry

return ZarrBuiltRegistry(self.path, use_threads=self.use_threads)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@

from earthkit.data.core.order import build_remapping

from ..result.field import FieldResult
from . import Context
from anemoi.datasets.build.gridded.result import GriddedResult
from anemoi.datasets.build.input.context import Context


class FieldContext(Context):
class GriddedContext(Context):

def __init__(
self,
Expand Down Expand Up @@ -46,7 +46,7 @@ def filter_argument(self, argument: Any) -> Any:
return argument

def create_result(self, data):
return FieldResult(self, data)
return GriddedResult(self, data)

def matching_dates(self, filtering_dates, group_of_dates: Any) -> Any:
from anemoi.datasets.dates.groups import GroupOfDates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from anemoi.utils.humanize import shorten_list
from earthkit.data.core.order import build_remapping

from . import Result
from anemoi.datasets.build.input.result import Result

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -276,7 +276,7 @@ def sort(old_dic: DefaultDict[str, set]) -> dict[str, list[Any]]:
return dict(param_level=params_levels, param_step=params_steps, area=area, grid=grid)


class FieldResult(Result):
class GriddedResult(Result):
"""Class to represent the result of an action in the dataset creation process."""

empty: bool = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import earthkit.data as ekd

from anemoi.datasets.create.typing import DateList
from anemoi.datasets.build.gridded.typing import DateList


class Source(ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from earthkit.data.readers.grib.output import new_grib_output
from numpy.typing import NDArray

from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.build.gridded.sources import source_registry

from .legacy import LegacySource
from .mars import mars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from earthkit.data.core.temporary import temp_file
from earthkit.data.readers.grib.output import new_grib_output

from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.create.sources.mars import mars
from anemoi.datasets.build.gridded.sources import source_registry

from .legacy import LegacySource
from .mars import mars

LOG = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from anemoi.transform.flavour import RuleBasedFlavour
from anemoi.transform.grids import grid_registry

from anemoi.datasets.create.typing import DateList
from anemoi.datasets.build.gridded.typing import DateList

from ..source import Source
from . import source_registry
Expand Down Expand Up @@ -125,7 +125,7 @@ def _time_request_keys(dt: datetime, offset_from_date: bool | None = None) -> st


def _shortname_to_paramid(shortname: list[str], param_id_map: dict[str, int] | None = None) -> list[int]:
from anemoi.datasets.create.sources.mars import use_grib_paramid
from .mars import use_grib_paramid

"""Convert a shortname to a parameter ID."""
if param_id_map is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@

from earthkit.data.core.fieldlist import MultiFieldList

from anemoi.datasets.create.sources.mars import mars
from anemoi.datasets.build.gridded.sources import source_registry

from . import source_registry
from .legacy import LegacySource
from .mars import mars

LOGGER = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from abc import abstractmethod
from typing import Any

from anemoi.datasets.create.input.context import Context
from anemoi.datasets.build.input.context import Context

from ..source import Source

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from earthkit.data import from_source
from earthkit.data.utils.availability import Availability

from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.build.gridded.sources import source_registry

from .legacy import LegacySource

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
from anemoi.transform.fields import new_field_with_valid_datetime
from anemoi.transform.fields import new_fieldlist_from_list

from anemoi.datasets.create.input.repeated_dates import DateMapper
from anemoi.datasets.create.source import Source
from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.build.input.repeated_dates import DateMapper

from ..source import Source
from ..sources import source_registry

LOG = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from earthkit.data import from_source

from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.build.gridded.sources import source_registry

from .legacy import LegacySource

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from earthkit.data.core.temporary import temp_file
from earthkit.data.readers.grib.output import new_grib_output

from anemoi.datasets.create.sources import source_registry
from anemoi.datasets.build.gridded.sources import source_registry

from .legacy import LegacySource

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import earthkit.data as ekd

from anemoi.datasets.create.typing import DateList
from anemoi.datasets.build.gridded.typing import DateList

from ..source import Source
from .xarray_support import XarrayFieldList
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import xarray as xr
from earthkit.data.core.fieldlist import MultiFieldList

from anemoi.datasets.create.sources.patterns import iterate_patterns
from anemoi.datasets.build.gridded.sources.patterns import iterate_patterns

from .. import source_registry
from ..legacy import LegacySource
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
from anemoi.utils.provenance import gather_provenance_info
from numpy.typing import NDArray

from ..check import check_data_values
from .summary import Summary
from anemoi.datasets.build.gridded.check import check_data_values
from anemoi.datasets.build.gridded.statistics.summary import Summary

LOG = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@

import numpy as np

from ..check import StatisticsValueError
from ..check import check_data_values
from ..check import check_stats
from anemoi.datasets.build.gridded.check import StatisticsValueError
from anemoi.datasets.build.gridded.check import check_data_values
from anemoi.datasets.build.gridded.check import check_stats


class Summary(dict):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
from typing import TYPE_CHECKING
from typing import Any

from anemoi.datasets.create.input.context.field import FieldContext

if TYPE_CHECKING:
from anemoi.datasets.create.input.action import Recipe
from anemoi.datasets.build.input.action import Recipe


class InputBuilder:
Expand All @@ -40,8 +38,8 @@ def __init__(self, config: dict, data_sources: dict | list, **kwargs: Any) -> No
@cached_property
def action(self) -> "Recipe":
"""Returns the action object based on the configuration."""
from .action import Recipe
from .action import action_factory
from anemoi.datasets.build.input.action import Recipe
from anemoi.datasets.build.input.action import action_factory

sources = action_factory(self.data_sources, "data_sources")
input = action_factory(self.config, "input")
Expand All @@ -61,7 +59,9 @@ def select(self, argument) -> Any:
Any
Selected data.
"""
context = FieldContext(argument, **self.kwargs)
from anemoi.datasets.build.gridded.context import GriddedContext

context = GriddedContext(argument, **self.kwargs)
return context.create_result(self.action(context, argument))


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class DatasetSourceMixin:
"""Mixin class for sources defined in anemoi-datasets"""

def create_object(self, context, config):
from anemoi.datasets.create.sources import create_source as create_datasets_source
from anemoi.datasets.build.gridded.sources import create_source as create_datasets_source

return create_datasets_source(context, config)

Expand Down Expand Up @@ -286,7 +286,7 @@ def make(key, config, *path):
from anemoi.transform.filters import filter_registry as transform_filter_registry
from anemoi.transform.sources import source_registry as transform_source_registry

from anemoi.datasets.create.sources import source_registry as dataset_source_registry
from anemoi.datasets.build.gridded.sources import source_registry as dataset_source_registry

# Register sources, local first
for name in dataset_source_registry.registered:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def resolve(self, config):
return config

def create_source(self, config: Any, *path) -> Any:
from anemoi.datasets.create.input.action import action_factory
from anemoi.datasets.build.input.action import action_factory

if not isinstance(config, dict):
# It is already a result (e.g. ekd.FieldList), loaded from ${a.b.c}
Expand Down
Loading
Loading