Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 4 additions & 12 deletions src/anemoi/datasets/create/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ def check_name(
resolution: str,
dates: list[datetime.datetime],
frequency: datetime.timedelta,
raise_exception: bool = True,
is_test: bool = False,
raise_exception: bool = False,
) -> None:
"""Check the name of the dataset.

Expand All @@ -271,15 +270,13 @@ def check_name(
The frequency of the dataset.
raise_exception : bool, optional
Whether to raise an exception if the name is invalid.
is_test : bool, optional
Whether this is a test.
"""
basename, _ = os.path.splitext(os.path.basename(self.path))
try:
DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
except Exception as e:
if raise_exception and not is_test:
raise e
if raise_exception:
raise
else:
LOG.warning(f"Dataset name error: {e}")

Expand Down Expand Up @@ -577,7 +574,6 @@ def __init__(
use_threads: bool = False,
statistics_temp_dir: str | None = None,
progress: Any = None,
test: bool = False,
cache: str | None = None,
**kwargs: Any,
):
Expand All @@ -599,8 +595,6 @@ def __init__(
The directory for temporary statistics.
progress : Any, optional
The progress indicator.
test : bool, optional
Whether this is a test.
cache : Optional[str], optional
The cache directory.
"""
Expand All @@ -613,9 +607,8 @@ def __init__(
self.use_threads = use_threads
self.statistics_temp_dir = statistics_temp_dir
self.progress = progress
self.test = test

self.main_config = loader_config(config, is_test=test)
self.main_config = loader_config(config)

# self.registry.delete() ??
self.tmp_statistics.delete()
Expand Down Expand Up @@ -748,7 +741,6 @@ def _run(self) -> int:

self.dataset.check_name(
raise_exception=self.check_name,
is_test=self.test,
resolution=resolution,
dates=dates,
frequency=frequency,
Expand Down
59 changes: 4 additions & 55 deletions src/anemoi/datasets/create/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
from anemoi.utils.config import load_any_dict_format
from earthkit.data.core.order import normalize_order_by

from anemoi.datasets.dates.groups import Groups

LOG = logging.getLogger(__name__)


Expand Down Expand Up @@ -340,72 +338,20 @@ def _prepare_serialisation(o: Any) -> Any:
return str(o)


def set_to_test_mode(cfg: dict) -> None:
    """Shrink a configuration in place so that it can be run as a quick test.

    The ``dates`` entry is replaced by one covering only the first
    ``NUMBER_OF_DATES`` dates, and the configuration tree is then walked to
    reduce ``grid``, ``number`` and ``constants`` entries.

    Parameters
    ----------
    cfg : dict
        The configuration dictionary. It is modified in place.
    """
    NUMBER_OF_DATES = 4

    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
    date_groups = Groups(**LoadersConfig(cfg).dates)
    provider = date_groups.provider

    all_dates = provider.values
    cfg["dates"] = {
        "start": all_dates[0],
        "end": all_dates[NUMBER_OF_DATES - 1],
        "frequency": provider.frequency,
        "group_by": NUMBER_OF_DATES,
    }

    # Counted after ``dates`` has been rewritten, as in the original ordering.
    num_ensembles = count_ensembles(cfg)
    has_ensembles = num_ensembles > 1

    def _reduce(obj):
        # Sequences: visit every element, nothing else to do.
        if isinstance(obj, (list, tuple)):
            for element in obj:
                _reduce(element)
            return

        # Anything that is not a mapping is left untouched.
        if not isinstance(obj, (dict, DotDict)):
            return

        # Grid and ensemble members are only reduced for ensemble configurations.
        if has_ensembles and "grid" in obj:
            previous = obj["grid"]
            obj["grid"] = "20./20."
            LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")

        if has_ensembles and "number" in obj and isinstance(obj["number"], (list, tuple)):
            previous = obj["number"]
            obj["number"] = previous[0:3]
            LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")

        # Recurse into the children first, then trim this level's constants.
        for child in obj.values():
            _reduce(child)

        if "constants" in obj:
            constants_cfg = obj["constants"]
            if "param" in constants_cfg and isinstance(constants_cfg["param"], list):
                constants_cfg["param"] = ["cos_latitude"]

    _reduce(cfg)


def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
def loader_config(config: dict) -> LoadersConfig:
"""Loads and validates the configuration for dataset loaders.

Parameters
----------
config : dict
The configuration dictionary.
is_test : bool, optional
Whether to run in test mode. Defaults to False.

Returns
-------
LoadersConfig
The validated configuration object.
"""
config = Config(config)
if is_test:
set_to_test_mode(config)
obj = LoadersConfig(config)

# yaml round trip to check that serialisation works as expected
Expand All @@ -426,6 +372,9 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
LOG.info(f"Setting env variable {k}={v}")
os.environ[k] = str(v)

# Used by pytest only
# copy.pop('checks', None)

return copy


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def load_one(
if isinstance(dataset, xr.Dataset):
data = dataset
else:
print(f"Opening dataset {dataset} with options {options}")
data = xr.open_dataset(dataset, **options)

fs = XarrayFieldList.from_xarray(data, flavour=flavour, patch=patch)
Expand Down
30 changes: 30 additions & 0 deletions tests/create/accumulate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Test recipe: accumulated fields (tp, cp) with bounds checks on the result.
# NOTE(review): indentation is not visible in this view; the nesting of the
# keys below must be confirmed against the real file.

# Time span and step of the dates covered by the recipe.
dates:
start: 2021-01-10 18:00:00
#start: 2021-01-10 19:00:00
end: 2021-01-12 12:00:00
frequency: 6h

# Single input source and the request keys passed to it.
input:
accumulations2:
expver: "0001"
class: ea

stream: oper
#stream: enda

grid: 20./20.
#grid: o96
levtype: sfc
param: [ tp, cp ]
# accumulation_period: [0, 6]
accumulation_period: 24

# Expected minimum/maximum per variable.
# Presumably evaluated by the test suite once the dataset is built - confirm.
checks:
- values:
variable: tp
minimum: 0.0
maximum: 0.06885338
- values:
variable: cp
minimum: 0.0
maximum: 0.038482666
32 changes: 21 additions & 11 deletions tests/create/accumulation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,26 @@ dates:
frequency: 6h

input:
accumulations:
expver: "0001"
class: ea
accumulations:
expver: "0001"
class: ea

stream: oper
#stream: enda
stream: oper
#stream: enda

grid: 20./20.
#grid: o96
levtype: sfc
param: [ tp , cp]
# accumulation_period: [0, 6]
accumulation_period: 24
grid: 20./20.
#grid: o96
levtype: sfc
param: [ tp, cp ]
# accumulation_period: [0, 6]
accumulation_period: 24

checks:
- values:
variable: tp
minimum: 0.0
maximum: 0.06885338
- values:
variable: cp
minimum: 0.0
maximum: 0.038482666
29 changes: 16 additions & 13 deletions tests/create/concat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,25 @@ common:
levtype: sfc
stream: oper
type: an
param: [2t]
param: [ 2t ]

input:
concat:
- dates:
start: 2020-12-30 00:00:00
end: 2021-01-01 12:00:00
frequency: 12h
mars:
<<: *mars_request
- dates:
start: 2021-01-02 00:00:00
end: 2021-01-03 12:00:00
frequency: 12h
mars:
<<: *mars_request
- dates:
start: 2020-12-30 00:00:00
end: 2021-01-01 12:00:00
frequency: 12h
mars:
<<: *mars_request
- dates:
start: 2021-01-02 00:00:00
end: 2021-01-03 12:00:00
frequency: 12h
mars:
<<: *mars_request

statistics:
end: 2021

checks:
- compare_to_reference: {}
3 changes: 3 additions & 0 deletions tests/create/join.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ naming_scheme: "{param}_{levelist}{level_units}_{accumultion_period}"

statistics:
end: 2021

checks:
- compare_to_reference: {}
3 changes: 3 additions & 0 deletions tests/create/missing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,6 @@ input:

statistics:
end: 2021-01-02

checks:
- compare_to_reference: {}
3 changes: 3 additions & 0 deletions tests/create/nan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ input:
statistics:
end: 2020
allow_nans: [sst]

checks:
- compare_to_reference: {}
3 changes: 3 additions & 0 deletions tests/create/pipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,6 @@ input:

statistics:
end: 2021

checks:
- compare_to_reference: {}
5 changes: 5 additions & 0 deletions tests/create/recentre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,8 @@ input:
- sin_julian_day
- sin_local_time
- insolation


slow_test: true
checks:
- none: {}
3 changes: 3 additions & 0 deletions tests/create/regrid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ input:
method: nearest
in_grid: o32
out_grid: o48

checks:
- none: {}
31 changes: 31 additions & 0 deletions tests/create/repeated-dates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Test recipe: a `repeated_dates` input in `constant` mode wrapping a mars source.
# NOTE(review): indentation is not visible in this view; the nesting of the
# keys below must be confirmed against the real file.

# Time span and step of the dates covered by the recipe.
dates:
start: 2020-12-30 00:00:00
end: 2021-01-03 12:00:00
frequency: 12h

# The wrapped source requests a single fixed date (1990-01-01).
input:
repeated_dates:
mode: constant
source:
mars:
expver: "0001"
class: ea
grid: 20./20.
levtype: sfc
stream: oper
type: an
param: [ 2t ]
date: 1990-01-01

# Expected dates: every 12h step from `dates.start` to `dates.end`, inclusive.
# Presumably evaluated by the test suite once the dataset is built - confirm.
checks:
- dates:
- 2020-12-30 00:00:00
- 2020-12-30 12:00:00
- 2020-12-31 00:00:00
- 2020-12-31 12:00:00
- 2021-01-01 00:00:00
- 2021-01-01 12:00:00
- 2021-01-02 00:00:00
- 2021-01-02 12:00:00
- 2021-01-03 00:00:00
- 2021-01-03 12:00:00
37 changes: 37 additions & 0 deletions tests/create/s3-winds.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Test recipe: read u10m/v10m from a zarr store on S3 and pipe them through
# `uv-to-ddff` to obtain `ws` and `wdir`.
# NOTE(review): indentation is not visible in this view; the nesting of the
# keys below must be confirmed against the real file.
name: test_s3_winds_dataset
description: Test creation of a dataset from S3-hosted wind data
attribution: DMI
license: CC-BY-4.0

# Two dates only: 00:00 and 12:00 on 2020-01-01.
dates:
start: 2020-01-01T00:00:00
end: 2020-01-01T12:00:00
frequency: 12h

input:
join:
- pipe:
# Source step: anonymous access to the zarr store.
- xarray-zarr:
url: s3://dmi-danra-05/single_levels.zarr
options:
storage_options:
anon: true
param:
- u10m
- v10m
# Filter step: takes the u/v components above and names its outputs ws / wdir.
- uv-to-ddff:
u_component: u10m
v_component: v10m
wind_speed: ws
wind_direction: wdir
convention: meteo
radians: false
build:
variable_naming: "{param}_{pressure}_{altitude}"

# Marked as slow; how this flag is honoured is not visible here.
slow_test: true
# Expected variables and dates of the resulting dataset.
# Presumably evaluated by the test suite once the dataset is built - confirm.
checks:
- variables: [ wdir, ws ]
- dates:
- 2020-01-01T00:00:00
- 2020-01-01T12:00:00
Loading
Loading