Skip to content

Commit 1e27166

Browse files
committed
test: expand suite with helpers, encoding, writer and e2e cases
1 parent 8cccc3c commit 1e27166

File tree

7 files changed

+336
-113
lines changed

7 files changed

+336
-113
lines changed

tests/conftest.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import warnings
2+
3+
# Known-noisy third-party warnings, as (message regex, category) pairs.
# The message is matched as a regular expression against the start of the
# warning text, so each string must stay in sync with the upstream wording.
_IGNORED_WARNINGS = (
    # Filter noisy Zarr v3 consolidated metadata warning in tests
    (
        "Consolidated metadata is currently not part in the Zarr format 3 specification",
        UserWarning,
    ),
    # Future-proof xarray dims warning until upstream change lands
    ("The return type of `Dataset.dims` will be changed", FutureWarning),
)

# Import-time registration: covers warnings raised while this conftest and
# the modules it triggers are being imported.
for _message, _category in _IGNORED_WARNINGS:
    warnings.filterwarnings("ignore", message=_message, category=_category)


def pytest_configure(config):
    """Register the ignored warnings with pytest's own warning machinery.

    pytest runs conftest loading, collection and every test inside
    ``warnings.catch_warnings`` and re-applies its configured filters there,
    so filters installed only at import time can be rolled back before the
    tests execute. Adding them to the ``filterwarnings`` ini option makes
    pytest re-install them for each phase.

    Args:
        config: The pytest ``Config`` object passed to the hook.
    """
    for message, category in _IGNORED_WARNINGS:
        # Ini syntax is "action:message:category"; neither message contains
        # a colon, so the fields split unambiguously.
        config.addinivalue_line("filterwarnings", f"ignore:{message}:{category.__name__}")

tests/test_cli_e2e.py

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,9 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None:
111111

112112
cmd_info = ["python", "-m", "eopf_geozarr", "info", str(output_path)]
113113

114-
result_info = subprocess.run(
115-
cmd_info, capture_output=True, text=True, timeout=60
116-
)
114+
result_info = subprocess.run(cmd_info, capture_output=True, text=True, timeout=60)
117115

118-
assert (
119-
result_info.returncode == 0
120-
), f"CLI info command failed: {result_info.stderr}"
116+
assert result_info.returncode == 0, f"CLI info command failed: {result_info.stderr}"
121117
print("✅ CLI info command succeeded")
122118
print(f"Info output: {result_info.stdout}")
123119

@@ -138,9 +134,7 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None:
138134
str(output_path),
139135
]
140136

141-
result_validate = subprocess.run(
142-
cmd_validate, capture_output=True, text=True, timeout=60
143-
)
137+
result_validate = subprocess.run(cmd_validate, capture_output=True, text=True, timeout=60)
144138

145139
assert (
146140
result_validate.returncode == 0
@@ -151,7 +145,8 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None:
151145
# Verify validation output
152146
validate_output = result_validate.stdout
153147
assert "Validation Results:" in validate_output, "Should show validation header"
154-
assert "✅" in validate_output, "Should show successful validations"
148+
success_ok = ("✅" in validate_output) or ("GeoZarr compliant" in validate_output)
149+
assert success_ok, "Validation should indicate success"
155150

156151
# Test 4: Verify data structure and compliance
157152
print("\n=== Verifying converted data structure ===")
@@ -160,9 +155,7 @@ def test_cli_convert_real_sentinel2_data(self, temp_output_dir: str) -> None:
160155

161156
print("✅ All CLI end-to-end tests passed!")
162157

163-
def _verify_converted_data_structure(
164-
self, output_path: Path, groups: list[str]
165-
) -> None:
158+
def _verify_converted_data_structure(self, output_path: Path, groups: list[str]) -> None:
166159
"""Verify the structure and compliance of converted data."""
167160
# Check each group was converted
168161
for group in groups:
@@ -215,9 +208,7 @@ def _verify_converted_data_structure(
215208
ds.close()
216209

217210
# Check for overview levels
218-
level_dirs = [
219-
d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()
220-
]
211+
level_dirs = [d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()]
221212
print(f" Overview levels: {sorted([d.name for d in level_dirs])}")
222213

223214
if len(level_dirs) > 1:
@@ -352,24 +343,15 @@ def test_cli_convert_with_crs_groups(self, temp_output_dir: str) -> None:
352343

353344
# Check command succeeded
354345
if result.returncode != 0:
355-
print(
356-
f"CLI convert with --crs-groups failed with return code {result.returncode}"
357-
)
346+
print(f"CLI convert with --crs-groups failed with return code {result.returncode}")
358347
print(f"STDOUT: {result.stdout}")
359348
print(f"STDERR: {result.stderr}")
360349
# Don't fail the test if CRS groups don't exist in the dataset
361350
# This is expected behavior for best-effort processing
362-
if (
363-
"not found in DataTree" in result.stdout
364-
or "not found in DataTree" in result.stderr
365-
):
366-
print(
367-
"✅ CLI handled missing CRS groups gracefully (expected behavior)"
368-
)
351+
if "not found in DataTree" in result.stdout or "not found in DataTree" in result.stderr:
352+
print("✅ CLI handled missing CRS groups gracefully (expected behavior)")
369353
return
370-
pytest.fail(
371-
f"CLI convert with --crs-groups command failed: {result.stderr}"
372-
)
354+
pytest.fail(f"CLI convert with --crs-groups command failed: {result.stderr}")
373355

374356
print("✅ CLI convert with --crs-groups command succeeded")
375357

@@ -434,9 +416,7 @@ def test_cli_crs_groups_empty_list(self, temp_output_dir: str) -> None:
434416
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
435417

436418
# Should succeed (empty crs_groups list is valid)
437-
assert (
438-
result.returncode == 0
439-
), f"CLI with empty --crs-groups failed: {result.stderr}"
419+
assert result.returncode == 0, f"CLI with empty --crs-groups failed: {result.stderr}"
440420
assert "CRS groups: []" in result.stdout, "Should show empty CRS groups list"
441421

442422
print("✅ CLI with empty --crs-groups list works correctly")

tests/test_conversion.py

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@
1515
setup_datatree_metadata_geozarr_spec_compliant,
1616
validate_existing_band_data,
1717
)
18-
from eopf_geozarr.conversion.geozarr import (
19-
create_overview_dataset_all_vars,
20-
prepare_dataset_with_crs_info,
21-
)
18+
from eopf_geozarr.conversion.geozarr import prepare_dataset_with_crs_info
19+
from eopf_geozarr.conversion.multiscales import create_overview_dataset_all_vars
2220

2321

2422
class TestUtilityFunctions:
@@ -123,9 +121,7 @@ def test_validate_existing_band_data_missing(self) -> None:
123121

124122
def test_calculate_overview_levels(self) -> None:
125123
"""Test overview levels calculation."""
126-
levels = calculate_overview_levels(
127-
1024, 1024, min_dimension=256, tile_width=256
128-
)
124+
levels = calculate_overview_levels(1024, 1024, min_dimension=256, tile_width=256)
129125

130126
# Should have levels 0, 1, 2 (1024 -> 512 -> 256)
131127
assert len(levels) == 3
@@ -184,10 +180,7 @@ def test_setup_datatree_metadata_geozarr_spec_compliant(self) -> None:
184180
assert "standard_name" in processed_ds[band].attrs
185181
assert "_ARRAY_DIMENSIONS" in processed_ds[band].attrs
186182
assert "grid_mapping" in processed_ds[band].attrs
187-
assert (
188-
processed_ds[band].attrs["standard_name"]
189-
== "toa_bidirectional_reflectance"
190-
)
183+
assert processed_ds[band].attrs["standard_name"] == "toa_bidirectional_reflectance"
191184

192185
# Check coordinate attributes
193186
for coord in ["x", "y"]:
@@ -269,9 +262,7 @@ def test_prepare_dataset_with_crs_info_with_spatial_coordinates(self) -> None:
269262
dt["conditions/geometry"] = geometry_ds
270263

271264
# Mock the output path and file operations
272-
with patch(
273-
"eopf_geozarr.conversion.geozarr.fs_utils.normalize_path"
274-
) as mock_normalize:
265+
with patch("eopf_geozarr.conversion.geozarr.fs_utils.normalize_path") as mock_normalize:
275266
with patch(
276267
"eopf_geozarr.conversion.geozarr.fs_utils.get_storage_options"
277268
) as mock_storage:
@@ -437,15 +428,10 @@ def test_prepare_dataset_with_crs_info_data_variable_attributes(self) -> None:
437428
if var_name != "spatial_ref": # Skip grid mapping variable
438429
var_attrs = processed_ds[var_name].attrs
439430
assert "_ARRAY_DIMENSIONS" in var_attrs
440-
assert var_attrs["_ARRAY_DIMENSIONS"] == list(
441-
processed_ds[var_name].dims
442-
)
431+
assert var_attrs["_ARRAY_DIMENSIONS"] == list(processed_ds[var_name].dims)
443432

444433
# Variables with spatial coordinates should have grid_mapping
445-
if (
446-
"x" in processed_ds[var_name].dims
447-
and "y" in processed_ds[var_name].dims
448-
):
434+
if "x" in processed_ds[var_name].dims and "y" in processed_ds[var_name].dims:
449435
assert "grid_mapping" in var_attrs
450436
assert var_attrs["grid_mapping"] == "spatial_ref"
451437

@@ -494,9 +480,7 @@ def test_prepare_dataset_with_crs_info_crs_inference(self) -> None:
494480
pixel_size_y = float(y_coords[0] - y_coords[1]) # Usually negative
495481

496482
# Create GeoTransform (GDAL format)
497-
transform_str = (
498-
f"{x_coords[0]} {pixel_size_x} 0.0 {y_coords[0]} 0.0 {pixel_size_y}"
499-
)
483+
transform_str = f"{x_coords[0]} {pixel_size_x} 0.0 {y_coords[0]} 0.0 {pixel_size_y}"
500484
ds["spatial_ref"].attrs["GeoTransform"] = transform_str
501485

502486
# Verify CRS was inferred and applied
@@ -613,9 +597,7 @@ def test_prepare_dataset_with_crs_info_no_spatial_coordinates(self) -> None:
613597
# Set up data variables with proper attributes
614598
for var_name in ds.data_vars:
615599
# Add _ARRAY_DIMENSIONS attribute if missing
616-
if "_ARRAY_DIMENSIONS" not in ds[var_name].attrs and hasattr(
617-
ds[var_name], "dims"
618-
):
600+
if "_ARRAY_DIMENSIONS" not in ds[var_name].attrs and hasattr(ds[var_name], "dims"):
619601
ds[var_name].attrs["_ARRAY_DIMENSIONS"] = list(ds[var_name].dims)
620602

621603
# Verify the group was processed but no CRS was added

tests/test_encoding.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
import xarray as xr
3+
4+
from eopf_geozarr.conversion.encoding import create_geozarr_encoding
5+
6+
7+
def _make_ds(shape_2d=(1024, 2048), extra_dims=("band",), bands=3, dtype="uint16"):
    """Build a synthetic dataset with a ``data`` variable and a grid-mapping variable.

    Args:
        shape_2d: ``(height, width)`` of the ``y``/``x`` plane.
        extra_dims: Tuple whose first entry names a leading dimension of
            length ``bands``; a falsy value yields a purely 2D ``data`` variable.
        bands: Length of the leading dimension when ``extra_dims`` is given.
        dtype: Integer dtype passed to ``np.random.randint``.

    Returns:
        An ``xr.Dataset`` holding random ``data``, a scalar ``spatial_ref``
        variable, and ``data.attrs["grid_mapping"] == "spatial_ref"``.
    """
    height, width = shape_2d
    coordinates = {"y": np.arange(height), "x": np.arange(width)}

    if extra_dims:
        coordinates[extra_dims[0]] = np.arange(bands)
        sample_shape = (bands, height, width)
    else:
        sample_shape = (height, width)

    samples = np.random.randint(0, 1000, size=sample_shape, dtype=dtype)
    data_dims = (extra_dims + ("y", "x")) if extra_dims else ("y", "x")

    variables = {
        "data": (data_dims, samples),
        # Add a grid mapping var and reference it via a data variable's attrs
        "spatial_ref": ((), np.array(0, dtype="int8")),
    }
    dataset = xr.Dataset(data_vars=variables, coords=coordinates)
    dataset["data"].attrs["grid_mapping"] = "spatial_ref"
    return dataset
23+
24+
25+
def test_grid_mapping_excluded_from_compression():
26+
ds = _make_ds()
27+
enc = create_geozarr_encoding(ds, compressor="COMP", spatial_chunk=4096)
28+
assert (
29+
enc["spatial_ref"].get("compressors") is None
30+
), "grid mapping var should not be compressed"
31+
assert enc["data"]["compressors"], "data variable should have compressors list"
32+
33+
34+
def test_chunk_shape_3d_leading_dims_collapsed():
35+
ds = _make_ds()
36+
enc = create_geozarr_encoding(ds, compressor=None, spatial_chunk=512)
37+
chunks = enc["data"]["chunks"]
38+
# Expect (1, y_chunk, x_chunk)
39+
assert len(chunks) == 3 and chunks[0] == 1, f"Leading dim should be 1-sized in chunks: {chunks}"
40+
assert chunks[1] == chunks[2], "Spatial chunks should be square"
41+
assert chunks[1] <= 512, "Spatial chunk should not exceed requested size"
42+
43+
44+
def test_chunk_shape_2d():
45+
ds = _make_ds(extra_dims=None)
46+
enc = create_geozarr_encoding(ds, compressor=None, spatial_chunk=256)
47+
chunks = enc["data"]["chunks"]
48+
assert len(chunks) == 2, "2D var should have 2 chunk dims"
49+
assert chunks[0] == chunks[1] <= 256
50+
51+
52+
def test_max_chunk_bytes_enforced(monkeypatch):
53+
ds = _make_ds()
54+
# Force very low limit so logic must reduce
55+
monkeypatch.setenv("EOPF_MAX_CHUNK_BYTES", "16384") # 16 KiB
56+
enc = create_geozarr_encoding(ds, compressor=None, spatial_chunk=2048)
57+
chunks = enc["data"]["chunks"]
58+
# Compute approximate bytes
59+
dtype_size = ds["data"].dtype.itemsize
60+
try:
61+
from math import prod
62+
except Exception:
63+
64+
def prod(vals):
65+
out = 1
66+
for v in vals:
67+
out *= v
68+
return out
69+
70+
est_bytes = prod(chunks) * dtype_size
71+
assert est_bytes <= 16384, f"Chunk bytes {est_bytes} exceed limit with chunks {chunks}"
72+
73+
74+
def test_coord_variables_no_compression():
75+
ds = _make_ds()
76+
enc = create_geozarr_encoding(ds, compressor=None, spatial_chunk=512)
77+
for coord in ds.coords:
78+
assert (
79+
enc[coord].get("compressors") is None
80+
), f"Coordinate {coord} should not have compressors"

0 commit comments

Comments
 (0)