From b84ec0782373616d7c42e6dee67f7eec0695a557 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 1 Oct 2025 10:27:37 +0200 Subject: [PATCH 1/2] migrate tests out of source tree --- .github/workflows/ci.yml | 4 ++-- {src/eopf_geozarr/tests => tests}/__init__.py | 0 {src/eopf_geozarr/tests => tests}/test_cli_e2e.py | 0 {src/eopf_geozarr/tests => tests}/test_conversion.py | 0 {src/eopf_geozarr/tests => tests}/test_fs_utils.py | 0 .../tests => tests}/test_integration_sentinel1.py | 0 .../tests => tests}/test_integration_sentinel2.py | 0 .../tests => tests}/test_reprojection_validation.py | 0 8 files changed, 2 insertions(+), 2 deletions(-) rename {src/eopf_geozarr/tests => tests}/__init__.py (100%) rename {src/eopf_geozarr/tests => tests}/test_cli_e2e.py (100%) rename {src/eopf_geozarr/tests => tests}/test_conversion.py (100%) rename {src/eopf_geozarr/tests => tests}/test_fs_utils.py (100%) rename {src/eopf_geozarr/tests => tests}/test_integration_sentinel1.py (100%) rename {src/eopf_geozarr/tests => tests}/test_integration_sentinel2.py (100%) rename {src/eopf_geozarr/tests => tests}/test_reprojection_validation.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4c3f7d..8dd254c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: - name: Run tests run: | - uv run pytest src/eopf_geozarr/tests/ -v --tb=short -m "not network" --cov=eopf_geozarr --cov-report=xml --cov-report=term-missing + uv run pytest tests/ -v --tb=short -m "not network" --cov=eopf_geozarr --cov-report=xml --cov-report=term-missing - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' @@ -105,7 +105,7 @@ jobs: - name: Run network tests run: | - uv run pytest src/eopf_geozarr/tests/ -v --tb=short -m "network" + uv run pytest tests/ -v --tb=short -m "network" security: runs-on: ubuntu-latest diff --git a/src/eopf_geozarr/tests/__init__.py b/tests/__init__.py similarity index 
100% rename from src/eopf_geozarr/tests/__init__.py rename to tests/__init__.py diff --git a/src/eopf_geozarr/tests/test_cli_e2e.py b/tests/test_cli_e2e.py similarity index 100% rename from src/eopf_geozarr/tests/test_cli_e2e.py rename to tests/test_cli_e2e.py diff --git a/src/eopf_geozarr/tests/test_conversion.py b/tests/test_conversion.py similarity index 100% rename from src/eopf_geozarr/tests/test_conversion.py rename to tests/test_conversion.py diff --git a/src/eopf_geozarr/tests/test_fs_utils.py b/tests/test_fs_utils.py similarity index 100% rename from src/eopf_geozarr/tests/test_fs_utils.py rename to tests/test_fs_utils.py diff --git a/src/eopf_geozarr/tests/test_integration_sentinel1.py b/tests/test_integration_sentinel1.py similarity index 100% rename from src/eopf_geozarr/tests/test_integration_sentinel1.py rename to tests/test_integration_sentinel1.py diff --git a/src/eopf_geozarr/tests/test_integration_sentinel2.py b/tests/test_integration_sentinel2.py similarity index 100% rename from src/eopf_geozarr/tests/test_integration_sentinel2.py rename to tests/test_integration_sentinel2.py diff --git a/src/eopf_geozarr/tests/test_reprojection_validation.py b/tests/test_reprojection_validation.py similarity index 100% rename from src/eopf_geozarr/tests/test_reprojection_validation.py rename to tests/test_reprojection_validation.py From 858bd913f0c281bf27461aa6260cd272a631c70b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 1 Oct 2025 10:36:29 +0200 Subject: [PATCH 2/2] add conftest, and redirect imports --- tests/conftest.py | 269 ++++++++++++++++++++++++++++ tests/test_integration_sentinel1.py | 3 +- 2 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..21c7784 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,269 @@ +"""Shared verification helpers for the eopf-geozarr test suite.""" + +import pathlib + +import xarray as xr + + +def 
_verify_basic_structure(output_path: pathlib.Path, groups: list[str]) -> None: + """Verify the basic Zarr store structure.""" + print("Verifying basic structure...") + + # Check that the main zarr store exists + assert (output_path / "zarr.json").exists() + + # Check that each group has been created + for group in groups: + group_path = output_path / group.lstrip("/") + assert group_path.exists(), f"Group {group} not found" + assert (group_path / "zarr.json").exists(), f"Group {group} missing zarr.json" + + # Check that level 0 (native resolution) exists + level_0_path = group_path / "0" + assert level_0_path.exists(), f"Level 0 not found for {group}" + assert (level_0_path / "zarr.json").exists(), ( + f"Level 0 missing zarr.json for {group}" + ) + + +def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> None: + """ + Verify GeoZarr specification compliance following the notebook verification. + + This replicates the compliance checks from the notebook: + - _ARRAY_DIMENSIONS attributes on all arrays + - CF standard names properly set + - Grid mapping attributes reference correct CRS variables + - GeoTransform attributes in grid_mapping variables + - Native CRS preservation + """ + print(f"Verifying GeoZarr-spec compliance for {group}...") + + # Open the native resolution dataset (level 0) + group_path = str(output_path / group.lstrip("/") / "0") + ds = xr.open_dataset(group_path, engine="zarr", zarr_format=3) + + print(f" Variables: {list(ds.data_vars)}") + print(f" Coordinates: {list(ds.coords)}") + + # Check 1: _ARRAY_DIMENSIONS attributes (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": # Skip grid_mapping variable + assert "_ARRAY_DIMENSIONS" in ds[var_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for {var_name} in {group}" + ) + assert ds[var_name].attrs["_ARRAY_DIMENSIONS"] == list(ds[var_name].dims), ( + f"Incorrect _ARRAY_DIMENSIONS for {var_name} in {group}" + ) + print( + f" ✅ 
_ARRAY_DIMENSIONS: {ds[var_name].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check coordinates + for coord_name in ds.coords: + if coord_name not in ["spatial_ref"]: # Skip CRS coordinate + assert "_ARRAY_DIMENSIONS" in ds[coord_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for coordinate {coord_name} in {group}" + ) + print( + f" ✅ {coord_name} _ARRAY_DIMENSIONS: {ds[coord_name].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check 2: CF standard names (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": + assert "standard_name" in ds[var_name].attrs, ( + f"Missing standard_name for {var_name} in {group}" + ) + assert ( + ds[var_name].attrs["standard_name"] == "toa_bidirectional_reflectance" + ), f"Incorrect standard_name for {var_name} in {group}" + print(f" ✅ standard_name: {ds[var_name].attrs['standard_name']}") + + # Check 3: Grid mapping attributes (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": + assert "grid_mapping" in ds[var_name].attrs, ( + f"Missing grid_mapping for {var_name} in {group}" + ) + assert ds[var_name].attrs["grid_mapping"] == "spatial_ref", ( + f"Incorrect grid_mapping for {var_name} in {group}" + ) + print(f" ✅ grid_mapping: {ds[var_name].attrs['grid_mapping']}") + + # Check 4: Spatial reference variable (as in notebook) + assert "spatial_ref" in ds, f"Missing spatial_ref variable in {group}" + assert "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs, ( + f"Missing _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) + assert ds["spatial_ref"].attrs["_ARRAY_DIMENSIONS"] == [], ( + f"Incorrect _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) + print( + f" ✅ spatial_ref _ARRAY_DIMENSIONS: {ds['spatial_ref'].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check 5: GeoTransform attribute (from notebook verification) + if "GeoTransform" in ds["spatial_ref"].attrs: + print(f" ✅ GeoTransform: {ds['spatial_ref'].attrs['GeoTransform']}") + else: + print(" ⚠️ Missing GeoTransform attribute") + + # 
Check 6: CRS information (from notebook verification) + if "crs_wkt" in ds["spatial_ref"].attrs: + print(" ✅ CRS WKT present") + else: + print(" ⚠️ Missing CRS WKT") + + # Check 7: Coordinate standard names (from notebook verification) + for coord in ["x", "y"]: + if coord in ds.coords: + if "standard_name" in ds[coord].attrs: + expected_name = ( + "projection_x_coordinate" + if coord == "x" + else "projection_y_coordinate" + ) + assert ds[coord].attrs["standard_name"] == expected_name, ( + f"Incorrect standard_name for {coord} coordinate in {group}" + ) + print( + f" ✅ {coord} standard_name: {ds[coord].attrs['standard_name']}" + ) + + ds.close() + + +def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: + """Verify multiscale structure following notebook patterns.""" + print(f"Verifying multiscale structure for {group}...") + + group_path = output_path / group.lstrip("/") + + # Check that at least one level exists (level 0 is always created) + level_dirs = [d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()] + assert len(level_dirs) >= 1, ( + f"Expected at least 1 overview level for {group}, found {len(level_dirs)}" + ) + print( + f" Found {len(level_dirs)} overview levels: {sorted([d.name for d in level_dirs])}" + ) + + # For larger datasets, expect multiple levels + level_0_path = str(group_path / "0") + ds_0 = xr.open_dataset(level_0_path, engine="zarr", zarr_format=3) + native_size = min(ds_0.sizes["y"], ds_0.sizes["x"]) + ds_0.close() + + if native_size >= 512: # Larger datasets should have multiple levels + assert len(level_dirs) >= 2, ( + f"Expected multiple overview levels for large dataset {group} (size {native_size}), found {len(level_dirs)}" + ) + else: + print(f" Small dataset (size {native_size}), single level is acceptable") + + # Verify level 0 (native resolution) exists + assert (group_path / "0").exists(), f"Level 0 missing for {group}" + + # Check that each level contains valid data + level_shapes = {} + 
for level_dir in sorted(level_dirs, key=lambda x: int(x.name)): + level_num = int(level_dir.name) + level_path = str(level_dir) + ds = xr.open_dataset(level_path, engine="zarr", zarr_format=3) + + # Verify that the dataset has data variables + assert len(ds.data_vars) > 0, f"No data variables in {level_path}" + + # Verify that spatial dimensions exist + assert "x" in ds.dims and "y" in ds.dims, ( + f"Missing spatial dimensions in {level_path}" + ) + + # Store shape for progression verification + level_shapes[level_num] = (ds.dims["y"], ds.dims["x"]) + print(f" Level {level_num}: {level_shapes[level_num]} pixels") + + ds.close() + + # Verify that overview levels have progressively smaller dimensions (COG-style /2 downsampling) + if len(level_shapes) > 1: + for level in sorted(level_shapes.keys())[1:]: + prev_level = level - 1 + if prev_level in level_shapes: + prev_height, prev_width = level_shapes[prev_level] + curr_height, curr_width = level_shapes[level] + + # Check that dimensions are roughly half (allowing for rounding) + height_ratio = prev_height / curr_height + width_ratio = prev_width / curr_width + + assert 1.8 <= height_ratio <= 2.2, ( + f"Height ratio between level {prev_level} and {level} should be ~2, got {height_ratio:.2f}" + ) + assert 1.8 <= width_ratio <= 2.2, ( + f"Width ratio between level {prev_level} and {level} should be ~2, got {width_ratio:.2f}" + ) + + print( + f" Level {prev_level}→{level} downsampling ratio: {height_ratio:.2f}x{width_ratio:.2f}" + ) + + +def _verify_rgb_data_access(output_path: pathlib.Path, groups: list[str]) -> None: + """Verify RGB data access patterns from the notebook.""" + print("Verifying RGB data access patterns...") + + # Find groups with RGB bands (following notebook logic) + rgb_groups = [] + for group in groups: + group_path_str = str(output_path / group.lstrip("/") / "0") + ds = xr.open_dataset(group_path_str, engine="zarr", zarr_format=3) + + # Check for RGB bands (b04=red, b03=green, b02=blue for 
Sentinel-2) + has_rgb = all(band in ds.data_vars for band in ["b04", "b03", "b02"]) + if has_rgb: + rgb_groups.append(group) + print(f" Found RGB bands in {group}") + + ds.close() + + # Test data access for RGB groups (following notebook access patterns) + for group in rgb_groups: + print(f" Testing data access for {group}...") + + # Test access to different overview levels (as in notebook) + group_path = output_path / group.lstrip("/") + level_dirs = [ + d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit() + ] + + for level_dir in sorted(level_dirs, key=lambda x: int(x.name))[ + :3 + ]: # Test first 3 levels + level_num = int(level_dir.name) + level_path = str(level_dir) + + # Open dataset and access RGB bands (following notebook pattern) + ds = xr.open_dataset(level_path, engine="zarr", zarr_format=3) + + # Access RGB data (as in notebook) + red_data = ds["b04"].values + green_data = ds["b03"].values + blue_data = ds["b02"].values + + # Verify data shapes match + assert red_data.shape == green_data.shape == blue_data.shape, ( + f"RGB band shapes don't match in {group} level {level_num}" + ) + + # Verify data is not empty + assert red_data.size > 0, f"Empty red data in {group} level {level_num}" + assert green_data.size > 0, f"Empty green data in {group} level {level_num}" + assert blue_data.size > 0, f"Empty blue data in {group} level {level_num}" + + print( + f" Level {level_num}: RGB access successful, shape {red_data.shape}" + ) + + ds.close() diff --git a/tests/test_integration_sentinel1.py b/tests/test_integration_sentinel1.py index ea75547..b00efbf 100644 --- a/tests/test_integration_sentinel1.py +++ b/tests/test_integration_sentinel1.py @@ -13,7 +13,8 @@ import xarray as xr from eopf_geozarr.conversion import create_geozarr_dataset -from eopf_geozarr.tests import _verify_basic_structure + +from .conftest import _verify_basic_structure class MockSentinel1L1GRDBuilder: