From b84ec0782373616d7c42e6dee67f7eec0695a557 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 1 Oct 2025 10:27:37 +0200 Subject: [PATCH 1/2] migrate tests out of source tree --- .github/workflows/ci.yml | 4 ++-- {src/eopf_geozarr/tests => tests}/__init__.py | 0 {src/eopf_geozarr/tests => tests}/test_cli_e2e.py | 0 {src/eopf_geozarr/tests => tests}/test_conversion.py | 0 {src/eopf_geozarr/tests => tests}/test_fs_utils.py | 0 .../tests => tests}/test_integration_sentinel1.py | 0 .../tests => tests}/test_integration_sentinel2.py | 0 .../tests => tests}/test_reprojection_validation.py | 0 8 files changed, 2 insertions(+), 2 deletions(-) rename {src/eopf_geozarr/tests => tests}/__init__.py (100%) rename {src/eopf_geozarr/tests => tests}/test_cli_e2e.py (100%) rename {src/eopf_geozarr/tests => tests}/test_conversion.py (100%) rename {src/eopf_geozarr/tests => tests}/test_fs_utils.py (100%) rename {src/eopf_geozarr/tests => tests}/test_integration_sentinel1.py (100%) rename {src/eopf_geozarr/tests => tests}/test_integration_sentinel2.py (100%) rename {src/eopf_geozarr/tests => tests}/test_reprojection_validation.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4c3f7d..8dd254c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: - name: Run tests run: | - uv run pytest src/eopf_geozarr/tests/ -v --tb=short -m "not network" --cov=eopf_geozarr --cov-report=xml --cov-report=term-missing + uv run pytest tests/ -v --tb=short -m "not network" --cov=eopf_geozarr --cov-report=xml --cov-report=term-missing - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' @@ -105,7 +105,7 @@ jobs: - name: Run network tests run: | - uv run pytest src/eopf_geozarr/tests/ -v --tb=short -m "network" + uv run pytest tests/ -v --tb=short -m "network" security: runs-on: ubuntu-latest diff --git a/src/eopf_geozarr/tests/__init__.py b/tests/__init__.py similarity index 
100% rename from src/eopf_geozarr/tests/__init__.py rename to tests/__init__.py diff --git a/src/eopf_geozarr/tests/test_cli_e2e.py b/tests/test_cli_e2e.py similarity index 100% rename from src/eopf_geozarr/tests/test_cli_e2e.py rename to tests/test_cli_e2e.py diff --git a/src/eopf_geozarr/tests/test_conversion.py b/tests/test_conversion.py similarity index 100% rename from src/eopf_geozarr/tests/test_conversion.py rename to tests/test_conversion.py diff --git a/src/eopf_geozarr/tests/test_fs_utils.py b/tests/test_fs_utils.py similarity index 100% rename from src/eopf_geozarr/tests/test_fs_utils.py rename to tests/test_fs_utils.py diff --git a/src/eopf_geozarr/tests/test_integration_sentinel1.py b/tests/test_integration_sentinel1.py similarity index 100% rename from src/eopf_geozarr/tests/test_integration_sentinel1.py rename to tests/test_integration_sentinel1.py diff --git a/src/eopf_geozarr/tests/test_integration_sentinel2.py b/tests/test_integration_sentinel2.py similarity index 100% rename from src/eopf_geozarr/tests/test_integration_sentinel2.py rename to tests/test_integration_sentinel2.py diff --git a/src/eopf_geozarr/tests/test_reprojection_validation.py b/tests/test_reprojection_validation.py similarity index 100% rename from src/eopf_geozarr/tests/test_reprojection_validation.py rename to tests/test_reprojection_validation.py From 858bd913f0c281bf27461aa6260cd272a631c70b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 1 Oct 2025 10:36:29 +0200 Subject: [PATCH 2/2] add conftest, and redirect imports --- tests/conftest.py | 269 ++++++++++++++++++++++++++++ tests/test_integration_sentinel1.py | 3 +- 2 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..21c7784 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,269 @@ +"""Shared verification helpers for the eopf-geozarr test suite.""" + +import pathlib + +import xarray as xr + + +def 
_verify_basic_structure(output_path: pathlib.Path, groups: list[str]) -> None: + """Verify the basic Zarr store structure.""" + print("Verifying basic structure...") + + # Check that the main zarr store exists + assert (output_path / "zarr.json").exists() + + # Check that each group has been created + for group in groups: + group_path = output_path / group.lstrip("/") + assert group_path.exists(), f"Group {group} not found" + assert (group_path / "zarr.json").exists(), f"Group {group} missing zarr.json" + + # Check that level 0 (native resolution) exists + level_0_path = group_path / "0" + assert level_0_path.exists(), f"Level 0 not found for {group}" + assert (level_0_path / "zarr.json").exists(), ( + f"Level 0 missing zarr.json for {group}" + ) + + +def _verify_geozarr_spec_compliance(output_path: pathlib.Path, group: str) -> None: + """ + Verify GeoZarr specification compliance following the notebook verification. + + This replicates the compliance checks from the notebook: + - _ARRAY_DIMENSIONS attributes on all arrays + - CF standard names properly set + - Grid mapping attributes reference correct CRS variables + - GeoTransform attributes in grid_mapping variables + - Native CRS preservation + """ + print(f"Verifying GeoZarr-spec compliance for {group}...") + + # Open the native resolution dataset (level 0) + group_path = str(output_path / group.lstrip("/") / "0") + ds = xr.open_dataset(group_path, engine="zarr", zarr_format=3) + + print(f" Variables: {list(ds.data_vars)}") + print(f" Coordinates: {list(ds.coords)}") + + # Check 1: _ARRAY_DIMENSIONS attributes (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": # Skip grid_mapping variable + assert "_ARRAY_DIMENSIONS" in ds[var_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for {var_name} in {group}" + ) + assert ds[var_name].attrs["_ARRAY_DIMENSIONS"] == list(ds[var_name].dims), ( + f"Incorrect _ARRAY_DIMENSIONS for {var_name} in {group}" + ) + print( + f" ✅ 
_ARRAY_DIMENSIONS: {ds[var_name].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check coordinates + for coord_name in ds.coords: + if coord_name not in ["spatial_ref"]: # Skip CRS coordinate + assert "_ARRAY_DIMENSIONS" in ds[coord_name].attrs, ( + f"Missing _ARRAY_DIMENSIONS for coordinate {coord_name} in {group}" + ) + print( + f" ✅ {coord_name} _ARRAY_DIMENSIONS: {ds[coord_name].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check 2: CF standard names (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": + assert "standard_name" in ds[var_name].attrs, ( + f"Missing standard_name for {var_name} in {group}" + ) + assert ( + ds[var_name].attrs["standard_name"] == "toa_bidirectional_reflectance" + ), f"Incorrect standard_name for {var_name} in {group}" + print(f" ✅ standard_name: {ds[var_name].attrs['standard_name']}") + + # Check 3: Grid mapping attributes (required by GeoZarr spec) + for var_name in ds.data_vars: + if var_name != "spatial_ref": + assert "grid_mapping" in ds[var_name].attrs, ( + f"Missing grid_mapping for {var_name} in {group}" + ) + assert ds[var_name].attrs["grid_mapping"] == "spatial_ref", ( + f"Incorrect grid_mapping for {var_name} in {group}" + ) + print(f" ✅ grid_mapping: {ds[var_name].attrs['grid_mapping']}") + + # Check 4: Spatial reference variable (as in notebook) + assert "spatial_ref" in ds, f"Missing spatial_ref variable in {group}" + assert "_ARRAY_DIMENSIONS" in ds["spatial_ref"].attrs, ( + f"Missing _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) + assert ds["spatial_ref"].attrs["_ARRAY_DIMENSIONS"] == [], ( + f"Incorrect _ARRAY_DIMENSIONS for spatial_ref in {group}" + ) + print( + f" ✅ spatial_ref _ARRAY_DIMENSIONS: {ds['spatial_ref'].attrs['_ARRAY_DIMENSIONS']}" + ) + + # Check 5: GeoTransform attribute (from notebook verification) + if "GeoTransform" in ds["spatial_ref"].attrs: + print(f" ✅ GeoTransform: {ds['spatial_ref'].attrs['GeoTransform']}") + else: + print(" ⚠️ Missing GeoTransform attribute") + + # 
Check 6: CRS information (from notebook verification) + if "crs_wkt" in ds["spatial_ref"].attrs: + print(" ✅ CRS WKT present") + else: + print(" ⚠️ Missing CRS WKT") + + # Check 7: Coordinate standard names (from notebook verification) + for coord in ["x", "y"]: + if coord in ds.coords: + if "standard_name" in ds[coord].attrs: + expected_name = ( + "projection_x_coordinate" + if coord == "x" + else "projection_y_coordinate" + ) + assert ds[coord].attrs["standard_name"] == expected_name, ( + f"Incorrect standard_name for {coord} coordinate in {group}" + ) + print( + f" ✅ {coord} standard_name: {ds[coord].attrs['standard_name']}" + ) + + ds.close() + + +def _verify_multiscale_structure(output_path: pathlib.Path, group: str) -> None: + """Verify multiscale structure following notebook patterns.""" + print(f"Verifying multiscale structure for {group}...") + + group_path = output_path / group.lstrip("/") + + # Check that at least one level exists (level 0 is always created) + level_dirs = [d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit()] + assert len(level_dirs) >= 1, ( + f"Expected at least 1 overview level for {group}, found {len(level_dirs)}" + ) + print( + f" Found {len(level_dirs)} overview levels: {sorted([d.name for d in level_dirs])}" + ) + + # For larger datasets, expect multiple levels + level_0_path = str(group_path / "0") + ds_0 = xr.open_dataset(level_0_path, engine="zarr", zarr_format=3) + native_size = min(ds_0.sizes["y"], ds_0.sizes["x"]) + ds_0.close() + + if native_size >= 512: # Larger datasets should have multiple levels + assert len(level_dirs) >= 2, ( + f"Expected multiple overview levels for large dataset {group} (size {native_size}), found {len(level_dirs)}" + ) + else: + print(f" Small dataset (size {native_size}), single level is acceptable") + + # Verify level 0 (native resolution) exists + assert (group_path / "0").exists(), f"Level 0 missing for {group}" + + # Check that each level contains valid data + level_shapes = {} + 
for level_dir in sorted(level_dirs, key=lambda x: int(x.name)): + level_num = int(level_dir.name) + level_path = str(level_dir) + ds = xr.open_dataset(level_path, engine="zarr", zarr_format=3) + + # Verify that the dataset has data variables + assert len(ds.data_vars) > 0, f"No data variables in {level_path}" + + # Verify that spatial dimensions exist + assert "x" in ds.dims and "y" in ds.dims, ( + f"Missing spatial dimensions in {level_path}" + ) + + # Store shape for progression verification + level_shapes[level_num] = (ds.dims["y"], ds.dims["x"]) + print(f" Level {level_num}: {level_shapes[level_num]} pixels") + + ds.close() + + # Verify that overview levels have progressively smaller dimensions (COG-style /2 downsampling) + if len(level_shapes) > 1: + for level in sorted(level_shapes.keys())[1:]: + prev_level = level - 1 + if prev_level in level_shapes: + prev_height, prev_width = level_shapes[prev_level] + curr_height, curr_width = level_shapes[level] + + # Check that dimensions are roughly half (allowing for rounding) + height_ratio = prev_height / curr_height + width_ratio = prev_width / curr_width + + assert 1.8 <= height_ratio <= 2.2, ( + f"Height ratio between level {prev_level} and {level} should be ~2, got {height_ratio:.2f}" + ) + assert 1.8 <= width_ratio <= 2.2, ( + f"Width ratio between level {prev_level} and {level} should be ~2, got {width_ratio:.2f}" + ) + + print( + f" Level {prev_level}→{level} downsampling ratio: {height_ratio:.2f}x{width_ratio:.2f}" + ) + + +def _verify_rgb_data_access(output_path: pathlib.Path, groups: list[str]) -> None: + """Verify RGB data access patterns from the notebook.""" + print("Verifying RGB data access patterns...") + + # Find groups with RGB bands (following notebook logic) + rgb_groups = [] + for group in groups: + group_path_str = str(output_path / group.lstrip("/") / "0") + ds = xr.open_dataset(group_path_str, engine="zarr", zarr_format=3) + + # Check for RGB bands (b04=red, b03=green, b02=blue for 
Sentinel-2) + has_rgb = all(band in ds.data_vars for band in ["b04", "b03", "b02"]) + if has_rgb: + rgb_groups.append(group) + print(f" Found RGB bands in {group}") + + ds.close() + + # Test data access for RGB groups (following notebook access patterns) + for group in rgb_groups: + print(f" Testing data access for {group}...") + + # Test access to different overview levels (as in notebook) + group_path = output_path / group.lstrip("/") + level_dirs = [ + d for d in group_path.iterdir() if d.is_dir() and d.name.isdigit() + ] + + for level_dir in sorted(level_dirs, key=lambda x: int(x.name))[ + :3 + ]: # Test first 3 levels + level_num = int(level_dir.name) + level_path = str(level_dir) + + # Open dataset and access RGB bands (following notebook pattern) + ds = xr.open_dataset(level_path, engine="zarr", zarr_format=3) + + # Access RGB data (as in notebook) + red_data = ds["b04"].values + green_data = ds["b03"].values + blue_data = ds["b02"].values + + # Verify data shapes match + assert red_data.shape == green_data.shape == blue_data.shape, ( + f"RGB band shapes don't match in {group} level {level_num}" + ) + + # Verify data is not empty + assert red_data.size > 0, f"Empty red data in {group} level {level_num}" + assert green_data.size > 0, f"Empty green data in {group} level {level_num}" + assert blue_data.size > 0, f"Empty blue data in {group} level {level_num}" + + print( + f" Level {level_num}: RGB access successful, shape {red_data.shape}" + ) + + ds.close() diff --git a/tests/test_integration_sentinel1.py b/tests/test_integration_sentinel1.py index ea75547..b00efbf 100644 --- a/tests/test_integration_sentinel1.py +++ b/tests/test_integration_sentinel1.py @@ -13,7 +13,8 @@ import xarray as xr from eopf_geozarr.conversion import create_geozarr_dataset -from eopf_geozarr.tests import _verify_basic_structure + +from .conftest import _verify_basic_structure class MockSentinel1L1GRDBuilder: