Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f6520d1
add option for using new multiscales convention in optimized conversion
d-v-b Nov 20, 2025
686cb51
remove OOP converter and use plain functions
d-v-b Nov 21, 2025
58b578d
use pydantic models for fingerprinting sentinel2 product
d-v-b Nov 21, 2025
5bea14b
fix multiscales to use da.coarsen and propagate encoding
d-v-b Nov 21, 2025
407f167
ensure that dtype is preserved after resampling
d-v-b Nov 24, 2025
9170fd6
add new multiscales JSON example
d-v-b Nov 24, 2025
0b8243f
add mypy pydantic plugin
d-v-b Nov 24, 2025
7ea938a
lint
d-v-b Nov 24, 2025
8ad6365
add s1 and s2 demo data to tests, and don't test against remote urls
d-v-b Nov 24, 2025
c5f9e09
fix e2e tests
d-v-b Nov 24, 2025
3dd13fb
remove network test workflow from CI
d-v-b Nov 24, 2025
baeb5ec
remove extra type definition and update tests
d-v-b Nov 24, 2025
959a419
remove explicit zarr groups in favor of dynamic test fixtures
d-v-b Nov 24, 2025
7b08f9a
docstrings
d-v-b Nov 24, 2025
e02d2e0
Enhance CRS initialization and update S2 optimization commands
emmanuelmathot Nov 26, 2025
f4e9de8
Refactor code formatting for clarity in S2 optimization functions
emmanuelmathot Nov 26, 2025
cd93e34
fix failing / warning tests
d-v-b Nov 27, 2025
4d7be8f
add strict JSON schema equality check to e2e tests
d-v-b Nov 27, 2025
94e9040
support both flavors of multiscale metadata
d-v-b Nov 28, 2025
5ed7f02
don't manage return codes in cli functions
d-v-b Nov 28, 2025
b4752d6
add s2 optimized test
d-v-b Nov 28, 2025
c3739a3
add optimized geozarr example hierarchies
d-v-b Nov 28, 2025
3b83c68
format JSON documents
d-v-b Nov 28, 2025
9c6a85b
mid-debug of e2e tests
d-v-b Nov 28, 2025
b0f1e11
WIP e2e fixes
d-v-b Dec 1, 2025
45dd9a4
make cf standard name validator become a pass-through when no interne…
d-v-b Dec 1, 2025
dc0e561
update example schemas
d-v-b Dec 1, 2025
2a6f00f
narrow type to just tuples in types.py
d-v-b Dec 1, 2025
b8cc1df
refactor consolidation
d-v-b Dec 1, 2025
6db66f2
use consolidated=False in conversion
d-v-b Dec 1, 2025
a231d20
update tests
d-v-b Dec 1, 2025
c924c89
lint
d-v-b Dec 1, 2025
9cba2e2
add both multiscales types to output
d-v-b Dec 1, 2025
2a5ca55
update comments in tests
d-v-b Dec 1, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,44 +70,6 @@ jobs:
.venv
key: uv-main-${{ hashFiles('uv.lock') }}

test-network:
runs-on: ubuntu-latest
# Temporarily enabled for PR to verify test fix
if: github.event_name == 'push' || github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.11'

- name: Restore global uv cache
id: cache-restore
uses: actions/cache/restore@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-${{ hashFiles('uv.lock') }}
restore-keys: |
uv-main-

- name: Install uv
uses: astral-sh/setup-uv@v7
with:
version: "0.8.4"
python-version: "3.13"
enable-cache: false

- name: Install dependencies
run: uv sync --group dev --group test

- name: Run network tests
run: |
uv run pytest tests/ -v --tb=short -m "network"

security:
runs-on: ubuntu-latest
steps:
Expand Down
8 changes: 5 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,12 @@
"module": "eopf_geozarr",
"args": [
"convert-s2-optimized",
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/08/products/cpm_v256/S2A_MSIL2A_20250908T100041_N0511_R122_T32TQM_20250908T115116.zarr",
// "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202509-s02msil2a/08/products/cpm_v256/S2A_MSIL2A_20250908T100041_N0511_R122_T32TQM_20250908T115116.zarr",
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202511-s02msil2a-eu/15/products/cpm_v262/S2B_MSIL2A_20251115T091139_N0511_R050_T35SLU_20251115T111807.zarr",
// "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a-opt/S2A_MSIL2A_20250908T100041_N0511_R122_T32TQM_20250908T115116.zarr",
"./tests-output/eopf_geozarr/s2l2_optimized.zarr",
"--spatial-chunk", "1024",
"s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a-pr75/S2B_MSIL2A_20251115T091139_N0511_R050_T35SLU_20251115T111807.zarr",
// "./tests-output/eopf_geozarr/s2l2_optimized.zarr",
"--spatial-chunk", "512",
"--compression-level", "5",
"--enable-sharding",
"--dask-cluster",
Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true
plugins = ["pydantic.mypy"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
warn_untyped_fields = true

[[tool.mypy.overrides]]
module = ["zarr.*", "xarray.*", "rioxarray.*", "cf_xarray.*", "dask.*"]
Expand Down
22 changes: 1 addition & 21 deletions src/eopf_geozarr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,14 +1182,6 @@ def add_s2_optimization_commands(subparsers: Any) -> None:
choices=range(1, 10),
help="Compression level 1-9 (default: 3)",
)
s2_parser.add_argument(
"--skip-geometry", action="store_true", help="Skip creating geometry group"
)
s2_parser.add_argument(
"--skip-meteorology",
action="store_true",
help="Skip creating meteorology group",
)
s2_parser.add_argument(
"--skip-validation", action="store_true", help="Skip output validation"
)
Expand All @@ -1204,7 +1196,7 @@ def add_s2_optimization_commands(subparsers: Any) -> None:
s2_parser.set_defaults(func=convert_s2_optimized_command)


def convert_s2_optimized_command(args: Any) -> int:
def convert_s2_optimized_command(args: Any) -> None:
"""Execute S2 optimized conversion command."""
# Set up dask cluster if requested
dask_client = setup_dask_cluster(
Expand All @@ -1229,22 +1221,10 @@ def convert_s2_optimized_command(args: Any) -> int:
enable_sharding=args.enable_sharding,
spatial_chunk=args.spatial_chunk,
compression_level=args.compression_level,
create_geometry_group=not args.skip_geometry,
create_meteorology_group=not args.skip_meteorology,
validate_output=not args.skip_validation,
verbose=args.verbose,
)

log.info("✅ S2 optimization completed", output_path=args.output_path)
return 0

except Exception as e:
log.info("❌ Error during S2 optimization", error=str(e))
if args.verbose:
import traceback

traceback.print_exc()
return 1
finally:
# Clean up dask client if it was created
if dask_client is not None:
Expand Down
44 changes: 32 additions & 12 deletions src/eopf_geozarr/conversion/geozarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def create_geozarr_dataset(
dt, groups, gcp_group
)

log.info("GeoZarr groups prepared", groups_prepared=list(geozarr_groups.keys()))

# Create the GeoZarr compliant store through iterative processing
dt_geozarr = iterative_copy(
dt,
Expand Down Expand Up @@ -180,11 +182,19 @@ def setup_datatree_metadata_geozarr_spec_compliant(
epsg_CPM_260 = epsg_CPM_260.split(":")[-1]

for key in groups:
if not dt[key].data_vars:
# Check if key exists in DataTree by attempting to access it
try:
node = dt[key]
except KeyError:
log.info(f"Skipping group {key} - not found in DataTree")
continue

if not node.data_vars:
log.info(f"Skipping group {key} - no data variables")
continue

log.info(f"Processing group {key} for GeoZarr compliance")
ds = dt[key].to_dataset().copy()
ds = node.to_dataset().copy()

if gcp_group is not None:
ds_gcp = dt[gcp_group].to_dataset()
Expand Down Expand Up @@ -233,7 +243,11 @@ def setup_datatree_metadata_geozarr_spec_compliant(
_setup_grid_mapping(ds, grid_mapping_var_name)

geozarr_groups[key] = ds
log.info(f"Added {key} to geozarr_groups")

log.info(
f"Returning geozarr_groups with {len(geozarr_groups)} groups: {list(geozarr_groups.keys())}"
)
return geozarr_groups


Expand Down Expand Up @@ -287,7 +301,7 @@ def iterative_copy(
dt_result.to_zarr(
output_path,
mode="a",
consolidated=True,
consolidated=False,
compute=True,
storage_options=storage_options,
)
Expand Down Expand Up @@ -638,7 +652,7 @@ def create_geozarr_compliant_multiscales(
level=ol["level"],
width=ol["width"],
height=ol["height"],
scale_factor=ol["scale_factor"],
scale_factor=ol["scale_relative"],
)

# Create native CRS tile matrix set
Expand Down Expand Up @@ -679,7 +693,7 @@ def create_geozarr_compliant_multiscales(

width = overview["width"]
height = overview["height"]
scale_factor = overview["scale_factor"]
scale_factor = overview["scale_relative"]

log.info(
f"Creating overview level (scale) {level} with scale factor {scale_factor}"
Expand Down Expand Up @@ -733,7 +747,7 @@ def create_geozarr_compliant_multiscales(
output_path,
group=overview_group,
mode="w",
consolidated=True,
consolidated=False,
zarr_format=3,
encoding=encoding,
align_chunks=align_chunks_flag,
Expand Down Expand Up @@ -824,7 +838,9 @@ def calculate_overview_levels(
"zoom": zoom,
"width": current_width,
"height": current_height,
"scale_factor": 2**level,
"translation_relative": 0.0,
"scale_absolute": 1.0,
"scale_relative": 2**level,
Copy link

@vincentsarago vincentsarago Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we imagine base scale factors other than 2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
overview_levels.append(overview_level) # type: ignore[arg-type]

Expand Down Expand Up @@ -877,8 +893,8 @@ def create_native_crs_tile_matrix_set(
scale_denominator = cell_size * 3779.5275

# Calculate matrix dimensions
tile_width = overview["chunks"][1][0] if "chunks" in overview else 256
tile_height = overview["chunks"][0][0] if "chunks" in overview else 256
tile_width = overview["chunks"][1][0] if "chunks" in overview else 256 # type: ignore[index]
tile_height = overview["chunks"][0][0] if "chunks" in overview else 256 # type: ignore[index]
matrix_width = int(np.ceil(width / tile_width))
matrix_height = int(np.ceil(height / tile_height))

Expand All @@ -889,7 +905,7 @@ def create_native_crs_tile_matrix_set(
"id": matrix_id,
"scaleDenominator": scale_denominator,
"cellSize": cell_size,
"pointOfOrigin": [left, top],
"pointOfOrigin": (left, top),
"tileWidth": tile_width,
"tileHeight": tile_height,
"matrixWidth": matrix_width,
Expand All @@ -910,8 +926,8 @@ def create_native_crs_tile_matrix_set(
"title": f"Native CRS Tile Matrix Set ({native_crs})",
"crs": crs_uri,
"supportedCRS": crs_uri,
"orderedAxes": ["X", "Y"],
"tileMatrices": tile_matrices,
"orderedAxes": ("X", "Y"),
"tileMatrices": tuple(tile_matrices),
}


Expand Down Expand Up @@ -1802,3 +1818,7 @@ def _is_sentinel1(dt: xr.DataTree) -> bool:
return True
else:
return False


def get_zarr_group(data: xr.DataTree) -> zarr.Group:
    """
    Return the zarr group reachable through the closer attached to ``data``.

    Parameters
    ----------
    data : xr.DataTree
        The tree whose private ``_close`` attribute is inspected.

    Returns
    -------
    zarr.Group
        The ``zarr_group`` attribute of the object that ``data._close`` is
        bound to.

    Notes
    -----
    This reads the private ``_close`` attribute of the tree.
    NOTE(review): presumably ``_close`` is a bound method of the zarr backend
    store when the tree was opened from a zarr store -- confirm, and confirm
    what happens for trees that were built in memory.
    """
    closer = data._close
    # ``__self__`` is the instance a bound method is attached to.
    owner = closer.__self__
    return owner.zarr_group
43 changes: 26 additions & 17 deletions src/eopf_geozarr/data_api/geozarr/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import urllib
import urllib.request
from dataclasses import dataclass
from typing import Annotated, Any, Mapping, Self, TypeVar
from typing import Annotated, Any, Mapping, Self, TypeGuard, TypeVar
from urllib.error import URLError

from cf_xarray.utils import parse_cf_standard_name_table
from pydantic import AfterValidator, BaseModel, Field, model_validator
Expand Down Expand Up @@ -98,12 +99,9 @@ def get_cf_standard_names(url: str) -> tuple[str, ...]:

req = urllib.request.Request(url, headers=headers)

try:
with urllib.request.urlopen(req) as response:
content = response.read() # Read the entire response body into memory
content_fobj = io.BytesIO(content)
except urllib.error.URLError as e:
raise e
with urllib.request.urlopen(req) as response:
content = response.read() # Read the entire response body into memory
content_fobj = io.BytesIO(content)

_info, table, _aliases = parse_cf_standard_name_table(source=content_fobj)
return tuple(table.keys())
Expand All @@ -117,7 +115,13 @@ def get_cf_standard_names(url: str) -> tuple[str, ...]:

# this does IO against github. consider locally storing this data instead if fetching every time
# is problematic.
CF_STANDARD_NAMES = get_cf_standard_names(url=CF_STANDARD_NAME_URL)

try:
    CF_STANDARD_NAMES = get_cf_standard_names(url=CF_STANDARD_NAME_URL)
except URLError:
    # The standard-name table could not be fetched: fall back to an empty
    # tuple and record that name validation is switched off.
    CF_STANDARD_NAMES = ()
    DO_CF_NAME_VALIDATION = False
else:
    # The table was fetched, so names can be validated against it.
    DO_CF_NAME_VALIDATION = True


def check_standard_name(name: str) -> str:
Expand All @@ -139,12 +143,13 @@ def check_standard_name(name: str) -> str:
ValueError
If the standard name is not valid.
"""

if name in CF_STANDARD_NAMES:
return name
raise ValueError(
f"Invalid standard name: {name}. This name was not found in the list of CF standard names."
)
if DO_CF_NAME_VALIDATION:
if name in CF_STANDARD_NAMES:
return name
raise ValueError(
f"Invalid standard name: {name}. This name was not found in the list of CF standard names."
)
return name


CFStandardName = Annotated[str, AfterValidator(check_standard_name)]
Expand Down Expand Up @@ -245,9 +250,9 @@ class TileMatrixSet(BaseModel):
tileMatrices: tuple[TileMatrix, ...]


class Multiscales(BaseModel, extra="allow"):
class TMSMultiscales(BaseModel, extra="allow"):
"""
Multiscale metadata for a GeoZarr dataset.
Multiscale metadata for a GeoZarr dataset based on the OGC TileMatrixSet standard

Attributes
----------
Expand Down Expand Up @@ -307,4 +312,8 @@ class MultiscaleGroupAttrs(BaseModel, extra="allow"):
multiscales: MultiscaleAttrs
"""

multiscales: Multiscales
multiscales: TMSMultiscales


def is_none(data: object) -> TypeGuard[None]:
return data is None
8 changes: 2 additions & 6 deletions src/eopf_geozarr/data_api/geozarr/geoproj.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@

from __future__ import annotations

from typing import Literal, Self, TypeGuard
from typing import Literal, Self

from pydantic import BaseModel, Field, model_validator
from typing_extensions import TypedDict

from eopf_geozarr.data_api.geozarr.common import is_none
from eopf_geozarr.data_api.geozarr.projjson import ProjJSON


def is_none(data: object) -> TypeGuard[None]:
return data is None


GEO_PROJ_UUID: Literal["f17cb550-5864-4468-aeb7-f3180cfb622f"] = (
"f17cb550-5864-4468-aeb7-f3180cfb622f"
)
Expand Down
Loading
Loading