Skip to content

Commit 155c4b3

Browse files
committed
fix: tolerate missing quicklook datasets
1 parent 4bdd64c commit 155c4b3

File tree

2 files changed

+54
-7
lines changed

2 files changed

+54
-7
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ kubectl logs -n devseed-staging -l sensor-name=geozarr-sensor --tail=50
232232
kubectl logs -n devseed-staging -l eventsource-name=rabbitmq-geozarr --tail=50
233233
```
234234

235+
## Quicklook Availability
236+
237+
- Sentinel-2 CPM releases from 2025-10-29 and 2025-10-31 frequently omit the `/quality/l2a_quicklook` group from the Zarr store even though STAC still advertises the asset.
238+
- `scripts/convert.py` now probes every advertised quicklook, keeps the ones that respond, and continues when a dataset returns HTTP 404; the resulting GeoZarr artifacts are published without previews until the upstream store restores them.
239+
- Expect empty preview links in TiTiler while the upstream dataset is missing; rerun the conversion once the provider repopulates the quicklook to attach it.
240+
- Regression check: `uv run -q pytest -q tests/test_cli_e2e.py::TestCLIEndToEnd::test_cli_convert_real_sentinel2_data -q`.
235241

236242
---
237243

scripts/convert.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,24 +56,58 @@ def get_config(collection_id: str) -> dict:
5656
return CONFIGS.get(prefix, CONFIGS["sentinel-2"]).copy()
5757

5858

59-
def get_zarr_asset_info(stac_item_url: str) -> tuple[str, dict[str, dict]]:
    """Fetch a STAC item and return its Zarr URL together with its assets.

    Asset selection priority: the ``product`` asset, then the ``zarr`` asset,
    then the first asset whose ``href`` contains ``.zarr``.

    Args:
        stac_item_url: URL of the STAC item JSON document.

    Returns:
        A ``(zarr_url, assets)`` tuple, where ``assets`` is the item's
        ``assets`` mapping (empty when the item has none).

    Raises:
        httpx.HTTPStatusError: If the STAC item request returns an error status.
        RuntimeError: If no asset pointing at a Zarr store is found.
    """
    with httpx.Client(timeout=30.0, follow_redirects=True) as client:
        item = client.get(stac_item_url).raise_for_status().json()

    # "assets" may be absent or explicitly null; treat both as "no assets".
    assets: dict[str, dict] = item.get("assets") or {}

    # Try priority assets first
    for key in ("product", "zarr"):
        if key in assets and (href := assets[key].get("href")):
            return str(href), assets

    # Fallback: any asset with .zarr in href
    for asset in assets.values():
        href = asset.get("href")
        # A null/non-string href cannot be substring-tested; skip it.
        if isinstance(href, str) and ".zarr" in href:
            return href, assets

    raise RuntimeError("No Zarr asset found in STAC item")
7578

7679

80+
def _quicklook_groups_from_assets(assets: dict[str, dict]) -> list[str]:
81+
if not assets:
82+
return []
83+
84+
groups: list[str] = []
85+
for asset in assets.values():
86+
href = asset.get("href", "")
87+
if "/quality/l2a_quicklook/" not in href or ".zarr/" not in href:
88+
continue
89+
rel = href.split(".zarr/", 1)[1].split("?", 1)[0].split(":", 1)[0]
90+
rel = rel.strip("/")
91+
if not rel:
92+
continue
93+
group = rel.rsplit("/", 1)[0] if "/" in rel else rel
94+
group_path = f"/{group.strip('/')}"
95+
if group_path not in groups:
96+
groups.append(group_path)
97+
return groups
98+
99+
100+
def _merge_quicklook_groups(default_groups: list[str], assets: dict[str, dict]) -> list[str]:
    """Swap the default quicklook groups for those found in the STAC assets.

    When the assets yield no quicklook groups, ``default_groups`` is returned
    unchanged. Otherwise every default group starting with
    ``/quality/l2a_quicklook`` is dropped and the discovered groups are
    appended after the remaining defaults, skipping any already present.
    """
    stac_groups = _quicklook_groups_from_assets(assets)
    if stac_groups:
        result: list[str] = []
        # Keep only the non-quicklook defaults, preserving their order.
        for name in default_groups:
            if name.startswith("/quality/l2a_quicklook"):
                continue
            result.append(name)
        # Append discovered groups that are not already listed.
        for candidate in stac_groups:
            if candidate not in result:
                result.append(candidate)
        logger.debug("Using quicklook groups derived from STAC assets: %s", stac_groups)
        return result

    return default_groups
109+
110+
77111
# === Conversion Workflow ===
78112

79113

@@ -107,13 +141,19 @@ def run_conversion(
107141
logger.info(f" Collection: {collection}")
108142

109143
# Resolve source: STAC item or direct Zarr URL
110-
zarr_url = get_zarr_url(source_url) if "/items/" in source_url else source_url
144+
item_assets: dict[str, dict] = {}
145+
if "/items/" in source_url:
146+
zarr_url, item_assets = get_zarr_asset_info(source_url)
147+
else:
148+
zarr_url = source_url
111149
logger.info(f" Source: {zarr_url}")
112150

113151
# Get config and apply overrides
114152
config = get_config(collection)
115153
if groups:
116154
config["groups"] = groups.split(",")
155+
else:
156+
config["groups"] = _merge_quicklook_groups(config["groups"], item_assets)
117157
if spatial_chunk is not None:
118158
config["spatial_chunk"] = spatial_chunk
119159
if tile_width is not None:
@@ -124,6 +164,7 @@ def run_conversion(
124164
logger.info(
125165
f" Parameters: chunk={config['spatial_chunk']}, tile={config['tile_width']}, sharding={config['enable_sharding']}"
126166
)
167+
logger.debug(" Groups to convert: %s", ", ".join(config["groups"]))
127168

128169
# Construct output path and clean existing
129170
output_url = f"s3://{s3_output_bucket}/{s3_output_prefix}/{collection}/{item_id}.zarr"

0 commit comments

Comments
 (0)