Skip to content

Commit 88b0e2b

Browse files
committed
refactor: update storage tier handling to v2.0 format in STAC items
- Changed storage tier fields to use a nested 'storage:scheme' object.
- Updated tests to reflect new structure and ensure correct assertions.
- Adjusted functions to create and manage the new storage scheme format.
1 parent 6873b96 commit 88b0e2b

File tree

9 files changed

+140
-85
lines changed

9 files changed

+140
-85
lines changed

scripts/register_v1.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def add_alternate_s3_assets(item: Item, s3_endpoint: str) -> None:
375375
# Query storage class for this asset
376376
storage_tier = get_s3_storage_class(s3_url, s3_endpoint)
377377

378-
# Add alternate with storage extension fields
378+
# Add alternate with storage extension fields (v2.0 format)
379379
if not hasattr(asset, "extra_fields"):
380380
asset.extra_fields = {}
381381

@@ -389,20 +389,27 @@ def add_alternate_s3_assets(item: Item, s3_endpoint: str) -> None:
389389
if not isinstance(existing_s3, dict):
390390
existing_s3 = {}
391391

392+
# Get or create storage:scheme object (v2.0 format)
393+
storage_scheme = existing_s3.get("storage:scheme", {})
394+
if not isinstance(storage_scheme, dict):
395+
storage_scheme = {}
396+
397+
# Update scheme fields
398+
storage_scheme["platform"] = "OVHcloud"
399+
storage_scheme["region"] = region
400+
storage_scheme["requester_pays"] = False
401+
402+
# Add tier to scheme (standard field in v2.0)
403+
if storage_tier:
404+
storage_scheme["tier"] = storage_tier
405+
392406
# Update s3 alternate (preserving any existing fields)
393407
s3_alternate = {
394408
**existing_s3, # Preserve existing fields
395409
"href": s3_url,
396-
"storage:platform": "OVHcloud",
397-
"storage:region": region,
398-
"storage:requester_pays": False,
410+
"storage:scheme": storage_scheme,
399411
}
400412

401-
# Add storage tier as a custom field (not part of storage extension spec)
402-
# Using ovh: prefix to indicate vendor-specific extension
403-
if storage_tier:
404-
s3_alternate["ovh:storage_tier"] = storage_tier
405-
406413
# Preserve other alternate formats (e.g., alternate.xarray if it exists)
407414
existing_alternate["s3"] = s3_alternate
408415
asset.extra_fields["alternate"] = existing_alternate

scripts/test_complete_workflow.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,16 @@ def process_item_with_gateway(item_dict: dict, s3_endpoint: str) -> dict:
104104
# Add alternate S3 URL
105105
s3_url = https_to_s3(asset["href"])
106106
if s3_url:
107+
# Create storage scheme object following v2.0 spec
108+
storage_scheme = {
109+
"platform": "OVHcloud",
110+
"region": region,
111+
"requester_pays": False,
112+
}
107113
asset["alternate"] = {
108114
"s3": {
109115
"href": s3_url,
110-
"storage:platform": "OVHcloud",
111-
"storage:region": region,
112-
"storage:requester_pays": False,
116+
"storage:scheme": storage_scheme,
113117
}
114118
}
115119
processed_count += 1
@@ -176,8 +180,10 @@ def main() -> int:
176180
if "alternate" in asset:
177181
s3_alt = asset["alternate"]["s3"]
178182
print(f" S3 alternate href: {s3_alt['href']}")
179-
print(f" Storage platform: {s3_alt['storage:platform']}")
180-
print(f" Storage region: {s3_alt['storage:region']}")
183+
if "storage:scheme" in s3_alt:
184+
scheme = s3_alt["storage:scheme"]
185+
print(f" Storage platform: {scheme.get('platform', 'N/A')}")
186+
print(f" Storage region: {scheme.get('region', 'N/A')}")
181187
break
182188

183189
# Show store link

scripts/update_stac_storage_tier.py

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,21 @@ def update_item_storage_tiers(
124124
if not isinstance(existing_alternate, dict):
125125
existing_alternate = {}
126126

127+
# Create storage scheme object following v2.0 spec
128+
storage_scheme = {
129+
"platform": "OVHcloud",
130+
"region": region,
131+
"requester_pays": False,
132+
}
133+
134+
# Add tier to scheme (standard field in v2.0)
135+
if tier:
136+
storage_scheme["tier"] = tier
137+
127138
# Create alternate.s3 object
128139
s3_alternate = {
129140
"href": s3_url,
130-
"storage:platform": "OVHcloud",
131-
"storage:region": region,
132-
"storage:requester_pays": False,
133-
"ovh:storage_tier": tier,
141+
"storage:scheme": storage_scheme,
134142
}
135143

136144
# Add distribution if storage is mixed or multiple files sampled
@@ -174,24 +182,32 @@ def update_item_storage_tiers(
174182
else:
175183
storage_tier = storage_info["tier"]
176184

185+
# Get or create storage:scheme object (v2.0 format)
186+
storage_scheme = s3_info.get("storage:scheme", {})
187+
if not isinstance(storage_scheme, dict):
188+
storage_scheme = {}
189+
177190
# Track if anything changed
178191
asset_changed = False
179-
old_tier = s3_info.get("ovh:storage_tier")
180-
181-
# Update or add storage extension fields (only if missing)
182-
if "storage:platform" not in s3_info:
183-
s3_info["storage:platform"] = "OVHcloud"
184-
asset_changed = True
185-
if "storage:region" not in s3_info:
186-
s3_info["storage:region"] = region
187-
asset_changed = True
188-
if "storage:requester_pays" not in s3_info:
189-
s3_info["storage:requester_pays"] = False
190-
asset_changed = True
191-
192-
# Add/update storage tier if available
192+
old_tier = storage_scheme.get("tier")
193+
194+
# Update scheme fields (only if missing)
195+
scheme_changed = False
196+
if "platform" not in storage_scheme:
197+
storage_scheme["platform"] = "OVHcloud"
198+
scheme_changed = True
199+
if "region" not in storage_scheme:
200+
storage_scheme["region"] = region
201+
scheme_changed = True
202+
if "requester_pays" not in storage_scheme:
203+
storage_scheme["requester_pays"] = False
204+
scheme_changed = True
205+
206+
# Add/update tier in scheme (standard v2.0 field)
193207
if storage_tier:
194-
s3_info["ovh:storage_tier"] = storage_tier
208+
if storage_scheme.get("tier") != storage_tier:
209+
storage_scheme["tier"] = storage_tier
210+
scheme_changed = True
195211
assets_with_tier += 1
196212

197213
# Add or update distribution if available
@@ -212,15 +228,20 @@ def update_item_storage_tiers(
212228
logger.debug(f" {asset_key}: {old_tier or 'none'} -> {storage_tier}")
213229
else:
214230
# Remove tier if it cannot be determined
215-
if "ovh:storage_tier" in s3_info:
216-
del s3_info["ovh:storage_tier"]
217-
asset_changed = True
231+
if "tier" in storage_scheme:
232+
del storage_scheme["tier"]
233+
scheme_changed = True
218234
logger.debug(f" {asset_key}: removed tier (not available)")
219235
# Also remove distribution if present
220236
if "ovh:storage_tier_distribution" in s3_info:
221237
del s3_info["ovh:storage_tier_distribution"]
222238
asset_changed = True
223239

240+
# Update storage:scheme in s3_info if changed
241+
if scheme_changed:
242+
s3_info["storage:scheme"] = storage_scheme
243+
asset_changed = True
244+
224245
if asset_changed:
225246
assets_updated += 1
226247

tests/fixtures/update_storage_tier/stac_item_after_tier_change.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
"alternate": {
1616
"s3": {
1717
"href": "s3://bucket/data.zarr/measurements/reflectance",
18-
"storage:platform": "OVHcloud",
19-
"storage:region": "de",
20-
"storage:requester_pays": false,
21-
"ovh:storage_tier": "STANDARD_IA"
18+
"storage:scheme": {
19+
"platform": "OVHcloud",
20+
"region": "de",
21+
"requester_pays": false,
22+
"tier": "STANDARD_IA"
23+
}
2224
}
2325
}
2426
},

tests/fixtures/update_storage_tier/stac_item_before.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
"alternate": {
1616
"s3": {
1717
"href": "s3://bucket/data.zarr/measurements/reflectance",
18-
"storage:platform": "OVHcloud",
19-
"storage:region": "de",
20-
"storage:requester_pays": false,
21-
"ovh:storage_tier": "STANDARD"
18+
"storage:scheme": {
19+
"platform": "OVHcloud",
20+
"region": "de",
21+
"requester_pays": false,
22+
"tier": "STANDARD"
23+
}
2224
}
2325
}
2426
},

tests/fixtures/update_storage_tier/stac_item_mixed_storage.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
"alternate": {
1616
"s3": {
1717
"href": "s3://bucket/data.zarr/measurements/reflectance",
18-
"storage:platform": "OVHcloud",
19-
"storage:region": "de",
20-
"storage:requester_pays": false,
21-
"ovh:storage_tier": "MIXED",
18+
"storage:scheme": {
19+
"platform": "OVHcloud",
20+
"region": "de",
21+
"requester_pays": false,
22+
"tier": "MIXED"
23+
},
2224
"ovh:storage_tier_distribution": {
2325
"STANDARD": 450,
2426
"STANDARD_IA": 608

tests/test_alternate_extension.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,16 @@ def add_alternate_s3_to_item(item_dict: dict, s3_endpoint: str) -> dict:
6565
if not s3_url:
6666
continue
6767

68-
# Add alternate
68+
# Add alternate (v2.0 format)
69+
storage_scheme = {
70+
"platform": "OVHcloud",
71+
"region": region,
72+
"requester_pays": False,
73+
}
6974
asset["alternate"] = {
7075
"s3": {
7176
"href": s3_url,
72-
"storage:platform": "OVHcloud",
73-
"storage:region": region,
74-
"storage:requester_pays": False,
77+
"storage:scheme": storage_scheme,
7578
}
7679
}
7780
modified_count += 1
@@ -110,8 +113,10 @@ def main():
110113
if "alternate" in asset:
111114
s3_alt = asset["alternate"]["s3"]
112115
print(f" S3 href: {s3_alt['href']}")
113-
print(f" Storage platform: {s3_alt['storage:platform']}")
114-
print(f" Storage region: {s3_alt['storage:region']}")
116+
if "storage:scheme" in s3_alt:
117+
scheme = s3_alt["storage:scheme"]
118+
print(f" Storage platform: {scheme.get('platform', 'N/A')}")
119+
print(f" Storage region: {scheme.get('region', 'N/A')}")
115120
break
116121

117122
# Show extensions

tests/test_s3_gateway.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,11 @@ def test_add_alternate_to_gateway_url(self):
308308

309309
s3_alt = asset.extra_fields["alternate"]["s3"]
310310
assert s3_alt["href"] == "s3://my-bucket/path/file.zarr"
311-
assert s3_alt["storage:platform"] == "OVHcloud"
312-
assert s3_alt["storage:region"] == "de"
313-
assert s3_alt["storage:requester_pays"] is False
311+
assert "storage:scheme" in s3_alt
312+
scheme = s3_alt["storage:scheme"]
313+
assert scheme["platform"] == "OVHcloud"
314+
assert scheme["region"] == "de"
315+
assert scheme["requester_pays"] is False
314316

315317
def test_add_alternate_to_old_s3_url(self):
316318
"""Test adding alternate S3 URL to asset with old S3 format URL."""
@@ -336,7 +338,8 @@ def test_add_alternate_to_old_s3_url(self):
336338
asset = item.assets["data"]
337339
s3_alt = asset.extra_fields["alternate"]["s3"]
338340
assert s3_alt["href"] == "s3://my-bucket/path/file.zarr"
339-
assert s3_alt["storage:region"] == "gra"
341+
assert "storage:scheme" in s3_alt
342+
assert s3_alt["storage:scheme"]["region"] == "gra"
340343

341344
def test_skip_thumbnail_asset(self):
342345
"""Test that thumbnail assets are skipped."""
@@ -426,8 +429,14 @@ def test_multiple_assets(self):
426429
# Data assets should have alternates
427430
assert "alternate" in item.assets["data1"].extra_fields
428431
assert "alternate" in item.assets["data2"].extra_fields
429-
assert item.assets["data1"].extra_fields["alternate"]["s3"]["storage:region"] == "sbg"
430-
assert item.assets["data2"].extra_fields["alternate"]["s3"]["storage:region"] == "sbg"
432+
assert (
433+
item.assets["data1"].extra_fields["alternate"]["s3"]["storage:scheme"]["region"]
434+
== "sbg"
435+
)
436+
assert (
437+
item.assets["data2"].extra_fields["alternate"]["s3"]["storage:scheme"]["region"]
438+
== "sbg"
439+
)
431440

432441
# Thumbnail should not
433442
assert "alternate" not in item.assets["thumbnail"].extra_fields
@@ -451,7 +460,9 @@ def test_region_detection_de(self):
451460
)
452461

453462
add_alternate_s3_assets(item, "https://s3.de.io.cloud.ovh.net")
454-
assert item.assets["data"].extra_fields["alternate"]["s3"]["storage:region"] == "de"
463+
assert (
464+
item.assets["data"].extra_fields["alternate"]["s3"]["storage:scheme"]["region"] == "de"
465+
)
455466

456467
def test_region_detection_gra(self):
457468
"""Test region detection for GRA region."""
@@ -472,7 +483,9 @@ def test_region_detection_gra(self):
472483
)
473484

474485
add_alternate_s3_assets(item, "https://s3.gra.io.cloud.ovh.net")
475-
assert item.assets["data"].extra_fields["alternate"]["s3"]["storage:region"] == "gra"
486+
assert (
487+
item.assets["data"].extra_fields["alternate"]["s3"]["storage:scheme"]["region"] == "gra"
488+
)
476489

477490
def test_region_detection_unknown(self):
478491
"""Test region detection for unknown region."""
@@ -493,7 +506,10 @@ def test_region_detection_unknown(self):
493506
)
494507

495508
add_alternate_s3_assets(item, "https://s3.amazonaws.com")
496-
assert item.assets["data"].extra_fields["alternate"]["s3"]["storage:region"] == "unknown"
509+
assert (
510+
item.assets["data"].extra_fields["alternate"]["s3"]["storage:scheme"]["region"]
511+
== "unknown"
512+
)
497513

498514
def test_extensions_not_duplicated(self):
499515
"""Test that extensions are not duplicated if already present."""

0 commit comments

Comments
 (0)