Skip to content

Commit 97534d9

Browse files
committed
chore: move tier distribution into the storage scheme
1 parent cee1192 commit 97534d9

File tree

5 files changed

+40
-32
lines changed

5 files changed

+40
-32
lines changed

scripts/README_update_storage_tier.md

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Updates existing STAC items with current S3 storage tier metadata.
44

55
## Modes
66

7-
**Update (default)**: Updates `ovh:storage_tier` for assets with existing `alternate.s3`
7+
**Update (default)**: Updates `storage:scheme.tier` for assets with existing `alternate.s3`
88

99
**Add Missing (`--add-missing`)**: Creates `alternate.s3` structure for legacy items without it
1010

@@ -19,19 +19,24 @@ Distribution shows file counts per tier, based on sample of up to 100 files.
1919
### Example: Mixed Storage
2020
```json
2121
{
22-
"ovh:storage_tier": "MIXED",
23-
"ovh:storage_tier_distribution": {
24-
"STANDARD": 450,
25-
"STANDARD_IA": 608
22+
"storage:scheme": {
23+
"platform": "OVHcloud",
24+
"region": "de",
25+
"requester_pays": false,
26+
"tier": "MIXED",
27+
"tier_distribution": {
28+
"STANDARD": 450,
29+
"STANDARD_IA": 608
30+
}
2631
}
2732
}
2833
```
2934

3035
## Notes
3136

3237
- Thumbnail assets are automatically skipped
33-
- Failed S3 queries remove existing `ovh:storage_tier` field
34-
- Distribution metadata only for Zarr directories
38+
- Failed S3 queries remove the existing `storage:scheme.tier` field
39+
- Distribution metadata is written only for Zarr directories (stored in `storage:scheme.tier_distribution`)
3540

3641
## Setup
3742

scripts/storage_tier_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ def get_s3_storage_class(s3_url: str, s3_endpoint: str) -> str | None:
103103
# Extract storage classes from list response (no need for additional head_object calls)
104104
storage_classes = []
105105
for obj in list_response["Contents"]:
106-
# StorageClass field is included in list_objects_v2 response
106+
# StorageClass field is included in list_objects_v2 response.
107+
# If the field is absent (it may be omitted for STANDARD-tier objects), default to STANDARD
107108
obj_class = obj.get("StorageClass", "STANDARD")
108109
storage_classes.append(obj_class)
109110

scripts/update_stac_storage_tier.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,16 @@ def update_item_storage_tiers(
135135
if tier:
136136
storage_scheme["tier"] = tier
137137

138+
# Add distribution to scheme if storage is mixed or multiple files sampled
139+
if storage_info["distribution"] is not None:
140+
storage_scheme["tier_distribution"] = storage_info["distribution"]
141+
138142
# Create alternate.s3 object
139143
s3_alternate = {
140144
"href": s3_url,
141145
"storage:scheme": storage_scheme,
142146
}
143147

144-
# Add distribution if storage is mixed or multiple files sampled
145-
if storage_info["distribution"] is not None:
146-
s3_alternate["ovh:storage_tier_distribution"] = storage_info["distribution"]
147-
148148
# Preserve other alternate formats (e.g., alternate.xarray if it exists)
149149
existing_alternate["s3"] = s3_alternate
150150
asset.extra_fields["alternate"] = existing_alternate
@@ -210,18 +210,20 @@ def update_item_storage_tiers(
210210
scheme_changed = True
211211
assets_with_tier += 1
212212

213-
# Add or update distribution if available
213+
# Add or update distribution in scheme if available
214214
if storage_info and storage_info.get("distribution") is not None:
215-
s3_info["ovh:storage_tier_distribution"] = storage_info["distribution"]
215+
if storage_scheme.get("tier_distribution") != storage_info["distribution"]:
216+
storage_scheme["tier_distribution"] = storage_info["distribution"]
217+
scheme_changed = True
216218
if storage_tier == "MIXED":
217219
logger.info(
218220
f" {asset_key}: Mixed storage detected - {storage_info['distribution']}"
219221
)
220222
else:
221-
# Remove distribution if no longer mixed
222-
if "ovh:storage_tier_distribution" in s3_info:
223-
del s3_info["ovh:storage_tier_distribution"]
224-
asset_changed = True
223+
# Remove distribution from scheme when no distribution is available (e.g. asset is now a single file)
224+
if "tier_distribution" in storage_scheme:
225+
del storage_scheme["tier_distribution"]
226+
scheme_changed = True
225227

226228
if old_tier != storage_tier:
227229
asset_changed = True
@@ -232,10 +234,10 @@ def update_item_storage_tiers(
232234
del storage_scheme["tier"]
233235
scheme_changed = True
234236
logger.debug(f" {asset_key}: removed tier (not available)")
235-
# Also remove distribution if present
236-
if "ovh:storage_tier_distribution" in s3_info:
237-
del s3_info["ovh:storage_tier_distribution"]
238-
asset_changed = True
237+
# Also remove distribution from scheme if present
238+
if "tier_distribution" in storage_scheme:
239+
del storage_scheme["tier_distribution"]
240+
scheme_changed = True
239241

240242
# Update storage:scheme in s3_info if changed
241243
if scheme_changed:

tests/fixtures/update_storage_tier/stac_item_mixed_storage.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
"platform": "OVHcloud",
2020
"region": "de",
2121
"requester_pays": false,
22-
"tier": "MIXED"
23-
},
24-
"ovh:storage_tier_distribution": {
25-
"STANDARD": 450,
26-
"STANDARD_IA": 608
22+
"tier": "MIXED",
23+
"tier_distribution": {
24+
"STANDARD": 450,
25+
"STANDARD_IA": 608
26+
}
2727
}
2828
}
2929
}

tests/unit/test_update_stac_storage_tier.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def test_mixed_storage_adds_distribution(self, mock_get_info, stac_item_before):
148148
assert updated == 1
149149
s3_info = stac_item_before.assets["reflectance"].extra_fields["alternate"]["s3"]
150150
assert s3_info["storage:scheme"]["tier"] == "MIXED"
151-
assert s3_info["ovh:storage_tier_distribution"] == {"STANDARD": 450, "GLACIER": 608}
151+
assert s3_info["storage:scheme"]["tier_distribution"] == {"STANDARD": 450, "GLACIER": 608}
152152

153153
@patch("update_stac_storage_tier.get_s3_storage_info")
154154
def test_uniform_zarr_adds_distribution(self, mock_get_info, stac_item_before):
@@ -164,7 +164,7 @@ def test_uniform_zarr_adds_distribution(self, mock_get_info, stac_item_before):
164164
assert updated == 1
165165
s3_info = stac_item_before.assets["reflectance"].extra_fields["alternate"]["s3"]
166166
assert s3_info["storage:scheme"]["tier"] == "GLACIER"
167-
assert s3_info["ovh:storage_tier_distribution"] == {"GLACIER": 100}
167+
assert s3_info["storage:scheme"]["tier_distribution"] == {"GLACIER": 100}
168168

169169
@patch("update_stac_storage_tier.get_s3_storage_info")
170170
def test_single_file_no_distribution(self, mock_get_info, stac_item_before):
@@ -180,7 +180,7 @@ def test_single_file_no_distribution(self, mock_get_info, stac_item_before):
180180
assert updated == 1
181181
s3_info = stac_item_before.assets["reflectance"].extra_fields["alternate"]["s3"]
182182
assert s3_info["storage:scheme"]["tier"] == "GLACIER"
183-
assert "ovh:storage_tier_distribution" not in s3_info
183+
assert "tier_distribution" not in s3_info["storage:scheme"]
184184

185185
@patch("update_stac_storage_tier.get_s3_storage_info")
186186
def test_removes_distribution_when_becomes_single_file(self, mock_get_info, stac_item_before):
@@ -192,7 +192,7 @@ def test_removes_distribution_when_becomes_single_file(self, mock_get_info, stac
192192
if "storage:scheme" not in s3_info:
193193
s3_info["storage:scheme"] = {}
194194
s3_info["storage:scheme"]["tier"] = "MIXED"
195-
s3_info["ovh:storage_tier_distribution"] = {"STANDARD": 450, "GLACIER": 608}
195+
s3_info["storage:scheme"]["tier_distribution"] = {"STANDARD": 450, "GLACIER": 608}
196196

197197
mock_get_info.return_value = {"tier": "GLACIER", "distribution": None}
198198

@@ -203,7 +203,7 @@ def test_removes_distribution_when_becomes_single_file(self, mock_get_info, stac
203203
assert updated == 1
204204
s3_info = stac_item_before.assets["reflectance"].extra_fields["alternate"]["s3"]
205205
assert s3_info["storage:scheme"]["tier"] == "GLACIER"
206-
assert "ovh:storage_tier_distribution" not in s3_info
206+
assert "tier_distribution" not in s3_info["storage:scheme"]
207207

208208
@patch("update_stac_storage_tier.get_s3_storage_info")
209209
def test_skips_thumbnail_assets(self, mock_get_info, stac_item_before):

0 commit comments

Comments
 (0)