Skip to content

Commit 63cea6a

Browse files
authored
Merge pull request #90 from unity-sds/stac_file_asset_ext
Add file:size and file:checksum attributes to STAC asset values in stage-out catalogs
2 parents 5a7acaa + 0ecdfe8 commit 63cea6a

File tree

3 files changed

+34
-9
lines changed

3 files changed

+34
-9
lines changed

libs/unity-py/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.11.0] - 2025-05-19
9+
10+
### Added
11+
* When creating STAC catalogs using the Collection resource, if an asset is a file that exists on local disk alongside the catalog file, then the `file:size` and `file:checksum` values will be added to the asset's STAC entry.
12+
813
## [0.10.1] - 2025-03-04
914

1015
### Added

libs/unity-py/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "unity-sds-client"
3-
version = "0.10.1"
3+
version = "0.11.0"
44

55
description = "Unity-Py is a Python client to simplify interactions with NASA's Unity Platform."
66
authors = ["Anil Natha, Mike Gangl"]

libs/unity-py/unity_sds_client/resources/collection.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from unity_sds_client.resources.dataset import Dataset
33
from unity_sds_client.resources.data_file import DataFile
44
from pystac import Catalog, get_stac_version, ItemCollection, Item, Asset
5+
from pystac.extensions.file import FileExtension
56
from pystac.errors import STACTypeError
7+
import hashlib
68
import json
79
import os
810
from datetime import datetime
@@ -132,15 +134,33 @@ def to_stac(collection, data_dir):
132134
if key.startswith("./"):
133135
key = os.path.basename(key)
134136

135-
item.add_asset(
136-
key = key,
137-
asset = Asset(
138-
href = item_location,
139-
title = "{} file".format(df.type),
140-
description = "",
141-
roles = df.roles
137+
asset = Asset(
138+
href = item_location,
139+
title = "{} file".format(df.type),
140+
description = "",
141+
roles = df.roles
142+
)
143+
144+
# If the file exists locally on disk then add optional STAC
145+
# attributes: file:size, file:checksum
146+
data_filename = os.path.join(data_dir, item_location)
147+
if os.path.exists(data_filename):
148+
# Get file size
149+
file_stats = os.stat(data_filename)
150+
151+
# Compute MD5SUM
152+
md5 = hashlib.md5()
153+
with open(data_filename, "rb") as f:
154+
while chunk := f.read(4096):
155+
md5.update(chunk)
156+
157+
file_ext = FileExtension.ext(asset)
158+
file_ext.apply(
159+
size=file_stats.st_size,
160+
checksum=md5.hexdigest()
142161
)
143-
)
162+
163+
item.add_asset(key=key, asset=asset)
144164

145165
from pystac.layout import TemplateLayoutStrategy
146166
write_dir = data_dir

0 commit comments

Comments
 (0)