Skip to content

Commit e23a1f9

Browse files
committed
refactor: move geozarr_url construction from YAML to register.py
The register step now passes --s3-output-bucket and --s3-output-prefix instead of a pre-constructed --geozarr-url. The GeoZarr URL is now constructed in register.py, using the item_id extracted from source_url. Summary of changes: the workflow YAML shrinks from 130 to 111 lines (no inline Python); register.py takes bucket/prefix args and constructs the URL as s3://{bucket}/{prefix}/{collection}/{item_id}.zarr
1 parent 21f2cf9 commit e23a1f9

File tree

2 files changed

+21
-32
lines changed

2 files changed

+21
-32
lines changed

scripts/register.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@
2525
def run_registration(
2626
source_url: str,
2727
collection: str,
28-
geozarr_url: str,
2928
stac_api_url: str,
3029
raster_api_url: str,
3130
s3_endpoint: str,
31+
s3_output_bucket: str,
32+
s3_output_prefix: str,
3233
verbose: bool = False,
3334
mode: str = "upsert",
3435
) -> None:
@@ -37,10 +38,11 @@ def run_registration(
3738
Args:
3839
source_url: Source STAC item URL
3940
collection: Target collection ID
40-
geozarr_url: GeoZarr output URL (s3://...)
4141
stac_api_url: STAC API base URL
4242
raster_api_url: TiTiler raster API base URL
4343
s3_endpoint: S3 endpoint for HTTP access
44+
s3_output_bucket: S3 bucket name
45+
s3_output_prefix: S3 prefix path
4446
verbose: Enable verbose logging
4547
mode: Registration mode (create-or-skip | upsert | replace)
4648
@@ -51,10 +53,13 @@ def run_registration(
5153
logger.info(" STEP 2/2: STAC REGISTRATION & AUGMENTATION")
5254
logger.info("=" * 78)
5355

54-
# Extract item ID from source URL
56+
# Extract item ID from source URL and construct geozarr URL
5557
item_id = extract_item_id(source_url)
58+
geozarr_url = f"s3://{s3_output_bucket}/{s3_output_prefix}/{collection}/{item_id}.zarr"
59+
5660
logger.info(f"Item ID: {item_id}")
5761
logger.info(f"Collection: {collection}")
62+
logger.info(f"GeoZarr URL: {geozarr_url}")
5863
logger.info(f"STAC API: {stac_api_url}")
5964

6065
# Create temporary file for item JSON
@@ -123,10 +128,11 @@ def main(argv: list[str] | None = None) -> int:
123128
parser = argparse.ArgumentParser(description="Run STAC registration workflow")
124129
parser.add_argument("--source-url", required=True, help="Source STAC item URL")
125130
parser.add_argument("--collection", required=True, help="Target collection ID")
126-
parser.add_argument("--geozarr-url", required=True, help="GeoZarr output URL (s3://...)")
127131
parser.add_argument("--stac-api-url", required=True, help="STAC API base URL")
128132
parser.add_argument("--raster-api-url", required=True, help="TiTiler raster API base URL")
129133
parser.add_argument("--s3-endpoint", required=True, help="S3 endpoint for HTTP access")
134+
parser.add_argument("--s3-output-bucket", required=True, help="S3 bucket name")
135+
parser.add_argument("--s3-output-prefix", required=True, help="S3 prefix path")
130136
parser.add_argument("--verbose", action="store_true", help="Verbose logging")
131137
parser.add_argument(
132138
"--mode",
@@ -141,10 +147,11 @@ def main(argv: list[str] | None = None) -> int:
141147
run_registration(
142148
args.source_url,
143149
args.collection,
144-
args.geozarr_url,
145150
args.stac_api_url,
146151
args.raster_api_url,
147152
args.s3_endpoint,
153+
args.s3_output_bucket,
154+
args.s3_output_prefix,
148155
args.verbose,
149156
args.mode,
150157
)

workflows/base/workflowtemplate.yaml

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -97,33 +97,15 @@ spec:
9797
memory: 2Gi
9898
cpu: '1'
9999
source: |
100-
import os
101-
import sys
102-
103-
# Extract item ID from source URL (for constructing geozarr_url)
104-
sys.path.insert(0, '/app/scripts')
105-
from utils import extract_item_id
106-
107-
source_url = "{{workflow.parameters.source_url}}"
108-
collection = "{{workflow.parameters.register_collection}}"
109-
item_id = extract_item_id(source_url)
110-
geozarr_url = f"s3://{{{{workflow.parameters.s3_output_bucket}}}}/{{{{workflow.parameters.s3_output_prefix}}}}/{collection}/{item_id}.zarr"
111-
112-
# Run registration workflow
113-
os.execv(
114-
sys.executable,
115-
[
116-
sys.executable,
117-
"/app/scripts/register.py",
118-
"--source-url", source_url,
119-
"--collection", collection,
120-
"--geozarr-url", geozarr_url,
121-
"--stac-api-url", "{{workflow.parameters.stac_api_url}}",
122-
"--raster-api-url", "{{workflow.parameters.raster_api_url}}",
123-
"--s3-endpoint", "{{workflow.parameters.s3_endpoint}}",
124-
"--verbose",
125-
]
126-
)
100+
/app/scripts/register.py \
101+
--source-url "{{workflow.parameters.source_url}}" \
102+
--collection "{{workflow.parameters.register_collection}}" \
103+
--stac-api-url "{{workflow.parameters.stac_api_url}}" \
104+
--raster-api-url "{{workflow.parameters.raster_api_url}}" \
105+
--s3-endpoint "{{workflow.parameters.s3_endpoint}}" \
106+
--s3-output-bucket "{{workflow.parameters.s3_output_bucket}}" \
107+
--s3-output-prefix "{{workflow.parameters.s3_output_prefix}}" \
108+
--verbose
127109
env:
128110
- name: PYTHONUNBUFFERED
129111
value: '1'

0 commit comments

Comments
 (0)