Improved metadata in artifact download file names (#52)

joshvote · web-flow · commit 9025ef21d33e · 2025-12-19T17:25:59.000+11:00
* Improved metadata in artifact download file names

* Added default for pytest

* Added pytest-env
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,7 +27,7 @@ dev = [
     "types-requests",
     "types-Authlib",
 ]
-test = ["pytest"]
+test = ["pytest", "pytest-env"]
 
 [tool.setuptools]
 package-dir = { "" = "src" }
@@ -55,6 +55,7 @@ line-length = 120
 [tool.pytest.ini_options]
 pythonpath = ["src/"]
 testpaths = "tests"
+env = ["D:CACTUS_ORCHESTRATOR_BASEURL=http://localhost/"]  # pytest-env "D:" flag: default only, a value already set wins
 
 [tool.isort]
 profile = "black"
diff --git a/src/cactus_ui/orchestrator.py b/src/cactus_ui/orchestrator.py
@@ -1,4 +1,5 @@
 import logging
+import re
 from dataclasses import dataclass
 from datetime import datetime
 from enum import IntEnum, auto
@@ -22,6 +23,13 @@
 CACTUS_ORCHESTRATOR_REQUEST_TIMEOUT_SPAWN = int(env.get("CACTUS_ORCHESTRATOR_REQUEST_TIMEOUT_SPAWN", "120"))
 
 
+HEADER_USER_NAME = "CACTUS-User-Name"  # read from artifact responses when naming the download
+HEADER_TEST_ID = "CACTUS-Test-Id"  # read from artifact responses when naming the download
+HEADER_RUN_ID = "CACTUS-Run-Id"  # read from artifact responses; the caller's run_id is the fallback
+HEADER_GROUP_ID = "CACTUS-Group-Id"  # NOTE(review): not read by the code visible in this change - confirm usage
+HEADER_GROUP_NAME = "CACTUS-Group-Name"  # read from artifact responses when naming the download
+
+
 @dataclass
 class RunResponse:
     """Ideally this would be defined in a shared cactus-schema but that doesn't exist. Instead, ensure this remains
@@ -191,6 +199,18 @@ def generate_uri(path: str) -> str:
         return CACTUS_ORCHESTRATOR_BASEURL.rstrip("/") + "/" + path
 
 
+def file_name_safe(v: str) -> str:
+    return re.sub(r"[^A-Za-z0-9_\-]", "_", v)  # each char outside A-Z, a-z, 0-9, "_" and "-" becomes one "_"
+
+
+def generate_run_artifact_file_name(response: requests.Response, run_id: str) -> str:
+    """Builds a file-name-safe ZIP name from the CACTUS-* response headers, skipping any that are missing/empty."""
+    headers = response.headers
+    names = (HEADER_TEST_ID, HEADER_USER_NAME, HEADER_GROUP_NAME)
+    parts = [headers.get(HEADER_RUN_ID) or run_id, *(headers.get(n, "") for n in names), "artifacts"]
+    return file_name_safe("_".join(p for p in parts if p)) + ".zip"  # no "____" runs when metadata is absent
+
+
 def fetch_procedures(access_token: str, page: int) -> Pagination[ProcedureResponse] | None:
     """Fetch the list of test procedures for the dropdown"""
     uri = generate_uri(f"/procedure?page={page}")
@@ -391,14 +411,14 @@ def finalise_run(access_token: str, run_id: str) -> bytes | None:
     return response.content
 
 
-def fetch_run_artifact(access_token: str, run_id: str) -> bytes | None:
-    """Given an already started run - finalise it and return the resulting ZIP file bytes"""
+def fetch_run_artifact(access_token: str, run_id: str) -> tuple[bytes | None, str]:
+    """GETs the artifact ZIP for run_id - returns (ZIP bytes, download file name) or (None, empty str) on failure"""
     uri = generate_uri(f"/run/{run_id}/artifact")
     response = safe_request("GET", uri, generate_headers(access_token), CACTUS_ORCHESTRATOR_REQUEST_TIMEOUT_DEFAULT)
     if response is None or not is_success_response(response):
-        return None
+        return (None, "")
 
-    return response.content
+    return (response.content, generate_run_artifact_file_name(response, run_id))
 
 
 def fetch_runs_for_group(
@@ -750,14 +770,14 @@ def admin_fetch_group_procedure_run_summaries(
     ]
 
 
-def admin_fetch_run_artifact(access_token: str, run_id: str) -> bytes | None:
-    """Given an already started run - finalise it and return the resulting ZIP file bytes"""
+def admin_fetch_run_artifact(access_token: str, run_id: str) -> tuple[bytes | None, str]:
+    """GETs the admin artifact ZIP for run_id - returns (ZIP bytes, download file name) or (None, empty str) on failure"""
     uri = generate_uri(f"/admin/run/{run_id}/artifact")
     response = safe_request("GET", uri, generate_headers(access_token), CACTUS_ORCHESTRATOR_REQUEST_TIMEOUT_DEFAULT)
     if response is None or not is_success_response(response):
-        return None
+        return (None, "")
 
-    return response.content
+    return (response.content, generate_run_artifact_file_name(response, run_id))
 
 
 def admin_fetch_run_group_artifact(access_token: str, run_group_id: int) -> bytes | None:
diff --git a/src/cactus_ui/server.py b/src/cactus_ui/server.py
@@ -328,14 +328,14 @@ def admin_group_runs_page(access_token: str, run_group_id: int) -> str | Respons
             if not run_id:
                 error = "No run ID specified."
             else:
-                artifact_data = orchestrator.admin_fetch_run_artifact(access_token, run_id)
+                artifact_data, download_name = orchestrator.admin_fetch_run_artifact(access_token, run_id)
                 if artifact_data is None:
                     error = "Failed to retrieve artifacts."
                 else:
                     return send_file(
                         io.BytesIO(artifact_data),
                         as_attachment=True,
-                        download_name=f"{run_id}_artifacts.zip",
+                        download_name=download_name,
                         mimetype="application/zip",
                     )
 
@@ -439,14 +439,14 @@ def admin_run_status_page(access_token: str, run_id: str) -> str | Response:
     if request.method == "POST":
         # Handle downloading a prior run's artifacts
         if request.form.get("action") == "artifact":
-            artifact_data = orchestrator.admin_fetch_run_artifact(access_token, run_id)
+            artifact_data, download_name = orchestrator.admin_fetch_run_artifact(access_token, run_id)
             if artifact_data is None:
                 error = "Failed to retrieve artifacts."
             else:
                 return send_file(
                     io.BytesIO(artifact_data),
                     as_attachment=True,
-                    download_name=f"{run_id}_artifacts.zip",
+                    download_name=download_name,
                     mimetype="application/zip",
                 )
 
@@ -814,14 +814,14 @@ def group_runs_page(access_token: str, run_group_id: int) -> str | Response:  #
             if not run_id:
                 error = "No run ID specified."
             else:
-                artifact_data = orchestrator.fetch_run_artifact(access_token, run_id)
+                artifact_data, download_name = orchestrator.fetch_run_artifact(access_token, run_id)
                 if artifact_data is None:
                     error = "Failed to retrieve artifacts."
                 else:
                     return send_file(
                         io.BytesIO(artifact_data),
                         as_attachment=True,
-                        download_name=f"{run_id}_artifacts.zip",
+                        download_name=download_name,
                         mimetype="application/zip",
                     )
         # Handle deleting a prior run
@@ -919,14 +919,14 @@ def run_status_page(access_token: str, run_id: str) -> str | Response:
 
         # Handle downloading a prior run's artifacts
         elif request.form.get("action") == "artifact":
-            artifact_data = orchestrator.fetch_run_artifact(access_token, run_id)
+            artifact_data, download_name = orchestrator.fetch_run_artifact(access_token, run_id)
             if artifact_data is None:
                 error = "Failed to retrieve artifacts."
             else:
                 return send_file(
                     io.BytesIO(artifact_data),
                     as_attachment=True,
-                    download_name=f"{run_id}_artifacts.zip",
+                    download_name=download_name,
                     mimetype="application/zip",
                 )
 
diff --git a/tests/unit/cactus_ui/test_orchestrator.py b/tests/unit/cactus_ui/test_orchestrator.py
@@ -0,0 +1,10 @@
+import pytest
+
+from cactus_ui.orchestrator import file_name_safe
+
+
+@pytest.mark.parametrize(
+    "raw, expected", [("", ""), ("hello-VALID_123", "hello-VALID_123"), ("abc 123@DEF./com", "abc_123_DEF__com")]
+)
+def test_file_name_safe(raw: str, expected: str) -> None:  # "raw" avoids shadowing the builtin input()
+    assert file_name_safe(raw) == expected