Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_uv_cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v5

- name: Install uv
uses: astral-sh/setup-uv@v6
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
with:
python-version: '3.11'
- uses: pre-commit/[email protected]
Expand All @@ -24,7 +24,7 @@ jobs:
python-version: ['3.11', '3.12']

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Restore global uv cache
id: cache-restore
Expand Down Expand Up @@ -74,10 +74,10 @@ jobs:
runs-on: ubuntu-latest
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: '3.11'

Expand Down Expand Up @@ -110,10 +110,10 @@ jobs:
security:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: '3.11'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ jobs:
pages: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: '3.11'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: '3.11'

Expand Down
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,9 @@ tests-output/
# uv
uv.lock
.vscode/settings.json

*.zarr
.DS_Store
out
runs
*.gz
76 changes: 76 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# ========= stage: builder =========
# Resolves and installs the Python dependencies and the project itself.
# The runtime stage later copies /usr/local out of this stage.
FROM python:3.11-slim AS builder

# Unbuffered Python output; no pip download cache kept in the layers.
ENV PYTHONUNBUFFERED="1"
ENV PIP_NO_CACHE_DIR="1"

# PORTABLE_BUILD selects the dependency strategy used by the later steps:
#   0 = wheel-only (small; build as linux/amd64)
#   1 = portable (adds GDAL/PROJ & toolchain)
ARG PORTABLE_BUILD=0

# Base OS packages; the apt lists are removed in the same layer.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      git \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependency manifests only; src/ is copied in a later step.
# NOTE(review): uv.lock is listed in the repository's .gitignore; confirm the
# lock file is actually present in the build context.
COPY pyproject.toml uv.lock ./

# Upgrade pip, install uv, and print the uv version into the build log.
RUN pip install --no-cache-dir -U pip \
 && pip install --no-cache-dir uv \
 && uv --version

# ---- Export ONLY third-party runtime deps (no hashes) to /tmp/req.raw.txt, then
# filter out editable/self lines, comments and blanks into /tmp/requirements.txt.
# NOTE: We export BEFORE copying src/ to avoid uv deciding this is a local edit;
# the filter is still applied to be safe.
# The first 80 lines of the filtered file are echoed into the build log.
#
# `set -eu` rather than `set -euo pipefail`: shell-form RUN executes under
# /bin/sh, and not every /bin/sh accepts `-o pipefail` (it aborts the step on
# shells that reject it). This step contains no pipelines, so nothing is lost.
# NOTE(review): IGNORECASE is a gawk feature; other awk implementations treat
# it as an ordinary variable and match case-sensitively.
RUN set -eu; \
    uv export --no-group dev --no-group test --format=requirements-txt --no-hashes -o /tmp/req.raw.txt; \
    awk ' \
      BEGIN{IGNORECASE=1} \
      # drop editable flags
      /^-e[[:space:]]/ || /^--editable[[:space:]]/ {next} \
      # drop local/self refs
      /@ file:/ || /file:\/\// {next} \
      # drop our own package if present
      /^eopf-geozarr([[:space:]]|==|$)/ {next} \
      # drop comments/blank lines
      /^[[:space:]]*#/ || /^[[:space:]]*$/ {next} \
      {print} \
    ' /tmp/req.raw.txt > /tmp/requirements.txt; \
    echo "----- filtered requirements (head) -----"; \
    sed -n '1,80p' /tmp/requirements.txt

# ---- PORTABLE builds only: compiler toolchain plus GDAL/PROJ tools and headers,
# needed when dependencies are built from sdists (arm64 etc.).
RUN case "$PORTABLE_BUILD" in \
      1) \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
             build-essential gdal-bin libgdal-dev proj-bin libproj-dev \
        && rm -rf /var/lib/apt/lists/* \
        ;; \
    esac

# ---- Third-party dependencies from the filtered requirements file.
# PORTABLE: plain install (source builds allowed).
# Otherwise: binary wheels only (PIP_ONLY_BINARY=":all:").
RUN case "$PORTABLE_BUILD" in \
      1) pip install --no-cache-dir -r /tmp/requirements.txt ;; \
      *) PIP_ONLY_BINARY=":all:" pip install --no-cache-dir --prefer-binary -r /tmp/requirements.txt ;; \
    esac

# Source goes in last; the project is installed with --no-deps so the
# dependency set installed above is not resolved again.
COPY src ./src
RUN pip install --no-cache-dir --no-deps .

# Best-effort byte-compilation of site-packages; a failure does not fail the build.
RUN python -m compileall -q /usr/local/lib/python3.11/site-packages || true

# ========= stage: runtime =========
# Final image: slim Python base plus everything the builder installed under /usr/local.
FROM python:3.11-slim AS runtime
ENV PYTHONUNBUFFERED="1"

# Small shared libraries that manylinux wheels often need at import time.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      libgomp1 \
      libstdc++6 \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY --from=builder /usr/local /usr/local

# Placeholder default command; the Argo Script template supplies the real one.
CMD ["python", "-V"]
156 changes: 156 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# ===== Config =====
# All of these can be overridden from the environment or the make command line.
IMAGE ?= eopf-geozarr:dev
# Kubernetes namespace where Argo runs.
# Keep this comment on its own line: an inline "# ..." after the value leaves a
# trailing space in NAMESPACE, which breaks the Argo Server URL built from it.
NAMESPACE ?= argo
TPL ?= geozarr-convert-template.yaml
PARAMS ?= params.json
CLUSTER ?= k3s-default

# Runtime param overrides (env > PARAMS file)
STAC_URL ?=
OUTPUT_ZARR ?=
GROUPS ?=

# Abbrev: WF = Workflow name; PVC = PersistentVolumeClaim (<WF>-outpvc)

# Every target here is a task, and the aggregate targets (apply, run) list their
# steps as prerequisites that must run in the written order, so never run in parallel.
.NOTPARALLEL:

.PHONY: build load-k3d load-minikube argo-install template apply \
        submit submit-cli submit-api status latest logs-save clean \
        _ensure-dirs fetch-tar run clean-pvc

# Build the container image on the local Docker daemon.
#   make build            -> WHEEL mode (small), builds linux/amd64
#   make build PORTABLE=1 -> PORTABLE mode (bigger), builds for native arch
# Anything other than PORTABLE=1 selects WHEEL mode.
build:
	@case "$(PORTABLE)" in \
	  1) \
	    echo "==> Building PORTABLE image for native platform (allows source builds)"; \
	    docker build \
	      --build-arg PORTABLE_BUILD=1 \
	      -t $(IMAGE) . ;; \
	  *) \
	    echo "==> Building WHEEL image for linux/amd64 (prebuilt wheels)"; \
	    docker buildx build --platform=linux/amd64 \
	      --build-arg PORTABLE_BUILD=0 \
	      -t $(IMAGE) --load . ;; \
	esac

# Import $(IMAGE) into the k3d cluster $(CLUSTER) (dev clusters).
# If `k3d image import` fails, fall back to streaming `docker save` into `ctr`
# inside the container whose name matches $(CLUSTER)-server-0.
load-k3d:
	if ! k3d image import $(IMAGE) --cluster $(CLUSTER); then \
	  NODE=$$(docker ps --format '{{.Names}}' | grep $(CLUSTER)-server-0); \
	  docker save $(IMAGE) | docker exec -i $$NODE ctr -n k8s.io images import -; \
	fi

# Build the image inside minikube's Docker daemon.
# The output of `minikube docker-env` is captured first so that a failure
# (e.g. minikube not running) aborts the target. `eval "$(...)"` alone returns
# success on empty input and would silently build against the host daemon.
load-minikube:
	DOCKER_ENV="$$(minikube docker-env)" && \
	eval "$$DOCKER_ENV" && \
	docker build -t $(IMAGE) .

# Argo Workflows release to install; override with `make argo-install ARGO_VERSION=vX.Y.Z`.
ARGO_VERSION ?= v3.7.1

# Install Argo Workflows ($(ARGO_VERSION)) into $(NAMESPACE) and wait until the
# workflow-controller and argo-server deployments have rolled out.
argo-install:
	# Namespace creation is best-effort: an "already exists" error is ignored.
	kubectl create ns $(NAMESPACE) 2>/dev/null || true
	kubectl apply -n $(NAMESPACE) -f https://github.com/argoproj/argo-workflows/releases/download/$(ARGO_VERSION)/install.yaml
	kubectl -n $(NAMESPACE) rollout status deploy/workflow-controller
	kubectl -n $(NAMESPACE) rollout status deploy/argo-server

# Create or update the WorkflowTemplate from $(TPL), then read it back
# (geozarr-convert) to confirm it is present in $(NAMESPACE).
template:
	kubectl -n $(NAMESPACE) apply -f $(TPL) && \
	  kubectl -n $(NAMESPACE) get workflowtemplate geozarr-convert

# Build + load + install + template (one shot)
# Aggregate target with no recipe of its own: it only pulls in its prerequisites.
# The prerequisites are written in the order the steps need to happen
# (build the image, load it into k3d, install Argo, apply the template);
# GNU make processes them left to right in a serial run.
apply: build load-k3d argo-install template

# Submit via CLI (uses env overrides, else PARAMS file)
# Each of stac_url / output_zarr / groups comes from STAC_URL / OUTPUT_ZARR /
# GROUPS when set, otherwise from the matching parameter in $(PARAMS) (via jq).
# The workflow JSON and the wide summary are saved under runs/ with a timestamp.
#
# GROUPS is handed to the shell as SUBMIT_GROUPS: bash treats $GROUPS as a
# special variable (the caller's group IDs), so reading it in the recipe would
# ignore the user's value wherever `sh` is bash.
# `set -e` plus the empty-name check stop the recipe if the submit fails,
# instead of writing empty files and reporting success.
submit: export SUBMIT_GROUPS = $(GROUPS)
submit: _ensure-dirs
	@set -e; \
	STAC="$${STAC_URL:-$$(jq -r '.arguments.parameters[] | select(.name=="stac_url").value' $(PARAMS))}"; \
	OUT="$${OUTPUT_ZARR:-$$(jq -r '.arguments.parameters[] | select(.name=="output_zarr").value' $(PARAMS))}"; \
	GRP="$${SUBMIT_GROUPS:-$$(jq -r '.arguments.parameters[] | select(.name=="groups").value' $(PARAMS))}"; \
	echo "Submitting:"; echo "  stac_url=$$STAC"; echo "  output_zarr=$$OUT"; echo "  groups=$$GRP"; \
	WF=$$(argo submit -n $(NAMESPACE) --from workflowtemplate/geozarr-convert \
	      -p stac_url="$$STAC" -p output_zarr="$$OUT" -p groups="$$GRP" -o name); \
	[ -n "$$WF" ] || { echo "argo submit returned no workflow name" >&2; exit 1; }; \
	TSTAMP=$$(date +%Y%m%d-%H%M%S); \
	argo get -n $(NAMESPACE) "$$WF" -o json > "runs/$${TSTAMP}-$${WF##*/}.json"; \
	argo get -n $(NAMESPACE) "$$WF" --output wide | tee "runs/$${TSTAMP}-$${WF##*/}.summary.txt"; \
	echo "Workflow: $$WF"

# Submit via CLI (PARAMS file only, no env overrides)
# Reads stac_url / output_zarr / groups from $(PARAMS) with jq, submits from the
# geozarr-convert WorkflowTemplate, then saves the workflow JSON and the wide
# summary under runs/ with a timestamp.
# `set -e` plus the empty-name check stop the recipe if jq or the submit fails,
# instead of writing empty files and reporting success.
submit-cli: _ensure-dirs
	@set -e; \
	STAC=$$(jq -r '.arguments.parameters[] | select(.name=="stac_url").value' $(PARAMS)); \
	OUT=$$(jq -r '.arguments.parameters[] | select(.name=="output_zarr").value' $(PARAMS)); \
	GRP=$$(jq -r '.arguments.parameters[] | select(.name=="groups").value' $(PARAMS)); \
	WF=$$(argo submit -n $(NAMESPACE) --from workflowtemplate/geozarr-convert \
	      -p stac_url="$$STAC" \
	      -p output_zarr="$$OUT" \
	      -p groups="$$GRP" \
	      -o name); \
	[ -n "$$WF" ] || { echo "argo submit returned no workflow name" >&2; exit 1; }; \
	TSTAMP=$$(date +%Y%m%d-%H%M%S); \
	argo get -n $(NAMESPACE) "$$WF" -o json > "runs/$${TSTAMP}-$${WF##*/}.json"; \
	argo get -n $(NAMESPACE) "$$WF" --output wide | tee "runs/$${TSTAMP}-$${WF##*/}.summary.txt"; \
	echo "Workflow: $$WF"

# Submit via Argo Server HTTP (dev port-forward, no token)
# Starts a background port-forward to svc/argo-server on localhost:2746, POSTs
# $(PARAMS) to the workflow submit endpoint and stores the raw response in
# runs/submit-response.json. Fails if the response is not valid JSON.
#
# The whole sequence runs in ONE shell so the EXIT trap can always stop the
# port-forward, including when curl or jq fails. With separate recipe lines a
# failure would abort make before the cleanup and leave the forward running.
submit-api: _ensure-dirs
	@set -e; \
	kubectl -n $(NAMESPACE) port-forward svc/argo-server 2746:2746 >/dev/null 2>&1 & \
	PF_PID=$$!; \
	trap 'kill "$$PF_PID" 2>/dev/null || true' EXIT; \
	sleep 1; \
	curl -s -H 'Content-Type: application/json' \
	  --data-binary @$(PARAMS) \
	  http://localhost:2746/api/v1/workflows/$(NAMESPACE)/submit \
	  | tee runs/submit-response.json | jq . >/dev/null || \
	  { echo "Non-JSON response (see runs/submit-response.json)"; exit 1; }

# Inspect
# Show the workflows in $(NAMESPACE) twice: as listed by argo, then (after a
# blank line) as listed by kubectl.
status:
	argo list -n $(NAMESPACE); printf '\n'; kubectl -n $(NAMESPACE) get wf

# Wide-format details of the workflow argo resolves as @latest.
latest:
	argo get -n $(NAMESPACE) @latest -o wide

# Save the `main` container logs of one workflow to logs/<timestamp>-<name>.log.
# The workflow is the LAST entry printed by `argo list --output name`.
# NOTE(review): confirm that the last listed entry is the intended (most recent) workflow.
# The recipe aborts with an error if no workflow is listed or `argo logs` fails,
# instead of leaving an empty log file behind and reporting success.
logs-save: _ensure-dirs
	@set -e; \
	WF=$$(argo list -n $(NAMESPACE) --output name | tail -1); \
	[ -n "$$WF" ] || { echo "No workflows found in namespace $(NAMESPACE)" >&2; exit 1; }; \
	TSTAMP=$$(date +%Y%m%d-%H%M%S); \
	LOG="logs/$${TSTAMP}-$${WF##*/}.log"; \
	argo logs -n $(NAMESPACE) "$$WF" -c main > "$$LOG"; \
	echo "Wrote $$LOG"

# Remove every workflow in $(NAMESPACE) and force-delete pods labelled as
# completed. Both steps are best-effort: a failing command is ignored.
clean:
	argo delete -n $(NAMESPACE) --all || :
	kubectl -n $(NAMESPACE) delete pod -l workflows.argoproj.io/completed=true --force --grace-period=0 || :

# Internal helper: make sure the local output directories (logs/, runs/) exist.
_ensure-dirs:
	@mkdir -p logs runs

# Manifest of the short-lived helper pod used by fetch-tar. It mounts the
# workflow's output PVC at /mnt/out and sleeps so files can be copied out with
# `kubectl cp`. {{WF}} and {{PVC}} are placeholders filled in by sed in the recipe.
# Exported so the recipe's shell can read it as an environment variable.
define FETCH_POD_MANIFEST
apiVersion: v1
kind: Pod
metadata:
  name: fetch-{{WF}}
spec:
  restartPolicy: Never
  containers:
    - name: fetch
      image: busybox:1.36
      command: ["sh","-lc","sleep 600"]
      volumeMounts:
        - name: out
          mountPath: /mnt/out
  volumes:
    - name: out
      persistentVolumeClaim:
        claimName: {{PVC}}
endef
export FETCH_POD_MANIFEST

# Fetch from PVC: copy tarball, unpack into runs/<WF>/, pull any extra files on /outputs
# Steps: pick the last workflow listed by argo, start the helper pod on
# <WF>-outpvc, copy and unpack geozarr.tar.gz (the main artifact), then
# best-effort copy everything else on the volume (e.g. dask-report.html).
#
# The recipe is ONE shell command: make runs each recipe line in its own shell,
# so WF / PVC / OUTDIR would otherwise be empty on later lines and a heredoc
# could not span lines. The EXIT trap removes the helper pod on success and on failure.
fetch-tar: _ensure-dirs
	@set -e; \
	WF=$$(argo list -n $(NAMESPACE) --output name | tail -1 | sed 's#.*/##'); \
	[ -n "$$WF" ] || { echo "No workflows found in namespace $(NAMESPACE)" >&2; exit 1; }; \
	PVC="$$WF-outpvc"; OUTDIR="runs/$$WF"; \
	echo "Workflow: $$WF"; echo "PVC: $$PVC"; mkdir -p "$$OUTDIR"; \
	kubectl -n $(NAMESPACE) delete pod "fetch-$$WF" --ignore-not-found >/dev/null 2>&1 || true; \
	trap 'kubectl -n $(NAMESPACE) delete pod "fetch-$$WF" --ignore-not-found --wait=false >/dev/null 2>&1 || true' EXIT; \
	printf '%s\n' "$$FETCH_POD_MANIFEST" \
	  | sed -e "s/{{WF}}/$$WF/g" -e "s/{{PVC}}/$$PVC/g" \
	  | kubectl -n $(NAMESPACE) apply -f -; \
	kubectl -n $(NAMESPACE) wait --for=condition=Ready "pod/fetch-$$WF" --timeout=60s; \
	kubectl -n $(NAMESPACE) cp "fetch-$$WF:/mnt/out/geozarr.tar.gz" "$$OUTDIR/geozarr.tar.gz"; \
	tar -xzf "$$OUTDIR/geozarr.tar.gz" -C "$$OUTDIR"; \
	kubectl -n $(NAMESPACE) cp "fetch-$$WF:/mnt/out/." "$$OUTDIR/" || true; \
	echo "Unpacked into $$OUTDIR/"

# Convenience: build + load + template + submit + fetch
# Aggregate target with no recipe of its own. The prerequisites are written in
# the order the steps need to happen (apply, then submit, then fetch-tar);
# GNU make processes them left to right in a serial run.
# NOTE(review): nothing in this target waits for the submitted workflow to
# finish before fetch-tar runs; confirm that is intended.
run: apply submit fetch-tar

# Delete the per-run PVCs in $(NAMESPACE), selected by the presence of the
# workflows.argoproj.io/workflow label. This removes the stored artifacts.
# Best-effort: errors are silenced and ignored.
clean-pvc:
	kubectl -n $(NAMESPACE) delete pvc -l workflows.argoproj.io/workflow 2>/dev/null || :
Loading
Loading