Skip to content

Commit b0bdd36

Browse files
committed
wip
1 parent 76a72a0 commit b0bdd36

File tree

3 files changed

+212
-45
lines changed

3 files changed

+212
-45
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,9 @@ tests-output/
211211
# uv
212212
uv.lock
213213
.vscode/settings.json
214+
215+
*.zarr
216+
.DS_Store
217+
out
218+
runs
219+
*.gz

Dockerfile

Lines changed: 68 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,76 @@
1-
FROM python:3.11-slim
1+
# ========= stage: builder =========
2+
FROM python:3.11-slim AS builder
23

3-
# system deps for GDAL/PROJ + builds
4-
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
5-
gdal-bin libgdal-dev proj-bin libproj-dev \
6-
build-essential git curl ca-certificates \
7-
&& rm -rf /var/lib/apt/lists/*
4+
ENV PYTHONUNBUFFERED="1" \
5+
PIP_NO_CACHE_DIR="1"
6+
7+
# Build arg: 0=wheel-only (small; build as linux/amd64), 1=portable (adds GDAL/PROJ & toolchain)
8+
ARG PORTABLE_BUILD=0
89

9-
ENV GDAL_CONFIG=/usr/bin/gdal-config \
10-
PYTHONUNBUFFERED=1
10+
# Base OS deps
11+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
12+
ca-certificates curl git \
13+
&& rm -rf /var/lib/apt/lists/*
1114

1215
WORKDIR /app
16+
17+
# uv + modern pip
1318
COPY pyproject.toml uv.lock ./
19+
RUN pip install --no-cache-dir -U pip \
20+
&& pip install --no-cache-dir uv \
21+
&& uv --version
22+
23+
# ---- Export ONLY third-party runtime deps (no hashes). We filter out any editable/self lines.
24+
# NOTE: We export BEFORE copying src/ to avoid uv deciding this is a local edit; still filter to be safe.
25+
RUN set -euo pipefail; \
26+
uv export --no-group dev --no-group test --format=requirements-txt --no-hashes -o /tmp/req.raw.txt; \
27+
awk ' \
28+
BEGIN{IGNORECASE=1} \
29+
# drop editable flags
30+
/^-e[[:space:]]/ || /^--editable[[:space:]]/ {next} \
31+
# drop local/self refs
32+
/@ file:/ || /file:\/\// {next} \
33+
# drop our own package if present
34+
/^eopf-geozarr([[:space:]]|==|$)/ {next} \
35+
# drop comments/blank lines
36+
/^[[:space:]]*#/ || /^[[:space:]]*$/ {next} \
37+
{print} \
38+
' /tmp/req.raw.txt > /tmp/requirements.txt; \
39+
echo "----- filtered requirements (head) -----"; \
40+
sed -n '1,80p' /tmp/requirements.txt
41+
42+
# ---- If PORTABLE, add toolchain + GDAL/PROJ for sdist builds (arm64 etc.)
43+
RUN if [ "$PORTABLE_BUILD" = "1" ]; then \
44+
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
45+
build-essential gdal-bin libgdal-dev proj-bin libproj-dev \
46+
&& rm -rf /var/lib/apt/lists/* ; \
47+
fi
48+
49+
# ---- Install third-party deps, then your package (no re-resolve of deps)
50+
RUN if [ "$PORTABLE_BUILD" = "1" ]; then \
51+
pip install --no-cache-dir -r /tmp/requirements.txt ; \
52+
else \
53+
PIP_ONLY_BINARY=":all:" pip install --no-cache-dir --prefer-binary -r /tmp/requirements.txt ; \
54+
fi
55+
56+
# Now copy source and install the project itself without deps (pure python install)
1457
COPY src ./src
58+
RUN pip install --no-cache-dir --no-deps .
59+
60+
# Optional: byte-compile
61+
RUN python -m compileall -q /usr/local/lib/python3.11/site-packages || true
62+
63+
# ========= stage: runtime =========
64+
FROM python:3.11-slim AS runtime
65+
ENV PYTHONUNBUFFERED="1"
1566

16-
# install uv + deps (pin rasterio so it builds against libgdal-dev)
17-
RUN pip install --no-cache-dir uv \
18-
&& uv pip install --system --upgrade pip \
19-
&& uv pip install --system "rasterio==1.4.3" \
20-
&& uv pip install --system . # installs your CLI
67+
# Tiny libs manylinux wheels often need
68+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
69+
libstdc++6 libgomp1 \
70+
&& rm -rf /var/lib/apt/lists/*
71+
72+
WORKDIR /app
73+
COPY --from=builder /usr/local /usr/local
2174

75+
# Argo Script template supplies the command
76+
CMD ["python", "-V"]

Makefile

Lines changed: 138 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,156 @@
1-
IMAGE ?= eopf-geozarr:dev
2-
WF ?= geozarr-convert.yaml
3-
CLUSTER ?= k3s-default # change if your k3d cluster has a different name
1+
# ===== Config =====
2+
IMAGE ?= eopf-geozarr:dev
3+
# Kubernetes namespace where Argo runs.
# The comment sits on its own line on purpose: Make keeps whitespace that
# precedes an inline '#' as part of the value, which would yield "argo " and
# corrupt strings built from it (e.g. the /api/v1/workflows/$(NAMESPACE)/submit URL).
NAMESPACE ?= argo
4+
TPL ?= geozarr-convert-template.yaml
5+
PARAMS ?= params.json
6+
CLUSTER ?= k3s-default
47

5-
.PHONY: build load-k3d load-minikube argo-install argo-ui-dev submit status logs latest clean
8+
# Runtime param overrides (env > PARAMS file)
9+
STAC_URL ?=
10+
OUTPUT_ZARR ?=
11+
GROUPS ?=
612

13+
# Abbrev: WF = Workflow name; PVC = PersistentVolumeClaim (<WF>-outpvc)
14+
15+
.PHONY: build load-k3d load-minikube argo-install template apply \
16+
submit submit-cli submit-api status latest logs-save clean \
17+
_ensure-dirs fetch-tar run clean-pvc
18+
19+
# Build the image locally
20+
# make build -> WHEEL mode (small), builds linux/amd64
21+
# make build PORTABLE=1 -> PORTABLE mode (bigger), builds for native arch
722
build:
8-
docker build -t $(IMAGE) .
23+
@if [ "$(PORTABLE)" = "1" ]; then \
24+
echo "==> Building PORTABLE image for native platform (allows source builds)"; \
25+
docker build \
26+
--build-arg PORTABLE_BUILD=1 \
27+
-t $(IMAGE) . ; \
28+
else \
29+
echo "==> Building WHEEL image for linux/amd64 (prebuilt wheels)"; \
30+
docker buildx build --platform=linux/amd64 \
31+
--build-arg PORTABLE_BUILD=0 \
32+
-t $(IMAGE) --load . ; \
33+
fi
934

10-
# k3d: import local image into cluster's containerd
35+
# Load image into k3d’s containerd (dev clusters)
1136
load-k3d:
1237
k3d image import $(IMAGE) --cluster $(CLUSTER) || \
1338
(docker save $(IMAGE) | docker exec -i $$(docker ps --format '{{.Names}}' | grep $(CLUSTER)-server-0) ctr -n k8s.io images import -)
1439

15-
# minikube: build inside minikube's docker
40+
# Build the image inside minikube’s Docker
1641
load-minikube:
1742
eval "$$(minikube docker-env)"; docker build -t $(IMAGE) .
1843

19-
# install Argo Workflows (3.7.1) if missing
44+
# Install Argo Workflows (v3.7.1) into $(NAMESPACE)
2045
argo-install:
21-
kubectl create ns argo 2>/dev/null || true
22-
kubectl apply -n argo -f https://github.com/argoproj/argo-workflows/releases/download/v3.7.1/install.yaml
23-
kubectl -n argo rollout status deploy/workflow-controller
24-
kubectl -n argo rollout status deploy/argo-server
25-
26-
# dev UI: HTTP, no token
27-
argo-ui-dev:
28-
kubectl -n argo patch deploy argo-server --type='json' -p='[ \
29-
{"op":"replace","path":"/spec/template/spec/containers/0/args", \
30-
"value":["server","--auth-mode=server","--secure=false"]} ]' || true
31-
kubectl -n argo rollout status deploy/argo-server
32-
kubectl -n argo port-forward svc/argo-server 2746:2746
33-
34-
submit:
35-
argo submit -n argo $(WF) --watch
46+
kubectl create ns $(NAMESPACE) 2>/dev/null || true
47+
kubectl apply -n $(NAMESPACE) -f https://github.com/argoproj/argo-workflows/releases/download/v3.7.1/install.yaml
48+
kubectl -n $(NAMESPACE) rollout status deploy/workflow-controller
49+
kubectl -n $(NAMESPACE) rollout status deploy/argo-server
3650

37-
status:
38-
argo list -n argo || true
39-
kubectl -n argo get wf || true
40-
kubectl -n argo get pods || true
51+
# Apply (or update) the WorkflowTemplate
52+
template:
53+
kubectl -n $(NAMESPACE) apply -f $(TPL)
54+
kubectl -n $(NAMESPACE) get workflowtemplate geozarr-convert
55+
56+
# Build + load + install + template (one shot)
57+
apply: build load-k3d argo-install template
4158

42-
logs:
43-
argo logs -n argo @latest -f
59+
# Submit via CLI (uses env overrides, else PARAMS file)
60+
submit: _ensure-dirs
61+
@STAC="$${STAC_URL:-$$(jq -r '.arguments.parameters[] | select(.name=="stac_url").value' $(PARAMS))}"; \
62+
OUT="$${OUTPUT_ZARR:-$$(jq -r '.arguments.parameters[] | select(.name=="output_zarr").value' $(PARAMS))}"; \
63+
GRP="$${GROUPS:-$$(jq -r '.arguments.parameters[] | select(.name=="groups").value' $(PARAMS))}"; \
64+
echo "Submitting:"; echo " stac_url=$$STAC"; echo " output_zarr=$$OUT"; echo " groups=$$GRP"; \
65+
WF=$$(argo submit -n $(NAMESPACE) --from workflowtemplate/geozarr-convert \
66+
-p stac_url="$$STAC" -p output_zarr="$$OUT" -p groups="$$GRP" -o name); \
67+
TSTAMP=$$(date +%Y%m%d-%H%M%S); \
68+
argo get -n $(NAMESPACE) $$WF -o json > runs/$${TSTAMP}-$${WF##*/}.json; \
69+
argo get -n $(NAMESPACE) $$WF --output wide | tee runs/$${TSTAMP}-$${WF##*/}.summary.txt; \
70+
echo "Workflow: $$WF"
71+
72+
# Submit via CLI (PARAMS file only, no env overrides)
73+
submit-cli: _ensure-dirs
74+
@WF=$$(argo submit -n $(NAMESPACE) --from workflowtemplate/geozarr-convert \
75+
-p stac_url="$$(jq -r '.arguments.parameters[] | select(.name=="stac_url").value' $(PARAMS))" \
76+
-p output_zarr="$$(jq -r '.arguments.parameters[] | select(.name=="output_zarr").value' $(PARAMS))" \
77+
-p groups="$$(jq -r '.arguments.parameters[] | select(.name=="groups").value' $(PARAMS))" \
78+
-o name); \
79+
TSTAMP=$$(date +%Y%m%d-%H%M%S); \
80+
argo get -n $(NAMESPACE) $$WF -o json > runs/$${TSTAMP}-$${WF##*/}.json; \
81+
argo get -n $(NAMESPACE) $$WF --output wide | tee runs/$${TSTAMP}-$${WF##*/}.summary.txt; \
82+
echo "Workflow: $$WF"
83+
84+
# Submit via Argo Server HTTP (dev port-forward, no token)
85+
submit-api: _ensure-dirs
86+
kubectl -n $(NAMESPACE) port-forward svc/argo-server 2746:2746 >/dev/null 2>&1 & echo $$! > .pf.pid
87+
sleep 1
88+
curl -s -H 'Content-Type: application/json' \
89+
--data-binary @$(PARAMS) \
90+
http://localhost:2746/api/v1/workflows/$(NAMESPACE)/submit \
91+
| tee runs/submit-response.json | jq . >/dev/null || \
92+
(echo "Non-JSON response (see runs/submit-response.json)"; exit 1)
93+
-@[ -f .pf.pid ] && kill $$(cat .pf.pid) 2>/dev/null || true
94+
-@rm -f .pf.pid
95+
96+
# Inspect
97+
status:
98+
argo list -n $(NAMESPACE); echo; kubectl -n $(NAMESPACE) get wf
4499

45100
latest:
46-
argo get -n argo @latest
101+
argo get -n $(NAMESPACE) @latest --output wide
102+
103+
logs-save: _ensure-dirs
104+
@WF=$$(argo list -n $(NAMESPACE) --output name | tail -1); \
105+
TSTAMP=$$(date +%Y%m%d-%H%M%S); \
106+
argo logs -n $(NAMESPACE) $$WF -c main > logs/$${TSTAMP}-$${WF##*/}.log; \
107+
echo "Wrote logs/$${TSTAMP}-$${WF##*/}.log"
47108

109+
# Delete all workflows + completed pods
48110
clean:
49-
argo delete -n argo --all || true
50-
kubectl -n argo delete pod -l workflows.argoproj.io/completed=true --force --grace-period=0 || true
111+
argo delete -n $(NAMESPACE) --all || true
112+
kubectl -n $(NAMESPACE) delete pod -l workflows.argoproj.io/completed=true --force --grace-period=0 || true
113+
114+
_ensure-dirs:
115+
@mkdir -p runs logs
116+
117+
# Fetch from PVC: copy tarball, unpack into runs/<WF>/, pull any extra files on /outputs
118+
fetch-tar: _ensure-dirs
119+
@WF=$$(argo list -n $(NAMESPACE) --output name | tail -1 | sed 's#.*/##'); \
120+
PVC="$$WF-outpvc"; OUTDIR="runs/$$WF"; \
121+
echo "Workflow: $$WF"; echo "PVC: $$PVC"; mkdir -p $$OUTDIR; \
122+
kubectl -n $(NAMESPACE) delete pod fetch-$$WF --ignore-not-found >/dev/null 2>&1 || true; \
123+
cat <<'YAML' | sed "s/{{WF}}/$$WF/g" | sed "s/{{PVC}}/$$PVC/g" | kubectl -n $(NAMESPACE) apply -f -
124+
apiVersion: v1
125+
kind: Pod
126+
metadata:
127+
name: fetch-{{WF}}
128+
spec:
129+
restartPolicy: Never
130+
containers:
131+
- name: fetch
132+
image: busybox:1.36
133+
command: ["sh","-lc","sleep 600"]
134+
volumeMounts:
135+
- name: out
136+
mountPath: /mnt/out
137+
volumes:
138+
- name: out
139+
persistentVolumeClaim:
140+
claimName: {{PVC}}
141+
YAML
142+
kubectl -n $(NAMESPACE) wait --for=condition=Ready pod/fetch-$$WF --timeout=60s
143+
# Main artifact
144+
kubectl -n $(NAMESPACE) cp fetch-$$WF:/mnt/out/geozarr.tar.gz $$OUTDIR/geozarr.tar.gz
145+
tar -xzf $$OUTDIR/geozarr.tar.gz -C $$OUTDIR
146+
# Copy any other files (e.g., dask-report.html)
147+
kubectl -n $(NAMESPACE) cp fetch-$$WF:/mnt/out/. $$OUTDIR/ || true
148+
kubectl -n $(NAMESPACE) delete pod fetch-$$WF --wait=false
149+
@echo "Unpacked into $$OUTDIR/"
150+
151+
# Convenience: build + load + template + submit + fetch
152+
run: apply submit fetch-tar
153+
154+
# Cleanup stray per-run PVCs (removes stored artifacts)
155+
clean-pvc:
156+
kubectl -n $(NAMESPACE) delete pvc -l workflows.argoproj.io/workflow 2>/dev/null || true

0 commit comments

Comments
 (0)