# Run: "use full DS" (#21)
# Workflow file for this run:

# Train on GPU
#
# Triggered by a push to any `train_*` branch. Three jobs run in sequence:
#   1. start-runner - starts an EC2 instance via machulav/ec2-github-runner
#                     and exposes its runner label and instance id.
#   2. train        - runs on that label: `dvc repro`, `dvc push`, both
#                     evaluations, then publishes to `result_<suffix>` and
#                     opens (or comments on) a pull request against main.
#   3. stop-runner  - always runs, so the EC2 runner is stopped even when an
#                     earlier job failed.
name: Train on GPU

on:
  push:
    branches:
      - "train_*"

jobs:
  start-runner:
    name: Start EC2 GPU runner
    runs-on: ubuntu-latest
    # Consumed by `train` (runs-on) and by `stop-runner` (label + instance id).
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PAT }}
          ec2-image-id: ami-06c62c213d8fdc88f
          ec2-instance-type: g4dn.xlarge
          subnet-id: subnet-0ed91f5fc60959086
          security-group-id: sg-05cd9ff586699e8db

  train:
    name: Run training
    needs: start-runner
    # Target the runner that the previous job just started.
    runs-on: ${{ needs.start-runner.outputs.label }}
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3

      - name: Install dependencies
        run: uv sync

      - name: Run pipeline
        run: uv run dvc repro

      - name: Push artifacts to DVC remote
        run: uv run dvc push

      # Creates `result_<suffix>` from the checked-out `train_<suffix>` commit
      # and force-pushes it, replacing any result branch from an earlier run.
      # The git identity configured here is reused by the later commit step.
      - name: Push results to result branch
        run: |
          BRANCH_NAME="${GITHUB_REF_NAME}"
          RESULT_BRANCH="result_${BRANCH_NAME#train_}"
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git checkout -b "$RESULT_BRANCH"
          git add dvc.lock
          # `git commit` exits non-zero when nothing is staged; an unchanged
          # dvc.lock must not fail the job before evaluation runs.
          if git diff --cached --quiet; then
            echo "dvc.lock unchanged; nothing to commit"
          else
            git commit -m "chore: training results from $BRANCH_NAME [skip ci]"
          fi
          git push --force origin "$RESULT_BRANCH"

      - name: Download test dataset
        run: |
          uv run dvc get https://github.com/pyronear/pyro-dataset \
            data/processed/yolo_test --rev v2.0.0 \
            --out ./data/test/yolo_test

      # NOTE(review): `--iou 0.01` is kept as found; confirm the threshold is
      # intentional.
      - name: Run evaluation
        run: |
          uv run python ./scripts/model/yolo/evaluate.py \
            --model-dir ./data/02_models/yolo/best \
            --data ./data/test/yolo_test/data.yaml \
            --output-dir ./data/03_reporting/yolo/eval/ \
            --split test \
            --iou 0.01 \
            --loglevel info

      - name: Download sequential test dataset
        run: |
          uv run dvc get https://github.com/pyronear/pyro-dataset \
            data/processed/sequential_test/test --rev v2.0.0 \
            --out ./data/test/sequential_test

      - name: Run sequential evaluation
        run: |
          uv run python ./scripts/model/yolo/evaluate_sequential.py \
            --model-path ./data/02_models/yolo-export/best/onnx/cpu/best.onnx \
            --data-dir ./data/test/sequential_test \
            --output-dir ./data/03_reporting/yolo/sequential/eval/ \
            --max-frames 15 \
            --loglevel info

      # Copies both result files into the tracked `results/` directory and
      # pushes them to the result branch checked out by the earlier step.
      - name: Commit eval results to result branch
        run: |
          RESULT_BRANCH="result_${GITHUB_REF_NAME#train_}"
          mkdir -p results
          cp ./data/03_reporting/yolo/eval/eval_results.json results/eval_results.json
          cp ./data/03_reporting/yolo/sequential/eval/eval_sequential_results.json results/eval_sequential_results.json
          git add results/eval_results.json results/eval_sequential_results.json
          # Skip the commit when the staged files are byte-identical to what
          # the branch already tracks; `git commit` would otherwise fail.
          if git diff --cached --quiet; then
            echo "eval results unchanged; nothing to commit"
          else
            git commit -m "chore: add eval results [skip ci]"
          fi
          git push origin "$RESULT_BRANCH"

      # Installs gh from the GitHub CLI apt repository only when it is not
      # already on PATH.
      - name: Install gh CLI
        run: |
          if ! command -v gh > /dev/null 2>&1; then
            sudo mkdir -p -m 755 /etc/apt/keyrings
            wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg \
              | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null
            sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg
            echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
              | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
            sudo apt-get update -qq
            sudo apt-get install -y gh
          fi

      # Builds a markdown report comparing this run with the results committed
      # on main, then opens a PR from the result branch or, when one already
      # exists, adds the report as a comment.
      - name: Open PR and publish metrics
        env:
          GH_TOKEN: ${{ secrets.GH_PAT }}
        run: |
          export RESULT_BRANCH="result_${GITHUB_REF_NAME#train_}"
          export RESULTS=./data/03_reporting/yolo/eval/eval_results.json
          export SEQ_RESULTS=./data/03_reporting/yolo/sequential/eval/eval_sequential_results.json

          # actions/checkout fetches only the triggering ref by default, so
          # origin/main is absent until fetched explicitly. Without this the
          # baseline lookups below always fall back to "{}".
          git fetch --no-tags --depth=1 origin \
            "+refs/heads/main:refs/remotes/origin/main" \
            || echo "::warning::Could not fetch origin/main; metrics will have no baseline"

          # Fetch baseline results from main (may not exist on first run)
          export BASELINE_JSON=$(git show origin/main:results/eval_results.json 2>/dev/null || echo "{}")
          export BASELINE_SEQ_JSON=$(git show origin/main:results/eval_sequential_results.json 2>/dev/null || echo "{}")

          BODY=$(uv run python - <<'PYEOF'
          import json
          import os


          def load_json_file(env_var):
              # Read the JSON file whose path is stored in the given variable.
              with open(os.environ[env_var]) as f:
                  return json.load(f)


          def load_baseline(env_var):
              # Empty text and an empty JSON object both mean no baseline.
              raw = os.environ.get(env_var, "").strip()
              if not raw:
                  return None
              return json.loads(raw) or None


          def fmt(v):
              return f"{v:.4f}" if v is not None else "N/A"


          def delta(curr_val, prev_val):
              if prev_val is None or curr_val is None:
                  return "n/a"
              d = curr_val - prev_val
              sign = "+" if d >= 0 else ""
              return f"{sign}{d:.4f}"


          def table_rows(metrics, current, baseline):
              # A metric missing from the baseline renders as n/a in both the
              # main and delta columns instead of raising KeyError.
              baseline = baseline or {}
              return "\n".join(
                  "| {} | {} | {} | {} |".format(
                      label,
                      fmt(baseline[key]) if key in baseline else "n/a",
                      fmt(current[key]),
                      delta(current[key], baseline.get(key)),
                  )
                  for label, key in metrics
              )


          curr = load_json_file("RESULTS")
          seq = load_json_file("SEQ_RESULTS")
          prev = load_baseline("BASELINE_JSON")
          prev_seq = load_baseline("BASELINE_SEQ_JSON")

          rows = table_rows(
              [
                  ("mAP@50", "map50"),
                  ("mAP@50-95", "map50_95"),
                  ("Precision", "precision"),
                  ("Recall", "recall"),
              ],
              curr,
              prev,
          )
          seq_rows = table_rows(
              [
                  ("Recall (TP rate)", "recall"),
                  ("FPR", "fpr"),
                  ("F1", "f1"),
                  ("Precision", "precision"),
              ],
              seq,
              prev_seq,
          )

          tp, fn, fp, tn = seq["tp"], seq["fn"], seq["fp"], seq["tn"]
          if prev is None:
              baseline_note = "_(no baseline on main yet)_"
          else:
              baseline_note = "_(vs last committed results on main)_"
          result_branch = os.environ.get("RESULT_BRANCH", "")

          # Blank lines end each markdown table so the notes after it are not
          # rendered as extra table rows.
          print(f"""## Evaluation results on test set

          | Metric | main | current | Δ |
          |--------|------|---------|---|
          {rows}

          ## Sequential evaluation (engine, max {seq['max_frames']} frames/seq)

          TP={tp} FN={fn} FP={fp} TN={tn}

          | Metric | main | current | Δ |
          |--------|------|---------|---|
          {seq_rows}

          {baseline_note}

          **Branch:** `{result_branch}` | **Test dataset:** pyronear/pyro-dataset @ v2.0.0""")
          PYEOF
          )

          # Reuse the PR for this result branch when one is listed; the query
          # yields an empty string or "null" when there is none.
          PR_NUMBER=$(gh pr list --head "$RESULT_BRANCH" --base main --json number --jq '.[0].number')
          if [ -z "$PR_NUMBER" ] || [ "$PR_NUMBER" = "null" ]; then
            gh pr create \
              --base main \
              --head "$RESULT_BRANCH" \
              --title "Training results: $RESULT_BRANCH" \
              --body "$BODY"
          else
            gh pr comment "$PR_NUMBER" --body "$BODY"
          fi

  stop-runner:
    name: Stop EC2 GPU runner
    needs: [start-runner, train]
    runs-on: ubuntu-latest
    # always(): run even when start-runner or train failed or was cancelled,
    # so the EC2 runner is not left running.
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PAT }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}