update timeout #22
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name shown in the Actions tab.
name: Train on GPU

# Runs whenever a branch whose name matches "train_*" is pushed.
on:
  push:
    branches: ["train_*"]
jobs:
  # Starts the self-hosted EC2 GPU runner and exposes its label and instance
  # id to the jobs that depend on it.
  start-runner:
    name: Start EC2 GPU runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PAT }}
          ec2-image-id: ami-06c62c213d8fdc88f
          ec2-instance-type: g4dn.xlarge
          subnet-id: subnet-0ed91f5fc60959086
          security-group-id: sg-05cd9ff586699e8db

  # Runs the DVC pipeline on the EC2 runner, pushes the results to a
  # "result_<suffix>" branch, evaluates the model, and publishes the metrics
  # on a pull request against main.
  train:
    name: Run training
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    # 1440 minutes = 24 hours.
    timeout-minutes: 1440
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3
      - name: Install dependencies
        run: uv sync
      - name: Run pipeline
        run: uv run dvc repro
      - name: Push artifacts to DVC remote
        run: uv run dvc push
      - name: Push results to result branch
        run: |
          BRANCH_NAME="${GITHUB_REF_NAME}"
          # train_foo -> result_foo
          RESULT_BRANCH="result_${BRANCH_NAME#train_}"
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git checkout -b "$RESULT_BRANCH"
          git add dvc.lock
          # git commit exits non-zero when nothing is staged; skip it so an
          # unchanged dvc.lock does not abort the job before evaluation.
          if ! git diff --cached --quiet; then
            git commit -m "chore: training results from $BRANCH_NAME [skip ci]"
          fi
          git push --force origin "$RESULT_BRANCH"
      - name: Download test dataset
        run: |
          uv run dvc get https://github.com/pyronear/pyro-dataset \
            data/processed/yolo_test --rev v2.0.0 \
            --out ./data/test/yolo_test
      - name: Run evaluation
        run: |
          uv run python ./scripts/model/yolo/evaluate.py \
            --model-dir ./data/02_models/yolo/best \
            --data ./data/test/yolo_test/data.yaml \
            --output-dir ./data/03_reporting/yolo/eval/ \
            --split test \
            --iou 0.01 \
            --loglevel info
      - name: Download sequential test dataset
        run: |
          uv run dvc get https://github.com/pyronear/pyro-dataset \
            data/processed/sequential_test/test --rev v2.0.0 \
            --out ./data/test/sequential_test
      - name: Run sequential evaluation
        run: |
          uv run python ./scripts/model/yolo/evaluate_sequential.py \
            --model-path ./data/02_models/yolo-export/best/onnx/cpu/best.onnx \
            --data-dir ./data/test/sequential_test \
            --output-dir ./data/03_reporting/yolo/sequential/eval/ \
            --max-frames 15 \
            --loglevel info
      - name: Commit eval results to result branch
        run: |
          RESULT_BRANCH="result_${GITHUB_REF_NAME#train_}"
          mkdir -p results
          cp ./data/03_reporting/yolo/eval/eval_results.json results/eval_results.json
          cp ./data/03_reporting/yolo/sequential/eval/eval_sequential_results.json results/eval_sequential_results.json
          git add results/eval_results.json results/eval_sequential_results.json
          # Same guard as above: identical results must not fail the job.
          if ! git diff --cached --quiet; then
            git commit -m "chore: add eval results [skip ci]"
          fi
          git push origin "$RESULT_BRANCH"
      - name: Install gh CLI
        run: |
          # Install only when gh is not already available on the runner image.
          if ! command -v gh > /dev/null 2>&1; then
            sudo mkdir -p -m 755 /etc/apt/keyrings
            wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg \
              | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null
            sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg
            echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
              | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
            sudo apt-get update -qq
            sudo apt-get install -y gh
          fi
      - name: Open PR and publish metrics
        env:
          GH_TOKEN: ${{ secrets.GH_PAT }}
        run: |
          export RESULT_BRANCH="result_${GITHUB_REF_NAME#train_}"
          export RESULTS=./data/03_reporting/yolo/eval/eval_results.json
          export SEQ_RESULTS=./data/03_reporting/yolo/sequential/eval/eval_sequential_results.json
          # actions/checkout fetches only the triggering ref by default, so
          # origin/main has to be fetched explicitly; otherwise the baseline
          # lookups below always fall back to "{}".
          git fetch --depth=1 origin "+refs/heads/main:refs/remotes/origin/main" || true
          # Fetch baseline results from main (may not exist on first run)
          export BASELINE_JSON=$(git show origin/main:results/eval_results.json 2>/dev/null || echo "{}")
          export BASELINE_SEQ_JSON=$(git show origin/main:results/eval_sequential_results.json 2>/dev/null || echo "{}")
          # Build the Markdown PR body from the current and baseline metrics.
          BODY=$(uv run python - <<'PYEOF'
          import json
          import os


          def load_json_file(path):
              """Load and return the JSON document stored at path."""
              with open(path, encoding="utf-8") as f:
                  return json.load(f)


          def load_baseline(env_name):
              """Return the baseline metrics dict held in env_name, or None.

              None is returned when the variable is unset, blank, not valid
              JSON, or an empty object (the shell fallback for no baseline).
              """
              raw = os.environ.get(env_name, "").strip()
              if not raw:
                  return None
              try:
                  data = json.loads(raw)
              except json.JSONDecodeError:
                  return None
              return data if isinstance(data, dict) and data else None


          def fmt(v):
              """Format a metric with 4 decimals, or N/A when it is None."""
              return f"{v:.4f}" if v is not None else "N/A"


          def delta(curr_val, prev_val):
              """Return the signed difference curr - prev, or n/a if either is None."""
              if prev_val is None or curr_val is None:
                  return "n/a"
              d = curr_val - prev_val
              return f"{'+' if d >= 0 else ''}{d:.4f}"


          def table_rows(metrics, current, baseline):
              """Render one Markdown table row per (label, key) pair.

              A metric that is absent from the baseline shows n/a in both the
              baseline and delta columns instead of raising KeyError.
              """
              lines = []
              for label, key in metrics:
                  has_prev = baseline is not None and key in baseline
                  prev_val = baseline[key] if has_prev else None
                  prev_cell = fmt(prev_val) if has_prev else "n/a"
                  lines.append(
                      f"| {label} | {prev_cell} | {fmt(current[key])} | {delta(current[key], prev_val)} |"
                  )
              return "\n".join(lines)


          curr = load_json_file(os.environ["RESULTS"])
          seq = load_json_file(os.environ["SEQ_RESULTS"])
          prev = load_baseline("BASELINE_JSON")
          prev_seq = load_baseline("BASELINE_SEQ_JSON")

          metrics = [
              ("mAP@50", "map50"),
              ("mAP@50-95", "map50_95"),
              ("Precision", "precision"),
              ("Recall", "recall"),
          ]
          seq_metrics = [
              ("Recall (TP rate)", "recall"),
              ("FPR", "fpr"),
              ("F1", "f1"),
              ("Precision", "precision"),
          ]
          rows = table_rows(metrics, curr, prev)
          seq_rows = table_rows(seq_metrics, seq, prev_seq)

          tp, fn, fp, tn = seq["tp"], seq["fn"], seq["fp"], seq["tn"]
          baseline_note = "_(no baseline on main yet)_" if prev is None else "_(vs last committed results on main)_"

          # Blank lines around each table keep GitHub Markdown from absorbing
          # the following text lines as extra table rows.
          print(f"""## Evaluation results on test set

          | Metric | main | current | Δ |
          |--------|------|---------|---|
          {rows}

          ## Sequential evaluation (engine, max {seq['max_frames']} frames/seq)

          TP={tp} FN={fn} FP={fp} TN={tn}

          | Metric | main | current | Δ |
          |--------|------|---------|---|
          {seq_rows}

          {baseline_note}

          **Branch:** `{os.environ.get('RESULT_BRANCH', '')}` | **Test dataset:** pyronear/pyro-dataset @ v2.0.0""")
          PYEOF
          )
          # Create the PR on first publication; afterwards add a comment to it.
          PR_NUMBER=$(gh pr list --head "$RESULT_BRANCH" --base main --json number --jq '.[0].number')
          if [ -z "$PR_NUMBER" ] || [ "$PR_NUMBER" = "null" ]; then
            gh pr create \
              --base main \
              --head "$RESULT_BRANCH" \
              --title "Training results: $RESULT_BRANCH" \
              --body "$BODY"
          else
            gh pr comment "$PR_NUMBER" --body "$BODY"
          fi

  # Stops the EC2 runner started by start-runner.
  stop-runner:
    name: Stop EC2 GPU runner
    needs: [start-runner, train]
    runs-on: ubuntu-latest
    # always() makes this job run even when a needed job failed or was cancelled.
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-3
      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PAT }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}