CI/CD Integration with Ephemeral Deployment Model #1
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Benchmarks

on:
  # Run on schedule (nightly)
  schedule:
    - cron: '0 2 * * *'  # 2 AM UTC every day
  # Allow manual trigger
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''
  # Run on PR when benchmarks are modified
  pull_request:
    paths:
      - 'backend/benchmarks/**'
      - 'backend/toolbox/modules/**'
      - '.github/workflows/benchmark.yml'

# Least-privilege token: the PR-comment step needs pull-requests: write;
# everything else only reads the repository.
permissions:
  contents: read
  pull-requests: write

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          # Quote the extras spec so the shell never glob-expands the brackets.
          pip install pytest pytest-asyncio pytest-benchmark "pytest-benchmark[histogram]"

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          # pytest-benchmark may emit one SVG per benchmark group, all sharing
          # the "benchmark-histogram" prefix, so upload with a glob.
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram*.svg

      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          # The artifact name embeds the run number, so it must be matched as
          # a regexp. Without name_is_regexp the name is compared literally
          # and "benchmark-results-*" would never match anything.
          name: benchmark-results-.*
          name_is_regexp: true
          path: ./baseline
          search_artifacts: true

      - name: Compare with baseline
        # With name_is_regexp, each artifact unpacks into its own
        # baseline/<artifact-name>/ subdirectory — search recursively.
        if: github.event_name == 'pull_request' && hashFiles('baseline/**/benchmark-results.json') != ''
        run: |
          python - <<'EOF'
          import glob
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)

          # Artifacts unpack into baseline/<artifact-name>/; the names embed
          # the run number, so the lexicographically last path is the newest.
          baseline_files = sorted(glob.glob('baseline/**/benchmark-results.json', recursive=True))
          with open(baseline_files[-1]) as f:
              baseline = json.load(f)

          print('\n## Benchmark Comparison\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []
          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']
              # Find matching baseline entry by benchmark name.
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100
                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')
                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\n✅ No significant performance regressions detected')
          EOF

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            // Note: the script is passed straight to JS (no shell layer), so
            // newlines must be written as '\n' — '\\n' would render literally.
            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            // await so an API failure fails the step instead of being a
            // silently-dropped floating promise.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });

  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"