# Workflow source for run "skills work" #95
# (copied from the GitHub Actions "Workflow file for this run" view)
---
# CI workflow: runs the EvalView agent test suite on pushes/PRs to main,
# generates an HTML report, uploads results as an artifact, and fails the
# job when any test failed.
name: EvalView Agent Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  # Allow manual triggering
  workflow_dispatch:
    inputs:
      test_filter:
        description: 'Filter tests by name pattern'
        required: false
        default: ''

# Least-privilege token: nothing in this workflow writes to the repository.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install EvalView
        run: |
          pip install -e ".[all]"

      - name: Verify installation
        run: evalview --help

      - name: Run agent tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Configure your agent endpoint
          # AGENT_ENDPOINT: ${{ vars.AGENT_ENDPOINT }}
        run: |
          # Run tests with parallel execution (4 workers for CI)
          evalview run \
            --max-workers 4 \
            --max-retries 2 \
            ${{ github.event.inputs.test_filter && format('--filter "{0}"', github.event.inputs.test_filter) || '' }}

      - name: Generate HTML report
        if: always()
        run: |
          # Find the latest results file and generate HTML report
          RESULTS_FILE=$(ls -t .evalview/results/*.json 2>/dev/null | head -1)
          if [ -n "$RESULTS_FILE" ]; then
            evalview report "$RESULTS_FILE" --html report.html
          fi

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: evalview-results
          path: |
            .evalview/results/*.json
            report.html
          retention-days: 30

      - name: Check for failures
        if: always()
        run: |
          # Parse the latest results and fail the job if any tests failed
          RESULTS_FILE=$(ls -t .evalview/results/*.json 2>/dev/null | head -1)
          if [ -n "$RESULTS_FILE" ]; then
            # Pass the path via the environment instead of interpolating it
            # into the Python source, so quotes/specials in the filename
            # cannot break (or inject into) the script.
            # NOTE(review): assumes the results JSON is a list of per-test
            # objects with a boolean 'passed' key — confirm against
            # evalview's output schema.
            FAILED=$(RESULTS_FILE="$RESULTS_FILE" python3 -c "
          import json, os
          with open(os.environ['RESULTS_FILE']) as f:
              results = json.load(f)
          failed = sum(1 for r in results if not r.get('passed', False))
          print(failed)
          ")
            if [ "$FAILED" -gt 0 ]; then
              echo "::error::$FAILED test(s) failed"
              exit 1
            fi
          fi