skills work #95
name: EvalView Agent Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  # Allow manual triggering
  workflow_dispatch:
    inputs:
      test_filter:
        description: 'Filter tests by name pattern'
        required: false
        default: ''

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install EvalView
        run: |
          pip install -e ".[all]"

      - name: Verify installation
        run: evalview --help

      - name: Run agent tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Configure your agent endpoint
          # AGENT_ENDPOINT: ${{ vars.AGENT_ENDPOINT }}
        run: |
          # Run tests with parallel execution (4 workers for CI)
          evalview run \
            --max-workers 4 \
            --max-retries 2 \
            ${{ github.event.inputs.test_filter && format('--filter "{0}"', github.event.inputs.test_filter) || '' }}

      - name: Generate HTML report
        if: always()
        run: |
          # Find the latest results file and generate an HTML report
          RESULTS_FILE=$(ls -t .evalview/results/*.json 2>/dev/null | head -1)
          if [ -n "$RESULTS_FILE" ]; then
            evalview report "$RESULTS_FILE" --html report.html
          fi

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: evalview-results
          path: |
            .evalview/results/*.json
            report.html
          retention-days: 30

      - name: Check for failures
        if: always()
        run: |
          # Parse the latest results and fail the job if any tests failed
          RESULTS_FILE=$(ls -t .evalview/results/*.json 2>/dev/null | head -1)
          if [ -n "$RESULTS_FILE" ]; then
            # Count failed tests
            FAILED=$(python3 -c "
          import json
          with open('$RESULTS_FILE') as f:
              results = json.load(f)
          failed = sum(1 for r in results if not r.get('passed', False))
          print(failed)
          ")
            if [ "$FAILED" -gt 0 ]; then
              echo "::error::$FAILED test(s) failed"
              exit 1
            fi
          fi
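The failure check above assumes the EvalView results file is a JSON array of per-test objects carrying a boolean `passed` field; that shape is inferred from the inline `python3 -c` snippet, not from a documented schema. If the one-liner grows awkward, the same logic can live in a small helper script committed to the repository. A minimal sketch under those assumptions, using a hypothetical `scripts/check_results.py` path (the workflow step would then run `python3 scripts/check_results.py "$RESULTS_FILE"` instead of the inline snippet):

#!/usr/bin/env python3
"""Fail CI when any EvalView result is not marked as passed.

Sketch of the check the workflow runs inline with python3 -c. It assumes
the results file is a JSON array of objects with a boolean "passed" key;
adjust if your EvalView version writes a different shape.
"""
import json
import sys
from pathlib import Path


def count_failures(results_path: Path) -> int:
    # A result counts as failed when "passed" is missing or falsy,
    # matching the inline snippet in the workflow above.
    results = json.loads(results_path.read_text())
    return sum(1 for r in results if not r.get("passed", False))


if __name__ == "__main__":
    failed = count_failures(Path(sys.argv[1]))
    if failed > 0:
        # "::error::" is a GitHub Actions workflow command; it annotates the run log.
        print(f"::error::{failed} test(s) failed")
        sys.exit(1)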