0.0.58 panic (SIGSEGV) in MCP middleware during initialize/tools flow #219
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
name: mcpchecker MCP Evaluation

on:
  # Weekly schedule - runs every Monday at 9 AM UTC
  schedule:
    - cron: '0 9 * * 1'

  # Manual trigger via PR comments.
  # NOTE: issue_comment fires for every new comment; the check-trigger job's
  # `if` narrows this to PR comments containing `/run-mcpchecker`.
  issue_comment:
    types:
      - created

  # Allow manual workflow dispatch for testing
  workflow_dispatch:
    inputs:
      # Selects which task suite (and matching infrastructure) to evaluate.
      suite:
        description: 'Which task suite to run (kubernetes, kubevirt, kiali, or all)'
        required: false
        default: 'kubernetes'
        type: choice
        options:
          - kubernetes
          - kubevirt
          - kiali
          - all
      # Forwarded to the mcpchecker action's `task-filter` input.
      task-filter:
        description: 'Regular expression to filter tasks (optional)'
        required: false
        default: ''
      # Forwarded to the mcpchecker action's `verbose` input.
      verbose:
        description: 'Enable verbose output'
        required: false
        type: boolean
        default: false
# Default token scope for every job: read-only.
# This workflow checks out and runs potentially untrusted PR code, so no job
# gets write access to PRs/issues unless it overrides `permissions` itself.
permissions:
  contents: read
  actions: read
concurrency:
  # Only run once for latest commit per ref and cancel other (previous) runs.
  # For issue_comment events, use PR number as group to avoid different PRs canceling each other.
  #
  # Concurrency is evaluated for the whole run, before any job-level `if`.
  # A comment WITHOUT the trigger phrase therefore gets its own per-run group
  # (`noop-<run_id>`); otherwise every ordinary PR comment would cancel an
  # evaluation that is still in progress for that PR.
  # NOTE(review): a comment that contains the phrase still joins the PR group
  # before the commenter's permission is checked, so it can still cancel a run.
  group: ${{ github.workflow }}-${{ github.event_name != 'issue_comment' && github.ref || contains(github.event.comment.body, '/run-mcpchecker') && format('pr-{0}', github.event.issue.number) || format('noop-{0}', github.run_id) }}
  cancel-in-progress: true
# Workflow-wide environment, exported to every step.
env:
  # Quoted on purpose: an unquoted version is a YAML number, so a future
  # value such as 1.30 would silently be read as 1.3.
  GO_VERSION: '1.25'
  # Name of the Kind cluster created and deleted by the evaluation job.
  KIND_CLUSTER_NAME: mcp-eval-cluster

# Every `run:` step uses bash unless it overrides the shell.
defaults:
  run:
    shell: bash
jobs:
  # Check if workflow should run based on trigger.
  # Decides whether the evaluation may run, which suite/infrastructure to use,
  # and which commit SHA to evaluate; publishes those decisions as job outputs.
  check-trigger:
    name: Check if evaluation should run
    runs-on: ubuntu-latest
    if: |
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment' &&
       github.event.issue.pull_request &&
       contains(github.event.comment.body, '/run-mcpchecker'))
    outputs:
      should-run: ${{ steps.check.outputs.should-run }}
      kiali-run: ${{ steps.check.outputs.kiali-run }}
      kubevirt-run: ${{ steps.check.outputs.kubevirt-run }}
      label-selector: ${{ steps.check.outputs.label-selector }}
      pr-number: ${{ steps.check.outputs.pr-number }}
      pr-sha: ${{ steps.check.outputs.pr-sha }}
      is-pr: ${{ steps.check.outputs.is-pr }}
    steps:
      - name: Check trigger conditions
        id: check
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Event data is handed to the script through environment variables
          # instead of being templated into the script text. The comment body
          # is attacker-controlled; expanding it inside the script would let
          # any commenter execute shell commands before the permission check.
          EVENT_NAME: ${{ github.event_name }}
          REPOSITORY: ${{ github.repository }}
          HEAD_SHA: ${{ github.sha }}
          COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
          COMMENT_BODY: ${{ github.event.comment.body }}
          PR_NUMBER: ${{ github.event.issue.number }}
          # Suite from workflow_dispatch; other triggers default to kubernetes.
          DEFAULT_SUITE: ${{ github.event.inputs.suite || 'kubernetes' }}
        run: |
          if [[ "$EVENT_NAME" == "issue_comment" ]]; then
            # Check if commenter has write access
            PERMISSION=$(gh api "repos/$REPOSITORY/collaborators/$COMMENT_AUTHOR/permission" --jq '.permission')
            if [[ "$PERMISSION" == "admin" || "$PERMISSION" == "write" ]]; then
              echo "should-run=true" >> "$GITHUB_OUTPUT"
              echo "is-pr=true" >> "$GITHUB_OUTPUT"
              echo "pr-number=$PR_NUMBER" >> "$GITHUB_OUTPUT"

              # Capture SHA at trigger time to prevent TOCTOU race condition
              # This ensures we run the exact code the maintainer reviewed
              PR_SHA=$(gh pr view "$PR_NUMBER" --repo "$REPOSITORY" --json headRefOid --jq '.headRefOid')
              echo "pr-sha=$PR_SHA" >> "$GITHUB_OUTPUT"
              echo "Pinned to SHA: $PR_SHA"
            else
              echo "should-run=false" >> "$GITHUB_OUTPUT"
              echo "User $COMMENT_AUTHOR does not have permission to trigger evaluations"
            fi
          else
            echo "should-run=true" >> "$GITHUB_OUTPUT"
            echo "is-pr=false" >> "$GITHUB_OUTPUT"
            echo "pr-sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"
          fi

          # Suite selection:
          # - For workflow_dispatch, use the provided input.
          # - For other triggers (schedule/issue_comment), default to kubernetes.
          SUITE="$DEFAULT_SUITE"
          SUITE_INPUT=""

          if [[ "$EVENT_NAME" == "issue_comment" ]]; then
            # Parse comment: /run-mcpchecker [suite]. Suite = kubernetes | kubevirt | kiali | all
            # All whitespace (including newlines) is collapsed to spaces, then the
            # second whitespace-separated word is taken as the requested suite.
            NORMALIZED_BODY="${COMMENT_BODY//[[:space:]]/ }"
            read -r _ REQUESTED_SUITE _ <<< "$NORMALIZED_BODY"
            case "${REQUESTED_SUITE,,}" in
              kubevirt|kiali|all)
                SUITE_INPUT="${REQUESTED_SUITE,,}"
                ;;
              kubernetes)
                SUITE_INPUT="kubernetes"
                ;;
              *) ;;  # default: keep SUITE_INPUT empty -> kubernetes
            esac
          fi
          SUITE="${SUITE_INPUT:-$SUITE}"

          # Select label-selector and infrastructure based on suite
          # All suites use the same eval.yaml file; suite controls label-selector + infra.
          case "$SUITE" in
            kubevirt)
              echo "label-selector=suite=kubevirt" >> "$GITHUB_OUTPUT"
              echo "kiali-run=false" >> "$GITHUB_OUTPUT"
              echo "kubevirt-run=true" >> "$GITHUB_OUTPUT"
              ;;
            kiali)
              echo "label-selector=suite=kiali" >> "$GITHUB_OUTPUT"
              echo "kiali-run=true" >> "$GITHUB_OUTPUT"
              echo "kubevirt-run=false" >> "$GITHUB_OUTPUT"
              ;;
            all)
              echo "label-selector=" >> "$GITHUB_OUTPUT"  # No filter: run all taskSets
              echo "kiali-run=true" >> "$GITHUB_OUTPUT"
              echo "kubevirt-run=true" >> "$GITHUB_OUTPUT"
              ;;
            *)
              echo "label-selector=suite=kubernetes" >> "$GITHUB_OUTPUT"
              echo "kiali-run=false" >> "$GITHUB_OUTPUT"
              echo "kubevirt-run=false" >> "$GITHUB_OUTPUT"
              ;;
          esac
# ---- job: run-evaluation (child of `jobs:`) --------------------------------
  # Run the mcpchecker evaluation against an MCP server backed by a Kind cluster.
  # Only runs when check-trigger reported should-run == 'true'.
  run-evaluation:
    name: Run MCP Evaluation
    needs: check-trigger
    if: needs.check-trigger.outputs.should-run == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          # Use pinned SHA to prevent TOCTOU attacks
          # For PRs: the SHA captured when maintainer commented
          # For other triggers: the current commit SHA
          ref: ${{ needs.check-trigger.outputs.pr-sha }}
          # The following steps execute code from the checked-out (possibly
          # untrusted) PR; do not leave the job token in .git/config for it.
          persist-credentials: false

      - name: Setup Go
        uses: actions/setup-go@v6
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Setup Kind cluster
        # KIND_CLUSTER_NAME comes from the workflow-level `env`.
        run: make kind-create-cluster KIND_CLUSTER_NAME="$KIND_CLUSTER_NAME"

      - name: Install Istio/Kiali and bookinfo demo
        if: needs.check-trigger.outputs.kiali-run == 'true'
        run: make setup-kiali

      - name: Install KubeVirt
        if: needs.check-trigger.outputs.kubevirt-run == 'true'
        run: make kubevirt-install

      - name: Start MCP server
        run: make run-server
        env:
          # 'kiali,kubevirt', 'kiali', 'kubevirt' or empty, mirroring which
          # optional suites check-trigger enabled.
          TOOLSETS: >-
            ${{
              (needs.check-trigger.outputs.kiali-run == 'true' && needs.check-trigger.outputs.kubevirt-run == 'true' && 'kiali,kubevirt') ||
              (needs.check-trigger.outputs.kiali-run == 'true' && 'kiali') ||
              (needs.check-trigger.outputs.kubevirt-run == 'true' && 'kubevirt') ||
              ''
            }}

      - name: Run mcpchecker evaluation
        id: mcpchecker
        uses: mcpchecker/mcpchecker/.github/actions/mcpchecker-action@v0.0.10
        with:
          eval-config: 'evals/openai-agent/eval.yaml'
          mcpchecker-version: 'latest'
          label-selector: ${{ needs.check-trigger.outputs.label-selector }}
          task-filter: ${{ github.event.inputs.task-filter || '' }}
          output-format: 'json'
          verbose: ${{ github.event.inputs.verbose || 'false' }}
          upload-artifacts: 'true'
          artifact-name: 'mcpchecker-results'
          fail-on-error: 'false'
          task-pass-threshold: '0.8'
          assertion-pass-threshold: '0.8'
          working-directory: '.'
        env:
          # OpenAI Agent configuration
          MODEL_BASE_URL: ${{ secrets.MODEL_BASE_URL }}
          MODEL_KEY: ${{ secrets.MODEL_KEY }}
          # LLM Judge configuration
          JUDGE_BASE_URL: ${{ secrets.JUDGE_BASE_URL }}
          JUDGE_API_KEY: ${{ secrets.JUDGE_API_KEY }}
          JUDGE_MODEL_NAME: ${{ secrets.JUDGE_MODEL_NAME }}

      - name: Cleanup
        if: always()
        # Best effort: failures here must not mask the evaluation outcome.
        run: |
          make stop-server || true
          make kind-delete-cluster KIND_CLUSTER_NAME="$KIND_CLUSTER_NAME" || true

      # Save context and results for the reporting workflow
      - name: Save evaluation context
        if: always() && needs.check-trigger.outputs.is-pr == 'true'
        env:
          # Values reach the script as environment variables. Templating them
          # straight into the unquoted heredoc would let a value containing
          # $(...) or backticks run as a command.
          PR_NUMBER: ${{ needs.check-trigger.outputs.pr-number }}
          PR_SHA: ${{ needs.check-trigger.outputs.pr-sha }}
          TASKS_PASSED: ${{ steps.mcpchecker.outputs.tasks-passed }}
          TASKS_TOTAL: ${{ steps.mcpchecker.outputs.tasks-total }}
          TASK_PASS_RATE: ${{ steps.mcpchecker.outputs.task-pass-rate }}
          ASSERTIONS_PASSED: ${{ steps.mcpchecker.outputs.assertions-passed }}
          ASSERTIONS_TOTAL: ${{ steps.mcpchecker.outputs.assertions-total }}
          PASSED: ${{ steps.mcpchecker.outputs.passed }}
        run: |
          mkdir -p eval-context
          cat > eval-context/context.json << EOF
          {
            "pr_number": "$PR_NUMBER",
            "pr_sha": "$PR_SHA",
            "tasks_passed": "$TASKS_PASSED",
            "tasks_total": "$TASKS_TOTAL",
            "task_pass_rate": "$TASK_PASS_RATE",
            "assertions_passed": "$ASSERTIONS_PASSED",
            "assertions_total": "$ASSERTIONS_TOTAL",
            "passed": "$PASSED"
          }
          EOF

      - name: Upload PR context
        if: always() && needs.check-trigger.outputs.is-pr == 'true'
        uses: actions/upload-artifact@v7
        with:
          name: eval-context
          path: eval-context/
          retention-days: 1
# ---- job: commit-results (child of `jobs:`) --------------------------------
  # Create PR with results (scheduled runs only).
  # Copies the newest mcpchecker results file into evals/results/ and opens
  # (or updates) a PR from the chore/update-eval-results branch.
  commit-results:
    name: Commit Evaluation Results
    needs: [check-trigger, run-evaluation]
    # Only commit results on scheduled runs, not manual dispatch or PR comments
    if: always() && github.event_name == 'schedule' && needs.run-evaluation.result == 'success'
    runs-on: ubuntu-latest
    # This job alone needs write access: it pushes a branch and manages a PR.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: main

      - name: Download mcpchecker results
        uses: actions/download-artifact@v8
        with:
          name: mcpchecker-results
          path: mcpchecker-results/

      - name: Copy results to evals/results
        env:
          EVAL_CONFIG: 'evals/openai-agent/eval.yaml'
        run: |
          # Extract agent name from eval-config path
          AGENT_NAME=$(echo "$EVAL_CONFIG" | sed 's|evals/||; s|/eval\.yaml||')
          mkdir -p evals/results

          # Copy the most recent mcpchecker results file with agent-specific name.
          # `|| true` matters: the step runs under `bash -eo pipefail`, so when
          # the glob matches nothing the failing `ls` would otherwise abort the
          # script right here, before the explicit error below can be printed.
          RESULTS_FILE=$(ls -t mcpchecker-results/mcpchecker-*-out.json 2>/dev/null | head -1 || true)
          if [ -z "$RESULTS_FILE" ]; then
            echo "Error: No mcpchecker results file found"
            exit 1
          fi
          cp "$RESULTS_FILE" "evals/results/${AGENT_NAME}-latest.json"

      - name: Create Pull Request
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TRIGGER: ${{ github.event_name }}
          COMMIT_SHA: ${{ needs.check-trigger.outputs.pr-sha }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          BRANCH="chore/update-eval-results"

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Create or reset the branch
          git checkout -B "$BRANCH"
          git add evals/results/*-latest.json

          # Skip if no changes
          if git diff --staged --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          git commit -m "chore(evals): update mcpchecker evaluation results"
          # The branch is recreated on every run, so the remote copy is overwritten.
          git push -f origin "$BRANCH"

          # Check if PR already exists
          EXISTING_PR=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number' || echo "")

          PR_BODY=$(cat <<EOF
          ## Automated Evaluation Results Update
          This PR updates the mcpchecker evaluation results from the weekly scheduled run.
          **Run details:**
          - Trigger: $TRIGGER
          - Commit: $COMMIT_SHA
          - Workflow run: $RUN_URL
          ---
          This PR was automatically generated by the mcpchecker workflow.
          EOF
          )

          if [ -n "$EXISTING_PR" ]; then
            echo "Updating existing PR #$EXISTING_PR"
            gh pr edit "$EXISTING_PR" --body "$PR_BODY"
          else
            echo "Creating new PR"
            gh pr create \
              --title "chore: update mcpchecker evaluation results" \
              --body "$PR_BODY" \
              --base main \
              --head "$BRANCH"
          fi