ci: integrate vLLM inference tests with GitHub Actions workflows

derekhiggins · derekhiggins · commit 6a4da148c42f · 2025-08-15T12:10:37.000+01:00
Add vLLM provider support to integration test CI workflows alongside
existing Ollama support. Configure provider-specific test execution
where vLLM runs only inference-specific tests (excluding vision tests) while
Ollama continues to run the full test suite.

This enables comprehensive CI testing of both inference providers but
keeps the vLLM footprint small; this can be expanded later if it proves
not to be too disruptive.

Signed-off-by: Derek Higgins <derekh@redhat.com>
diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
@@ -52,9 +52,9 @@ runs:
           git add tests/integration/recordings/
 
           if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-            git commit -m "Recordings update from CI (vision)"
+            git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})"
           else
-            git commit -m "Recordings update from CI"
+            git commit -m "Recordings update from CI (${{ inputs.provider }})"
           fi
 
           git fetch origin ${{ github.event.pull_request.head.ref }}
@@ -70,7 +70,8 @@ runs:
       if: ${{ always() }}
       shell: bash
       run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
 
     - name: Upload logs
       if: ${{ always() }}
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -20,7 +20,6 @@ on:
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
@@ -38,28 +37,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  discover-tests:
-    runs-on: ubuntu-latest
-    outputs:
-      test-types: ${{ steps.generate-test-types.outputs.test-types }}
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Generate test types
-        id: generate-test-types
-        run: |
-          # Get test directories dynamically, excluding non-test directories
-          # NOTE: we are excluding post_training since the tests take too long
-          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
-            sed 's|tests/integration/||' |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
-            sort | jq -R -s -c 'split("\n")[:-1]')
-          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
-
   run-replay-mode-tests:
-    needs: discover-tests
     runs-on: ubuntu-latest
     name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
 
@@ -68,11 +46,14 @@ jobs:
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
+        # Test against both ollama and vllm (vllm vision runs are excluded below)
-        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
+        provider: [ollama, vllm]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
         run-vision-tests: [true, false]
+        exclude:
+          - provider: vllm
+            run-vision-tests: true
 
     steps:
       - name: Checkout repository
@@ -87,10 +68,27 @@ jobs:
           run-vision-tests: ${{ matrix.run-vision-tests }}
           inference-mode: 'replay'
 
+      - name: Generate test types
+        id: generate-test-types
+        run: |
+          # vllm only runs the inference tests, as these are the most likely to exercise the vllm provider
+          # TODO: Add agent tests for vllm
+          if [ "${{ matrix.provider }}" == "vllm" ]; then
+            echo "test-types=[\"inference\"]" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          # Other providers: list the test directories dynamically as a JSON array, excluding non-test directories
+          # NOTE: we are excluding post_training since the tests take too long
+          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
+            sed 's|tests/integration/||' |
+            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
+            sort | jq -R -s -c 'split("\n")[:-1]')
+          echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"
+
       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-types: ${{ steps.generate-test-types.outputs.test-types }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           provider: ${{ matrix.provider }}
           inference-mode: 'replay'
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
@@ -15,12 +15,6 @@ on:
       - '.github/actions/setup-ollama/action.yml'
       - '.github/actions/setup-test-environment/action.yml'
       - '.github/actions/run-and-record-tests/action.yml'
-  workflow_dispatch:
-    inputs:
-      test-provider:
-        description: 'Test against a specific provider'
-        type: string
-        default: 'ollama'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -42,12 +36,6 @@ jobs:
       - name: Generate test types
         id: generate-test-types
         run: |
-          # Get test directories dynamically, excluding non-test directories
-          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
-            sort | jq -R -s -c 'split("\n")[:-1]')
-          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
-
           labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
           echo "labels=$labels"
 
@@ -82,6 +70,10 @@ jobs:
       fail-fast: false
       matrix:
         mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
+        provider: [ollama, vllm]
+        exclude:
+          - mode: vision
+            provider: vllm
 
     steps:
       - name: Checkout repository
@@ -90,20 +82,33 @@ jobs:
           ref: ${{ github.event.pull_request.head.ref }}
           fetch-depth: 0
 
+      - name: Generate test types
+        id: generate-test-types
+        run: |
+          if [ "${{ matrix.provider }}" == "vllm" ]; then
+            echo "test-types=[\"inference\"]" >> "$GITHUB_OUTPUT"
+          else
+            # Other providers: list the test directories dynamically as a JSON array, excluding non-test directories
+            TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+              grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
+              sort | jq -R -s -c 'split("\n")[:-1]')
+            echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Setup test environment
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: "3.12"  # Use single Python version for recording
           client-version: "latest"
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          provider: ${{ matrix.provider }}
           run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
           inference-mode: 'record'
 
       - name: Run and record tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-types: ${{ steps.generate-test-types.outputs.test-types }}
           stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          provider: ${{ matrix.provider }}
           inference-mode: 'record'
           run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}