30 changes: 20 additions & 10 deletions .github/actions/deploy/action.yml
@@ -4,16 +4,16 @@ description: "Step to start and configure KFP on Kind"
inputs:
pipeline_store:
description: "Flag to deploy KFP with K8s Native API"
default: 'database'
default: "database"
required: false
proxy:
description: "If KFP should be deployed with proxy configuration"
required: false
default: 'false'
default: "false"
cache_enabled:
description: "If KFP should be deployed with cache enabled globally"
required: false
default: 'true'
default: "true"
image_tag:
required: true
description: "Provide the image tag your image was tagged with"
@@ -26,26 +26,30 @@ inputs:
multi_user:
description: "If KFP should be deployed in multi-user mode"
required: false
default: 'false'
default: "false"
artifact_proxy:
description: "Enables artifact proxy"
required: false
default: 'false'
default: "false"
storage_backend:
description: "Storage backend to use (minio or seaweedfs)"
required: false
default: 'seaweedfs'
default: "seaweedfs"
argo_version:
required: false
description: "Argo version to use for the cluster"
db_type:
description: "The database to deploy for testing: mysql, or pgx for PostgreSQL."
required: false
default: ""
forward_port:
required: false
default: 'true'
default: "true"
description: "If you want to forward API server port to localhost:8888"
pod_to_pod_tls_enabled:
description: "If KFP should be deployed with TLS pod-to-pod communication."
required: false
default: 'false'
default: "false"

runs:
using: "composite"
@@ -64,13 +68,13 @@ runs:
- name: Load Docker Images
shell: bash
run: |
APPS=("apiserver" "driver" "launcher" "scheduledworkflow" "persistenceagent" "frontend" "metadata-writer")
APPS=("apiserver" "driver" "launcher" "scheduledworkflow" "persistenceagent" "frontend" "metadata-writer" "cache-server")
for app in "${APPS[@]}"; do
docker image load -i ${{ inputs.image_path }}/$app/$app.tar
docker push ${{ inputs.image_registry }}/$app:${{ inputs.image_tag }}
rm ${{ inputs.image_path }}/$app/$app.tar
docker image rm ${{ inputs.image_registry }}/$app:${{ inputs.image_tag }}
done

- name: Configure Args
shell: bash
@@ -115,6 +119,12 @@ runs:
if [ "${{inputs.pod_to_pod_tls_enabled }}" = "true" ]; then
ARGS="${ARGS} --tls-enabled"
fi

if [ -n "${{ inputs.db_type }}" ]; then
echo "Deploying with database type ${{ inputs.db_type }}"
ARGS="${ARGS} --db-type ${{ inputs.db_type }}"
fi

echo "ARGS=$ARGS" >> "$GITHUB_OUTPUT"

- name: Deploy KFP
3 changes: 3 additions & 0 deletions .github/resources/manifests/multiuser/base/kustomization.yaml
@@ -17,3 +17,6 @@ images:
- name: ghcr.io/kubeflow/kfp-frontend
newName: kind-registry:5000/frontend
newTag: latest
- name: ghcr.io/kubeflow/kfp-cache-server
newName: kind-registry:5000/cache-server
newTag: latest
Original file line number Diff line number Diff line change
@@ -17,6 +17,9 @@ images:
- name: ghcr.io/kubeflow/kfp-frontend
newName: kind-registry:5000/frontend
newTag: latest
- name: ghcr.io/kubeflow/kfp-cache-server
newName: kind-registry:5000/cache-server
newTag: latest

patches:
- path: apiserver-env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: ml-pipeline
spec:
template:
spec:
containers:
- name: ml-pipeline-api-server
env:
- name: V2_DRIVER_IMAGE
value: kind-registry:5000/driver:latest
- name: V2_LAUNCHER_IMAGE
value: kind-registry:5000/launcher:latest
- name: LOG_LEVEL
value: "debug"
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# This CI overlay for PostgreSQL testing does three things:
# 1. It uses `platform-agnostic-postgresql` as its base. This is the project's
# standard way to deploy KFP with PostgreSQL, which correctly includes both
# the KFP core components and the third-party PostgreSQL instance, and
# patches the API server to use the 'pgx' driver.
# 2. It applies an additional patch (`apiserver-env.yaml`) to inject
# CI-specific environment variables, like the V2 image path. This aligns
# with the pattern used in other CI overlays like `minio`.
# 3. It overrides the image names to use the locally built images from the
# Kind registry, which is standard practice for all CI tests.
resources:
- ../../../../../manifests/kustomize/env/platform-agnostic-postgresql

images:
- name: ghcr.io/kubeflow/kfp-api-server
newName: kind-registry:5000/apiserver
newTag: latest
- name: ghcr.io/kubeflow/kfp-cache-server
newName: kind-registry:5000/cache-server
newTag: latest

patches:
- path: apiserver-env.yaml
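To sanity-check what this overlay renders before the deploy script applies it, the manifests can be built locally; a minimal sketch, assuming the overlay lives at the standalone/postgresql path that deploy-kfp.sh selects further down:

# Render the CI PostgreSQL overlay without applying it, to inspect the image
# overrides and the apiserver-env.yaml patch. The directory is an assumption
# inferred from the selection logic in deploy-kfp.sh below.
kubectl kustomize .github/resources/manifests/standalone/postgresql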
Original file line number Diff line number Diff line change
@@ -20,5 +20,8 @@ images:
- name: ghcr.io/kubeflow/kfp-metadata-writer
newName: kind-registry:5000/metadata-writer
newTag: latest
- name: ghcr.io/kubeflow/kfp-cache-server
newName: kind-registry:5000/cache-server
newTag: latest
patches:
- path: apiserver-env.yaml
31 changes: 30 additions & 1 deletion .github/resources/scripts/collect-logs.sh
@@ -51,7 +51,36 @@ function display_pod_info {
kubectl describe pod "${POD_NAME}" -n "${NAMESPACE}" | grep -A 100 Events || echo "No events found for pod ${POD_NAME}."

echo "----- LOGS -----"
kubectl logs "${POD_NAME}" -n "${NAMESPACE}" || echo "No logs found for pod ${POD_NAME}."

# Get all containers (init + regular) from the pod
INIT_CONTAINERS=$(kubectl get pod "${POD_NAME}" -n "${NAMESPACE}" -o jsonpath='{.spec.initContainers[*].name}' 2>/dev/null || echo "")
CONTAINERS=$(kubectl get pod "${POD_NAME}" -n "${NAMESPACE}" -o jsonpath='{.spec.containers[*].name}' 2>/dev/null || echo "")

# Collect logs from init containers
if [[ -n "${INIT_CONTAINERS}" ]]; then
for CONTAINER in ${INIT_CONTAINERS}; do
echo "----- Init Container: ${CONTAINER} (current) -----"
kubectl logs "${POD_NAME}" -c "${CONTAINER}" -n "${NAMESPACE}" 2>&1 || echo "No current logs found for init container ${CONTAINER}."

echo "----- Init Container: ${CONTAINER} (previous) -----"
kubectl logs "${POD_NAME}" -c "${CONTAINER}" -n "${NAMESPACE}" --previous 2>&1 || echo "No previous logs found for init container ${CONTAINER}."
done
fi

# Collect logs from regular containers
if [[ -n "${CONTAINERS}" ]]; then
for CONTAINER in ${CONTAINERS}; do
echo "----- Container: ${CONTAINER} (current) -----"
kubectl logs "${POD_NAME}" -c "${CONTAINER}" -n "${NAMESPACE}" 2>&1 || echo "No current logs found for container ${CONTAINER}."

echo "----- Container: ${CONTAINER} (previous) -----"
kubectl logs "${POD_NAME}" -c "${CONTAINER}" -n "${NAMESPACE}" --previous 2>&1 || echo "No previous logs found for container ${CONTAINER}."
done
else
# Fallback: try to get logs without specifying container (for single-container pods)
echo "----- Default Container -----"
kubectl logs "${POD_NAME}" -n "${NAMESPACE}" 2>&1 || echo "No logs found for pod ${POD_NAME}."
fi

echo "==========================="
echo ""
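For reference, the Postgres workflow further down runs this script once per namespace; a minimal manual invocation against a running cluster might look like this (flags taken from that workflow, output path purely illustrative):

# Collect describe output plus current and previous logs for every container
# of every pod in the kubeflow namespace into a single file.
./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/pod_log.txt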
43 changes: 33 additions & 10 deletions .github/resources/scripts/deploy-kfp.sh
@@ -31,6 +31,7 @@ CACHE_DISABLED=false
ARTIFACT_PROXY_ENABLED=false
MULTI_USER=false
STORAGE_BACKEND="seaweedfs"
DB_TYPE=""
AWF_VERSION=""
POD_TO_POD_TLS_ENABLED=false
SEAWEEDFS_INIT_TIMEOUT=300s
@@ -64,6 +65,10 @@ while [ "$#" -gt 0 ]; do
STORAGE_BACKEND="$2"
shift 2
;;
--db-type)
DB_TYPE="$2"
shift 2
;;
--argo-version)
shift
if [[ -n "$1" ]]; then
@@ -139,22 +144,40 @@ fi
# Manifests will be deployed according to the flag provided
if [ "${MULTI_USER}" == "false" ] && [ "${PIPELINES_STORE}" != "kubernetes" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/standalone"
if $CACHE_DISABLED; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled"
elif $USE_PROXY; then
TEST_MANIFESTS="${TEST_MANIFESTS}/proxy"
elif [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/minio"

# Priority 1: TLS-enabled (mutually exclusive with other options)
if $POD_TO_POD_TLS_ENABLED; then
TEST_MANIFESTS="${TEST_MANIFESTS}/tls-enabled"

# Priority 2: PostgreSQL (mutually exclusive with default MySQL setup)
elif [ "${DB_TYPE}" == "pgx" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/postgresql"

# Priority 3: Check for cache-disabled + proxy + minio combination
elif $CACHE_DISABLED && $USE_PROXY && [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled-proxy-minio"

# Priority 4: Check for cache-disabled + proxy combination
elif $CACHE_DISABLED && $USE_PROXY; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled-proxy"

# Priority 5: Check for cache-disabled + minio combination
elif $CACHE_DISABLED && [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled-minio"

# Priority 6: Check for proxy + minio combination
elif $USE_PROXY && [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/proxy-minio"
elif $CACHE_DISABLED && $USE_PROXY && [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled-proxy-minio"
elif $POD_TO_POD_TLS_ENABLED; then
TEST_MANIFESTS="${TEST_MANIFESTS}/tls-enabled"

# Priority 7: Check for single flags (cache-disabled, proxy, or minio)
elif $CACHE_DISABLED; then
TEST_MANIFESTS="${TEST_MANIFESTS}/cache-disabled"
elif $USE_PROXY; then
TEST_MANIFESTS="${TEST_MANIFESTS}/proxy"
elif [ "${STORAGE_BACKEND}" == "minio" ]; then
TEST_MANIFESTS="${TEST_MANIFESTS}/minio"

# Default: seaweedfs with cache enabled
else
TEST_MANIFESTS="${TEST_MANIFESTS}/default"
fi
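As a rough illustration of how the new selection order resolves, limited to flags that are visible in this diff (the script may expect additional arguments that the deploy action normally supplies):

# TLS takes priority over everything else -> standalone/tls-enabled
./.github/resources/scripts/deploy-kfp.sh --tls-enabled
# pgx selects the PostgreSQL overlay shown earlier -> standalone/postgresql
./.github/resources/scripts/deploy-kfp.sh --db-type pgx
# MinIO alone, with cache and proxy at their defaults -> standalone/minio
./.github/resources/scripts/deploy-kfp.sh --storage-backend minio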
111 changes: 111 additions & 0 deletions .github/workflows/api-server-test-Postgres.yml
@@ -0,0 +1,111 @@
name: API Server Tests - Postgres

on:
push:
branches:
- master
pull_request:
paths:
- "backend/**"
- "manifests/kustomize/third-party/postgresql/**"
- ".github/resources/manifests/standalone/**"
- ".github/workflows/api-server-test-Postgres.yml"
- "!**/*.md"
- "!**/OWNERS"
env:
NAMESPACE: kubeflow
POSTGRES_NAMESPACE: kubeflow
DB_TYPE: postgres
DB_DRIVER: pgx
DB_PORT: "5432"
# The IP address for port-forwarding the database. Go tests will connect to this IP.
# This should be kept in sync with other postgres test workflows and local test scripts.
# Using 127.0.0.1 to match MySQL workflow behavior and Kind local development setup.
DB_FORWARD_IP: 127.0.0.1
DB_USER: user
DB_PASSWORD: password
DB_NAME: mlpipeline
jobs:
build:
uses: ./.github/workflows/image-builds-with-cache.yml
postgres-pgx:
runs-on: ubuntu-latest
needs: build
continue-on-error: false
strategy:
matrix:
cache_enabled: [true, false]
fail-fast: false # Ensure all jobs in the matrix run, even if one fails
name: KFP Backend V2 Postgres Tests (Cache ${{ matrix.cache_enabled }})

steps:
- name: Checkout target code
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: "1.22"
cache: true

- name: Create KFP cluster
uses: ./.github/actions/create-cluster
with:
k8s_version: "v1.30.2"

- name: Deploy KFP with Postgres
uses: ./.github/actions/deploy
with:
db_type: "pgx"
pipeline_store: "database"
cache_enabled: ${{ matrix.cache_enabled }}
image_path: ${{ needs.build.outputs.IMAGE_PATH }}
image_tag: ${{ needs.build.outputs.IMAGE_TAG }}
image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }}
forward_port: "true"
- name: Port-forward Postgres
run: kubectl -n "$POSTGRES_NAMESPACE" port-forward svc/postgres-service ${{ env.DB_PORT }}:${{ env.DB_PORT }} --address=${{ env.DB_FORWARD_IP }} &

- name: Port-forward ML Metadata service
run: kubectl -n "$NAMESPACE" port-forward svc/metadata-grpc-service 8080:8080 &
# Exclude upgrade tests for the following reasons:
# 1. Responsibility: Upgrade tests are handled by the dedicated `upgrade-test.yml` workflow.
# 2. Incompatibility: This workflow runs tests against a single, clean deployment. It cannot
# accommodate the two-phase nature of upgrade tests (prepare on an old version, then
# verify on the new one).
# 3. No Baseline: As PostgreSQL was not officially supported before, there is no prior
# stable release to serve as a baseline for an upgrade test.
- name: Run v2 api tests
run: |
go run github.com/onsi/ginkgo/v2/ginkgo -r -v --label-filter="!UpgradePreparation && !UpgradeVerification" ./backend/test/v2/api/... -- \
-namespace="$NAMESPACE"
- name: Run v2 integration tests
run: |
# v2/integration tests use testify/suite framework, not Ginkgo, so we must use 'go test' instead of 'ginkgo'
# Build the go test command with appropriate flags
TEST_CMD="go test -v -timeout 30m ./backend/test/v2/integration/..."

# Arguments for the test binary (passed after -args)
TEST_ARGS="-runIntegrationTests=true -namespace=$NAMESPACE -cacheEnabled=${{ matrix.cache_enabled }}"

if [[ "${{ matrix.cache_enabled }}" == "false" ]]; then
# When cache is disabled, we must skip the cache test itself.
# Use Go test's -skip flag to exclude TestCache
TEST_CMD="$TEST_CMD -skip TestCache"
fi

# Execute the test command with arguments
eval "$TEST_CMD -args $TEST_ARGS"

- name: Collect pod logs
if: always()
run: |
mkdir -p /tmp/tmp.kfp /tmp/tmp.postgres
./.github/resources/scripts/collect-logs.sh --ns "$NAMESPACE" --output /tmp/tmp.kfp/pod_log.txt
./.github/resources/scripts/collect-logs.sh --ns "$POSTGRES_NAMESPACE" --output /tmp/tmp.postgres/pod_log.txt

- name: Upload test artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: api-server-postgres-test-artifacts-cache-${{ matrix.cache_enabled }}
path: /tmp/tmp*/*
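With the Postgres port-forward from this workflow in place, the test database can also be inspected by hand using the same env values; an illustrative sketch, not part of the workflow (requires a local psql client):

# List the KFP tables in the forwarded PostgreSQL instance, using the
# DB_USER/DB_PASSWORD/DB_FORWARD_IP/DB_PORT/DB_NAME values defined above.
psql "postgresql://user:password@127.0.0.1:5432/mlpipeline" -c '\dt'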
5 changes: 4 additions & 1 deletion .github/workflows/image-builds-with-cache.yml
@@ -50,6 +50,9 @@ jobs:
- image: metadata-writer
dockerfile: backend/metadata_writer/Dockerfile
context: .
- image: cache-server
dockerfile: backend/Dockerfile.cacheserver
context: .
env:
ARTIFACT_NAME: "${{ matrix.image }}"
ARTIFACTS_PATH: "images_${{ github.sha }}"
@@ -119,4 +122,4 @@ jobs:
path: ${{ env.ARTIFACTS_PATH }}/${{ env.ARTIFACT_NAME }}.tar
retention-days: 1
# Continue the workflow even if the upload failed, because the upload can fail when another job uploads the artifact before this one
continue-on-error: true
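For local reproduction of the new matrix entry, the cache-server image can be built the same way the other backend images are; a sketch using the dockerfile and context declared above, with the tag taken from the kind-registry overrides in the CI kustomizations:

# Build the cache-server image from the repository root, mirroring the new
# matrix entry; the tag matches the override used in the CI overlays.
docker build -t kind-registry:5000/cache-server:latest -f backend/Dockerfile.cacheserver .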