# Integration Tests (#269)
# NOTE: the hosting page flagged this file as containing hidden or bidirectional
# Unicode text that may be interpreted or compiled differently from what is
# displayed. Review it in an editor that reveals hidden Unicode characters.
# Workflow-level settings for the integration-test pipeline.
name: Integration Tests

# Triggered only by the schedule below (cron expression: minute 0, hour 3, every day).
on:
  schedule:
    - cron: "0 3 * * *"

# The workflow token is limited to read access on repository contents.
permissions:
  contents: read

# Runs are grouped by workflow name plus the PR head ref; when no head ref is
# set the run id is used instead, which gives every run its own group.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: starts MinIO and the Embucket server as Docker containers,
  # prepares Python/Java/DuckDB tooling and datasets, then runs pytest from
  # test/integration and prints a failure summary.
  integration-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    name: Run pytest integration suite
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: true

      - name: Set up Java (required for PySpark)
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: '17'

      - name: Start MinIO (S3-compatible storage)
        run: |
          docker run -d --rm \
            --name minio \
            -p 9000:9000 \
            -p 9001:9001 \
            -e MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE \
            -e MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY \
            -e MINIO_ROOT_USER=AKIAIOSFODNN7EXAMPLE \
            -e MINIO_ROOT_PASSWORD=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY \
            minio/minio server /data --console-address ":9001"
          echo "Starting MinIO..."
          # Wait for MinIO to become ready (30 attempts, 2 s apart).
          minio_ready=false
          for i in {1..30}; do
            if curl -fsS http://localhost:9000/minio/health/live >/dev/null 2>&1; then
              echo "MinIO is healthy"
              minio_ready=true
              break
            fi
            sleep 2
          done
          # Fail here with diagnostics instead of letting a later step hit a
          # dead endpoint; the container may already be gone because of --rm,
          # hence the tolerant `docker logs`.
          if [ "$minio_ready" != "true" ]; then
            echo "::error::MinIO did not become healthy after 30 attempts"
            docker logs minio || true
            exit 1
          fi

      # NOTE(review): the step name mentions uploading test data, but the
      # script below only installs `mc`, registers the alias and creates the
      # bucket.
      - name: Create MinIO bucket and upload test data
        run: |
          # Install MinIO client
          wget https://dl.min.io/client/mc/release/linux-amd64/mc
          chmod +x mc
          sudo mv mc /usr/local/bin/
          # Configure MinIO client
          mc alias set minio http://localhost:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
          # Create bucket
          mc mb minio/embucket

      - name: Start embucket server (Docker)
        run: |
          # Host networking: the server listens directly on the runner's
          # port 3000 and reaches MinIO through localhost:9000. No -p mapping
          # is passed because Docker ignores published ports in host mode.
          docker run -d --rm \
            --name embucket-server \
            --network host \
            -e OBJECT_STORE_BACKEND=file \
            -e FILE_STORAGE_PATH=data/ \
            -e DATA_FORMAT=json \
            -e SLATEDB_PREFIX=sdb/ \
            -e CORS_ENABLED=true \
            -e CORS_ALLOW_ORIGIN="http://localhost:8080" \
            -e JWT_SECRET=63f4945d921d599f27ae4fdf5bada3f1 \
            -e CATALOG_URL=http://localhost:3000/catalog \
            -e S3_ENDPOINT=http://localhost:9000 \
            -e AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE \
            -e AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY \
            -e AWS_REGION=us-east-2 \
            -e S3_BUCKET=embucket \
            -e S3_ALLOW_HTTP=true \
            -v "$GITHUB_WORKSPACE/test/integration:/app/datasets" \
            embucket/embucket
          echo "Starting Embucket server..."
          # Wait for the server to become ready (60 attempts, 2 s apart).
          embucket_ready=false
          for i in {1..60}; do
            if curl -fsS http://localhost:3000/health >/dev/null 2>&1; then
              echo "Embucket is healthy"
              embucket_ready=true
              break
            fi
            sleep 2
          done
          # The wait stays best-effort (a timeout never failed this step), but
          # it is no longer silent: surface a warning plus container logs.
          if [ "$embucket_ready" != "true" ]; then
            echo "::warning::Embucket health check did not pass after 60 attempts; continuing"
            docker logs embucket-server || true
          fi
          # Fallback wait to ensure readiness
          sleep 10

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        working-directory: ./test/integration
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-test.txt

      - name: Set environment variables for integration tests
        run: |
          # Append every variable through a single, quoted redirect so they
          # are available to the following steps of this job.
          {
            echo "EMBUCKET_ICEBERG_REST_URI=http://localhost:3000/catalog"
            echo "EMBUCKET_DATABASE=analytics"
            echo "S3_ENDPOINT=http://localhost:9000"
            echo "S3_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE"
            echo "S3_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
            echo "S3_BUCKET=embucket"
            echo "EMBUCKET_SQL_HOST=localhost"
            echo "EMBUCKET_SQL_PORT=3000"
            echo "EMBUCKET_SQL_PROTOCOL=http"
            echo "EMBUCKET_USER=embucket"
            echo "EMBUCKET_PASSWORD=embucket"
            echo "EMBUCKET_SCHEMA=public"
            echo "AWS_REGION=us-east-2"
            echo "LOCAL_BASE_PATH=/app/datasets"
          } >> "$GITHUB_ENV"

      - name: Set up DuckDB
        uses: opt-nc/setup-duckdb-action@v1.1.5

      - name: Download and Prepare Datasets
        working-directory: ./test/integration
        run: |
          # Download existing files.
          # -f makes curl exit non-zero on an HTTP error instead of saving the
          # error page as a .parquet file; --retry covers transient failures.
          curl -fsSL --retry 3 -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet
          curl -fsSL --retry 3 -O https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2025-01.parquet
          curl -fsSL --retry 3 -O https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-01.parquet
          curl -fsSL --retry 3 -o hits.parquet https://storage.googleapis.com/glaredb-bench/data/clickbench/partitioned/hits_35.parquet
          # Generate the TPC-H and TPC-DS datasets (scale factor 0.1) as parquet.
          mkdir -p tpch_data tpcds_data
          duckdb -c "CALL dbgen(sf=0.1); export database 'tpch_data' (format parquet);"
          duckdb -c "CALL dsdgen(sf=0.1); export database 'tpcds_data' (format parquet);"
          # Directory listing is diagnostic only; it must not fail the step.
          tree || ls -R

      - name: Run integration tests
        working-directory: ./test/integration
        run: pytest -v

      # Runs even when an earlier step failed so the summary is always printed.
      - name: Display test failures
        if: always()
        working-directory: ./test/integration
        run: python display_test_failures.py