Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ on:
- "uv.lock"
- ".github/workflows/ci.yml"

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
Expand Down Expand Up @@ -44,14 +47,22 @@ jobs:
run: uv run black --check .

check-regressions:
name: ${{ matrix.agent == 'sage' && 'Sage agent always passes' || 'None agent always fails' }}
name: "${{ matrix.project_type }} / ${{ matrix.agent == 'sage' && 'Sage agent always passes' || 'None agent always fails' }}"
runs-on: ubuntu-latest
strategy:
matrix:
include:
- agent: sage
project_type: dbt
expect_success: true
- agent: none
project_type: dbt
expect_success: false
- agent: sage
project_type: dbt-fusion
expect_success: true
- agent: none
project_type: dbt-fusion
expect_success: false
steps:
- name: Checkout code
Expand All @@ -74,10 +85,15 @@ jobs:
# Pre-build the Docker image once to avoid race conditions when multiple
# concurrent trials try to build the same cached image simultaneously
- name: Pre-build Docker image
run: docker build -t ade-bench-base -f docker/base/Dockerfile.duckdb-dbt .
run: |
if [ "${{ matrix.project_type }}" = "dbt-fusion" ]; then
docker build -t ade-bench-base -f docker/base/Dockerfile.duckdb-dbtf .
else
docker build -t ade-bench-base -f docker/base/Dockerfile.duckdb-dbt .
fi

- name: Run benchmark
run: uv run ade run all --agent ${{ matrix.agent }} --db duckdb --project-type dbt --no-diffs --n-concurrent-trials 6 --no-rebuild --plugin-set none
run: uv run ade run all --agent ${{ matrix.agent }} --db duckdb --project-type ${{ matrix.project_type }} --no-diffs --n-concurrent-trials 6 --no-rebuild --plugin-set none
env:
USE_DYNAMIC_LOGGING: "FALSE"
DEFAULT_TEST_TIMEOUT_SEC: "120"
Expand All @@ -101,7 +117,7 @@ jobs:
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: benchmark-results-${{ matrix.agent }}
name: benchmark-results-${{ matrix.project_type }}-${{ matrix.agent }}
path: experiments/

- name: Check results match expectations
Expand Down
2 changes: 2 additions & 0 deletions ade_bench/handlers/trial_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ def docker_compose_path(self) -> Path:
return self._defaults_path / "docker-compose-snowflake-dbtf.yaml"
elif db_type == "snowflake" and project_type == "dbt":
return self._defaults_path / "docker-compose-snowflake-dbt.yaml"
elif db_type == "duckdb" and project_type == "dbt-fusion":
return self._defaults_path / "docker-compose-duckdb-dbtf.yaml"
elif db_type == "duckdb":
return self._defaults_path / "docker-compose-duckdb-dbt.yaml"
else:
Expand Down
30 changes: 30 additions & 0 deletions docker/base/Dockerfile.duckdb-dbtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
FROM python:3.11-slim

# Base tooling: git, tmux + asciinema (agent session recording), curl,
# yq (YAML CLI), and Node.js 22 (required by some installed agents).
# NOTE(review): yq is fetched from the *latest* release tag — unpinned; confirm
# this is intentional, otherwise pin a version for reproducible builds.
RUN apt-get update && apt-get install -y \
        git \
        tmux asciinema \
        curl \
    && curl -sSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \
        -o /usr/bin/yq \
    && chmod +x /usr/bin/yq \
    && curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*

# Install dbt Fusion (the installer script requires $SHELL to be set)
RUN export SHELL=/bin/bash && \
    curl -fsSL https://public.cdn.getdbt.com/fs/install/install.sh | sh -s -- --update

# Install DuckDB Python package (needed for Fusion DuckDB connections).
# The version specifiers MUST be quoted: in shell-form RUN, an unquoted
# `pyyaml>=6.0` is parsed as an output redirection to a file named `=6.0`,
# silently dropping the constraint (and `uv>=0.7` likewise).
RUN pip install --no-cache-dir \
    duckdb \
    "pyyaml>=6.0" \
    "uv>=0.7"

ENV DBT_STATIC_ANALYSIS=baseline

# Set up workspace directories used by the harness, agents, and tests
RUN mkdir -p /installed-agent /scripts /sage/solutions /sage /app /app/setup /app/migrations /seeds /solutions /logs /tests
WORKDIR /app

CMD ["bash"]
148 changes: 148 additions & 0 deletions docs/plans/2026-03-05-fusion-duckdb-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Fusion + DuckDB Implementation Plan

> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.

**Goal:** Add dbt Fusion engine support for DuckDB tasks, so the benchmark can evaluate AI agents running Fusion instead of dbt-core on DuckDB.

**Architecture:** Mirror the existing Snowflake Fusion pattern. New Dockerfile + docker-compose, one routing change in the harness, and a new variant added to all 44 DuckDB tasks.

**Tech Stack:** Docker, dbt Fusion (latest), DuckDB, Python

---

### Task 1: Create Dockerfile.duckdb-dbtf

**Files:**
- Create: `docker/base/Dockerfile.duckdb-dbtf`

**Step 1: Create the Dockerfile**

```dockerfile
FROM python:3.11-slim

RUN apt-get update && apt-get install -y \
git \
tmux asciinema \
curl \
&& curl -sSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \
-o /usr/bin/yq \
&& chmod +x /usr/bin/yq \
&& curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
&& apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/*

# Install dbt Fusion
RUN export SHELL=/bin/bash && \
curl -fsSL https://public.cdn.getdbt.com/fs/install/install.sh | sh -s -- --update

# Install DuckDB Python package (needed for Fusion DuckDB connections)
RUN pip install --no-cache-dir \
    duckdb \
    "pyyaml>=6.0" \
    "uv>=0.7"

# Set up workspace
RUN mkdir -p /installed-agent /scripts /sage/solutions /sage /app /app/setup /app/migrations /seeds /solutions /logs /tests
WORKDIR /app

CMD ["bash"]
```

**Step 2: Commit**

```bash
git add docker/base/Dockerfile.duckdb-dbtf
git commit -m "feat: add Dockerfile for dbt Fusion + DuckDB"
```

---

### Task 2: Create docker-compose-duckdb-dbtf.yaml

**Files:**
- Create: `shared/defaults/docker-compose-duckdb-dbtf.yaml`

**Step 1: Create the compose file**

```yaml
services:
client:
build:
context: ${T_BENCH_REPO_ROOT}
dockerfile: docker/base/Dockerfile.duckdb-dbtf
image: ${T_BENCH_TASK_DOCKER_CLIENT_IMAGE_NAME}
container_name: ${T_BENCH_TASK_DOCKER_CLIENT_CONTAINER_NAME}
command: [ "sh", "-c", "sleep infinity" ]
environment:
- TEST_DIR=${T_BENCH_TEST_DIR}
volumes:
- ${T_BENCH_TASK_LOGS_PATH}:${T_BENCH_CONTAINER_LOGS_PATH}
```

**Step 2: Commit**

```bash
git add shared/defaults/docker-compose-duckdb-dbtf.yaml
git commit -m "feat: add docker-compose for dbt Fusion + DuckDB"
```

---

### Task 3: Update harness routing in trial_handler.py

**Files:**
- Modify: `ade_bench/handlers/trial_handler.py:307-312`

**Step 1: Update the routing logic**

Change the `docker_compose_path` property. Replace lines 311-312:

```python
elif db_type == "duckdb":
return self._defaults_path / "docker-compose-duckdb-dbt.yaml"
```

With:

```python
elif db_type == "duckdb" and project_type == "dbt-fusion":
return self._defaults_path / "docker-compose-duckdb-dbtf.yaml"
elif db_type == "duckdb":
return self._defaults_path / "docker-compose-duckdb-dbt.yaml"
```

**Step 2: Commit**

```bash
git add ade_bench/handlers/trial_handler.py
git commit -m "feat: route duckdb+fusion to new docker-compose"
```

---

### Task 4: Add dbt-fusion variant to all 44 DuckDB tasks

**Files:**
- Modify: All 44 `tasks/*/task.yaml` files that have a `db_type: duckdb` variant

**Step 1: Add the variant using a script**

For each task, append a new variant block after the existing variants:

```yaml
- db_type: duckdb
db_name: <same as existing duckdb variant>
project_type: dbt-fusion
project_name: <same as existing duckdb variant>
```

Run a Python script to do this programmatically across all 44 tasks.

**Step 2: Spot-check a few files** to verify the variant was added correctly.

**Step 3: Commit**

```bash
git add tasks/*/task.yaml
git commit -m "feat: add duckdb+fusion variant to all DuckDB tasks"
```
12 changes: 12 additions & 0 deletions shared/defaults/docker-compose-duckdb-dbtf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Compose file for the dbt Fusion + DuckDB task client container.
# All ${T_BENCH_*} variables are interpolated by the harness at compose time.
services:
  client:
    build:
      # Build context is the repo root so the Dockerfile can COPY shared files
      context: ${T_BENCH_REPO_ROOT}
      dockerfile: docker/base/Dockerfile.duckdb-dbtf
    image: ${T_BENCH_TASK_DOCKER_CLIENT_IMAGE_NAME}
    container_name: ${T_BENCH_TASK_DOCKER_CLIENT_CONTAINER_NAME}
    # Keep the container alive indefinitely so the harness can exec into it
    command: [ "sh", "-c", "sleep infinity" ]
    environment:
      - TEST_DIR=${T_BENCH_TEST_DIR}
    volumes:
      # Expose the per-task host log directory inside the container
      - ${T_BENCH_TASK_LOGS_PATH}:${T_BENCH_CONTAINER_LOGS_PATH}
5 changes: 2 additions & 3 deletions shared/projects/dbt/airbnb/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
Expand All @@ -17,8 +16,6 @@ analysis-paths: ["analyses"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
Expand All @@ -39,3 +36,5 @@ seeds:
RAW_LISTINGS:
+column_types:
PRICE: "STRING"
flags:
require_generic_test_arguments_property: true
6 changes: 4 additions & 2 deletions shared/projects/dbt/airbnb/models/agg/agg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ models:
- name: REVIEW_SENTIMENT
tests:
- accepted_values:
values: ['positive','negative','neutral']
arguments:
values: ['positive', 'negative', 'neutral']

- name: wow_agg_reviews
columns:
Expand All @@ -61,4 +62,5 @@ models:
- name: REVIEW_SENTIMENT
tests:
- accepted_values:
values: ['positive','negative','neutral']
arguments:
values: ['positive', 'negative', 'neutral']
13 changes: 8 additions & 5 deletions shared/projects/dbt/airbnb/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ models:
- name: REVIEW_SENTIMENT
tests:
- accepted_values:
values: ['positive','negative','neutral']
arguments:
values: ['positive', 'negative', 'neutral']
- name: LISTING_ID
tests:
- not_null
- relationships:
to: ref('dim_listings')
field: LISTING_ID
arguments:
to: ref('dim_listings')
field: LISTING_ID
- relationships:
to: ref('dim_listings_hosts')
field: LISTING_ID
arguments:
to: ref('dim_listings_hosts')
field: LISTING_ID
8 changes: 4 additions & 4 deletions shared/projects/dbt/airbnb/package-lock.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
packages:
- name: dbt_utils
package: dbt-labs/dbt_utils
version: 1.3.2
sha1_hash: ccc8b17d4482f52f5cce6cf4ab0518ae8c147118
- package: dbt-labs/dbt_utils
name: dbt_utils
version: 1.3.2
sha1_hash: 0f4de24c53379b20795e3fc20ee5d190b39e54d5
5 changes: 3 additions & 2 deletions shared/projects/dbt/analytics_engineering/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
Expand Down Expand Up @@ -31,4 +30,6 @@ clean-targets: # directories to be removed by `dbt clean`
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
models:
+materialized: table
+materialized: table
flags:
require_generic_test_arguments_property: true
8 changes: 4 additions & 4 deletions shared/projects/dbt/analytics_engineering/package-lock.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
packages:
- name: dbt_utils
package: dbt-labs/dbt_utils
version: 1.3.2
sha1_hash: ccc8b17d4482f52f5cce6cf4ab0518ae8c147118
- package: dbt-labs/dbt_utils
name: dbt_utils
version: 1.3.2
sha1_hash: 0f4de24c53379b20795e3fc20ee5d190b39e54d5
5 changes: 3 additions & 2 deletions shared/projects/dbt/asana/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ vars:
seeds:
+quote_columns: "{{ true if target.type == 'redshift' else false }}"
main:
+all_varchar: true
+column_types:
_fivetran_synced: timestamp
user_data:
Expand Down Expand Up @@ -102,4 +101,6 @@ seeds:

dispatch:
- macro_namespace: dbt_utils
search_order: ['spark_utils', 'dbt_utils']
search_order: ['spark_utils', 'dbt_utils']
flags:
require_generic_test_arguments_property: true
Loading