braintrustdata · Luca Forstner (lforst) · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/.github/workflows/e2e-canary.yaml b/.github/workflows/e2e-canary.yaml
@@ -32,7 +32,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
-          BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
+          BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
@@ -51,7 +51,7 @@ jobs:
     timeout-minutes: 45
     env:
       BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
-      BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
+      BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -67,16 +67,39 @@ jobs:
       - uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4.3.0
       - name: Install dependencies
         run: pnpm install --frozen-lockfile
+      - name: Prepare e2e run context directory
+        id: run_context # later steps read this step's output as steps.run_context.outputs.dir
+        shell: bash
+        run: | # creates a fresh temp directory and publishes its path as the `dir` step output
+          RUN_CONTEXT_DIR="$(mktemp -d)"
+          echo "dir=$RUN_CONTEXT_DIR" >> "$GITHUB_OUTPUT"
       - name: Run e2e tests
+        env:
+          BRAINTRUST_E2E_RUN_CONTEXT_DIR: ${{ steps.run_context.outputs.dir }}
         run: pnpm test:e2e
+      - name: Build e2e Braintrust links summary
+        if: ${{ always() }} # run this step even when an earlier step (e.g. the e2e tests) failed
+        shell: bash
+        env:
+          BRAINTRUST_E2E_RUN_CONTEXT_DIR: ${{ steps.run_context.outputs.dir }} # same directory value that is passed to the e2e test step
+          BRAINTRUST_ORG_NAME: ${{ vars.BRAINTRUST_ORG_NAME }}
+          GITHUB_HEAD_REF: ${{ github.head_ref }}
+          GITHUB_REF_NAME: ${{ github.ref_name }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_SERVER_URL: ${{ github.server_url }}
+        run: | # the script is expected to write markdown to --output (TODO confirm); that file is then appended to the job summary
+          SUMMARY_PATH="$RUNNER_TEMP/e2e-braintrust-links-summary.md"
+          node e2e/scripts/build-pr-e2e-links-comment.mjs --output "$SUMMARY_PATH"
+          cat "$SUMMARY_PATH" >> "$GITHUB_STEP_SUMMARY"
 
   e2e-canary:
     if: ${{ github.event_name == 'pull_request' }}
     runs-on: ubuntu-latest
     timeout-minutes: 45
     env:
       BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
-      BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
+      BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json
@@ -0,0 +1,76 @@
+[
+  {
+    "scenarioDirName": "openai-instrumentation",
+    "label": "OpenAI Instrumentation",
+    "metadataScenario": "openai-instrumentation",
+    "variants": [
+      { "variantKey": "openai-v4", "label": "v4" },
+      { "variantKey": "openai-v5", "label": "v5" },
+      { "variantKey": "openai-v6", "label": "v6" }
+    ]
+  },
+  {
+    "scenarioDirName": "anthropic-instrumentation",
+    "label": "Anthropic Instrumentation",
+    "metadataScenario": "anthropic-instrumentation",
+    "variants": [
+      { "variantKey": "anthropic-v0273", "label": "v0.27.3" },
+      { "variantKey": "anthropic-v0390", "label": "v0.39.0" },
+      { "variantKey": "anthropic-v0712", "label": "v0.71.2" },
+      { "variantKey": "anthropic-v0730", "label": "v0.73.0" },
+      { "variantKey": "anthropic-v0780", "label": "v0.78.0" },
+      { "variantKey": "anthropic-v0800", "label": "v0.80.0" }
+    ]
+  },
+  {
+    "scenarioDirName": "google-genai-instrumentation",
+    "label": "Google GenAI Instrumentation",
+    "metadataScenario": "google-genai-instrumentation",
+    "variants": [
+      { "variantKey": "google-genai-v1300", "label": "v1.30.0" },
+      { "variantKey": "google-genai-v1440", "label": "v1.44.0" },
+      { "variantKey": "google-genai-v1450", "label": "v1.45.0" },
+      { "variantKey": "google-genai-v1460", "label": "v1.46.0" }
+    ]
+  },
+  {
+    "scenarioDirName": "openrouter-instrumentation",
+    "label": "OpenRouter Instrumentation",
+    "metadataScenario": "openrouter-instrumentation",
+    "variants": [{ "variantKey": "openrouter-current", "label": "Current" }]
+  },
+  {
+    "scenarioDirName": "ai-sdk-instrumentation",
+    "label": "AI SDK Instrumentation",
+    "metadataScenario": "ai-sdk-instrumentation",
+    "variants": [
+      { "variantKey": "ai-sdk-v3", "label": "v3" },
+      { "variantKey": "ai-sdk-v4", "label": "v4" },
+      { "variantKey": "ai-sdk-v5", "label": "v5" },
+      { "variantKey": "ai-sdk-v6", "label": "v6" }
+    ]
+  },
+  {
+    "scenarioDirName": "claude-agent-sdk-instrumentation",
+    "label": "Claude Agent SDK Instrumentation",
+    "metadataScenario": "claude-agent-sdk-traces",
+    "variants": [
+      {
+        "variantKey": "claude-agent-sdk-v0.1",
+        "label": "v0.1"
+      },
+      {
+        "variantKey": "claude-agent-sdk-v0.2.76",
+        "label": "v0.2.76"
+      },
+      {
+        "variantKey": "claude-agent-sdk-v0.2.79",
+        "label": "v0.2.79"
+      },
+      {
+        "variantKey": "claude-agent-sdk-v0.2.81",
+        "label": "v0.2.81"
+      }
+    ]
+  }
+]
diff --git a/e2e/helpers/scenario-harness.ts b/e2e/helpers/scenario-harness.ts
@@ -1,5 +1,6 @@
 import { spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
+import { appendFile, mkdir } from "node:fs/promises";
 import { createRequire } from "node:module";
 import * as path from "node:path";
 import { fileURLToPath } from "node:url";
@@ -34,6 +35,22 @@ const DENO_COMMAND = process.platform === "win32" ? "deno.exe" : "deno";
 const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000;
 const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url));
 const REPO_ROOT = path.resolve(HELPERS_DIR, "../..");
+const RUN_CONTEXT_DIR_ENV = "BRAINTRUST_E2E_RUN_CONTEXT_DIR"; // env var naming the run-context output directory
+/** Launcher used to execute a scenario. */
+type ScenarioRunner = "deno" | "node" | "tsx";
+/** Optional caller-supplied context for a scenario run. */
+interface ScenarioRunContext {
+  variantKey?: string; // copied into the recorded run-context entry when set
+}
+/** Shape of one run-context record, serialized as a single NDJSON line. */
+interface ScenarioRunContextRecord {
+  entry: string; // caller-provided entry, else the runner's default entry
+  runner: ScenarioRunner;
+  scenarioDirName: string; // basename of the scenario directory
+  testRunId: string; // id of the harness test run the scenario ran under
+  timestamp: string; // ISO-8601 string taken after the scenario run resolves
+  variantKey?: string; // taken from ScenarioRunContext.variantKey
+}
 
 function isRecord(value: unknown): value is Record<string, unknown> {
   return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -175,6 +192,30 @@ function createTestRunId(): string {
   return `e2e-${randomUUID()}`;
 }
 
+/**
+ * Trimmed run-context directory from the environment; null if unset/blank.
+ */
+function getRunContextDir(): string | null {
+  const configuredDir = process.env[RUN_CONTEXT_DIR_ENV]?.trim();
+  return configuredDir ? configuredDir : null;
+}
+
+/**
+ * Appends `record` as one NDJSON line to a per-process file in the run
+ * context directory, creating it if needed. No-op when none is configured.
+ */
+async function recordScenarioRunContext(
+  record: ScenarioRunContextRecord,
+): Promise<void> {
+  const targetDir = getRunContextDir();
+  if (targetDir) {
+    await mkdir(targetDir, { recursive: true });
+    const file = path.join(targetDir, `run-context-${process.pid}.ndjson`);
+    const line = `${JSON.stringify(record)}\n`;
+    await appendFile(file, line, "utf8");
+  }
+}
+
 function getTestServerEnv(
   testRunId: string,
   server: { apiKey: string; url: string },
@@ -289,6 +330,7 @@ export function resolveScenarioDir(importMetaUrl: string): string {
 export async function runScenarioDir(options: {
   env?: Record<string, string>;
   entry?: string;
+  runContext?: ScenarioRunContext;
   scenarioDir: string;
   timeoutMs?: number;
 }): Promise<ScenarioResult> {
@@ -302,6 +344,7 @@ export async function runNodeScenarioDir(options: {
   env?: Record<string, string>;
   entry?: string;
   nodeArgs?: string[];
+  runContext?: ScenarioRunContext;
   scenarioDir: string;
   timeoutMs?: number;
 }): Promise<ScenarioResult> {
@@ -317,6 +360,7 @@ export async function runDenoScenarioDir(options: {
   args?: string[];
   entry?: string;
   env?: Record<string, string>;
+  runContext?: ScenarioRunContext;
   scenarioDir: string;
   timeoutMs?: number;
 }): Promise<ScenarioResult> {
@@ -358,19 +402,22 @@ interface ScenarioHarness {
     args?: string[];
     entry?: string;
     env?: Record<string, string>;
+    runContext?: ScenarioRunContext;
     scenarioDir: string;
     timeoutMs?: number;
   }) => Promise<ScenarioResult>;
   runNodeScenarioDir: (options: {
     entry?: string;
     env?: Record<string, string>;
     nodeArgs?: string[];
+    runContext?: ScenarioRunContext;
     scenarioDir: string;
     timeoutMs?: number;
   }) => Promise<ScenarioResult>;
   runScenarioDir: (options: {
     entry?: string;
     env?: Record<string, string>;
+    runContext?: ScenarioRunContext;
     scenarioDir: string;
     timeoutMs?: number;
   }) => Promise<ScenarioResult>;
@@ -393,6 +440,27 @@ export async function withScenarioHarness(
     server,
     prodForwarding?.projectName ?? "",
   );
+  // Runs `run`, then records entry/runner/scenario/variant for this test run.
+  // NOTE(review): nothing is recorded when `run` rejects — confirm intended.
+  const runWithContext = async (
+    options: {
+      entry?: string;
+      runContext?: ScenarioRunContext;
+      scenarioDir: string;
+    },
+    runner: ScenarioRunner,
+    defaultEntry: string,
+    run: () => Promise<ScenarioResult>,
+  ): Promise<ScenarioResult> => {
+    const result = await run();
+    const entry = options.entry ?? defaultEntry;
+    const scenarioDirName = path.basename(options.scenarioDir);
+    const timestamp = new Date().toISOString();
+    const variantKey = options.runContext?.variantKey;
+    const record = { entry, runner, scenarioDirName, testRunId, timestamp };
+    await recordScenarioRunContext({ ...record, variantKey });
+    return result;
+  };
 
   try {
     await body({
@@ -404,29 +472,35 @@ export async function withScenarioHarness(
           filterItems(server.requests.slice(after), predicate),
         ),
       runDenoScenarioDir: (options) =>
-        runDenoScenarioDir({
-          ...options,
-          env: {
-            ...testEnv,
-            ...(options.env ?? {}),
-          },
-        }),
+        runWithContext(options, "deno", "runner.case.ts", async () =>
+          runDenoScenarioDir({
+            ...options,
+            env: {
+              ...testEnv,
+              ...(options.env ?? {}),
+            },
+          }),
+        ),
       runNodeScenarioDir: (options) =>
-        runNodeScenarioDir({
-          ...options,
-          env: {
-            ...testEnv,
-            ...(options.env ?? {}),
-          },
-        }),
+        runWithContext(options, "node", "scenario.mjs", async () =>
+          runNodeScenarioDir({
+            ...options,
+            env: {
+              ...testEnv,
+              ...(options.env ?? {}),
+            },
+          }),
+        ),
       runScenarioDir: (options) =>
-        runScenarioDir({
-          ...options,
-          env: {
-            ...testEnv,
-            ...(options.env ?? {}),
-          },
-        }),
+        runWithContext(options, "tsx", "scenario.ts", async () =>
+          runScenarioDir({
+            ...options,
+            env: {
+              ...testEnv,
+              ...(options.env ?? {}),
+            },
+          }),
+        ),
       testRunEvents: (predicate) =>
         filterItems(
           server.events,

diff --git a/e2e/scenarios/ai-sdk-instrumentation/assertions.ts b/e2e/scenarios/ai-sdk-instrumentation/assertions.ts
@@ -20,11 +20,13 @@ type RunAISDKScenario = (harness: {
   runNodeScenarioDir: (options: {
     entry: string;
     nodeArgs: string[];
+    runContext?: { variantKey: string };
     scenarioDir: string;
     timeoutMs: number;
   }) => Promise<unknown>;
   runScenarioDir: (options: {
     entry: string;
+    runContext?: { variantKey: string };
     scenarioDir: string;
     timeoutMs: number;
   }) => Promise<unknown>;

diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
@@ -41,6 +41,7 @@ for (const scenario of aiSDKScenarios) {
       runScenario: async ({ runScenarioDir }) => {
         await runScenarioDir({
           entry: scenario.wrapperEntry,
+          runContext: { variantKey: scenario.snapshotName },
           scenarioDir,
           timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
         });
@@ -63,6 +64,7 @@ for (const scenario of aiSDKScenarios) {
         await runNodeScenarioDir({
           entry: scenario.autoEntry,
           nodeArgs: ["--import", "braintrust/hook.mjs"],
+          runContext: { variantKey: scenario.snapshotName },
           scenarioDir,
           timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
         });

diff --git a/e2e/scenarios/anthropic-instrumentation/assertions.ts b/e2e/scenarios/anthropic-instrumentation/assertions.ts
@@ -13,11 +13,15 @@ import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs";
 
 type RunAnthropicScenario = (harness: {
   runNodeScenarioDir: (options: {
+    entry?: string;
     nodeArgs: string[];
+    runContext?: { variantKey: string };
     scenarioDir: string;
     timeoutMs: number;
   }) => Promise<unknown>;
   runScenarioDir: (options: {
+    entry?: string;
+    runContext?: { variantKey: string };
     scenarioDir: string;
     timeoutMs: number;
   }) => Promise<unknown>;

diff --git a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts
@@ -70,6 +70,7 @@ for (const scenario of anthropicScenarios) {
       runScenario: async ({ runScenarioDir }) => {
         await runScenarioDir({
           entry: scenario.wrapperEntry,
+          runContext: { variantKey: scenario.snapshotName },
           scenarioDir,
           timeoutMs: TIMEOUT_MS,
         });
@@ -86,6 +87,7 @@ for (const scenario of anthropicScenarios) {
         await runNodeScenarioDir({
           entry: scenario.autoEntry,
           nodeArgs: ["--import", "braintrust/hook.mjs"],
+          runContext: { variantKey: scenario.snapshotName },
           scenarioDir,
           timeoutMs: TIMEOUT_MS,
         });