Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/e2e-canary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
Expand Down
27 changes: 25 additions & 2 deletions .github/workflows/integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
timeout-minutes: 45
env:
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
Expand All @@ -67,16 +67,39 @@ jobs:
- uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4.3.0
- name: Install dependencies
run: pnpm install --frozen-lockfile
# Create a fresh temporary directory and expose its path to later steps as
# the `dir` output of this step (id: run_context).
- name: Prepare e2e run context directory
id: run_context
shell: bash
run: |
RUN_CONTEXT_DIR="$(mktemp -d)"
echo "dir=$RUN_CONTEXT_DIR" >> "$GITHUB_OUTPUT"
# Run the e2e suite with BRAINTRUST_E2E_RUN_CONTEXT_DIR pointing at the
# directory prepared above.
- name: Run e2e tests
env:
BRAINTRUST_E2E_RUN_CONTEXT_DIR: ${{ steps.run_context.outputs.dir }}
run: pnpm test:e2e
# Runs even when earlier steps failed (`always()`): invokes the links-comment
# script with the same run context directory and appends the file it writes
# to the job's step summary.
# NOTE(review): the script itself is not visible here; presumably it reads the
# run-context records from BRAINTRUST_E2E_RUN_CONTEXT_DIR — confirm.
- name: Build e2e Braintrust links summary
if: ${{ always() }}
shell: bash
env:
BRAINTRUST_E2E_RUN_CONTEXT_DIR: ${{ steps.run_context.outputs.dir }}
BRAINTRUST_ORG_NAME: ${{ vars.BRAINTRUST_ORG_NAME }}
GITHUB_HEAD_REF: ${{ github.head_ref }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_SERVER_URL: ${{ github.server_url }}
run: |
SUMMARY_PATH="$RUNNER_TEMP/e2e-braintrust-links-summary.md"
node e2e/scripts/build-pr-e2e-links-comment.mjs --output "$SUMMARY_PATH"
cat "$SUMMARY_PATH" >> "$GITHUB_STEP_SUMMARY"

e2e-canary:
if: ${{ github.event_name == 'pull_request' }}
runs-on: ubuntu-latest
timeout-minutes: 45
env:
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ secrets.BRAINTRUST_E2E_PROJECT_NAME }}
BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
Expand Down
76 changes: 76 additions & 0 deletions e2e/config/pr-comment-scenarios.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
[
{
"scenarioDirName": "openai-instrumentation",
"label": "OpenAI Instrumentation",
"metadataScenario": "openai-instrumentation",
"variants": [
{ "variantKey": "openai-v4", "label": "v4" },
{ "variantKey": "openai-v5", "label": "v5" },
{ "variantKey": "openai-v6", "label": "v6" }
]
},
{
"scenarioDirName": "anthropic-instrumentation",
"label": "Anthropic Instrumentation",
"metadataScenario": "anthropic-instrumentation",
"variants": [
{ "variantKey": "anthropic-v0273", "label": "v0.27.3" },
{ "variantKey": "anthropic-v0390", "label": "v0.39.0" },
{ "variantKey": "anthropic-v0712", "label": "v0.71.2" },
{ "variantKey": "anthropic-v0730", "label": "v0.73.0" },
{ "variantKey": "anthropic-v0780", "label": "v0.78.0" },
{ "variantKey": "anthropic-v0800", "label": "v0.80.0" }
]
},
{
"scenarioDirName": "google-genai-instrumentation",
"label": "Google GenAI Instrumentation",
"metadataScenario": "google-genai-instrumentation",
"variants": [
{ "variantKey": "google-genai-v1300", "label": "v1.30.0" },
{ "variantKey": "google-genai-v1440", "label": "v1.44.0" },
{ "variantKey": "google-genai-v1450", "label": "v1.45.0" },
{ "variantKey": "google-genai-v1460", "label": "v1.46.0" }
]
},
{
"scenarioDirName": "openrouter-instrumentation",
"label": "OpenRouter Instrumentation",
"metadataScenario": "openrouter-instrumentation",
"variants": [{ "variantKey": "openrouter-current", "label": "Current" }]
},
{
"scenarioDirName": "ai-sdk-instrumentation",
"label": "AI SDK Instrumentation",
"metadataScenario": "ai-sdk-instrumentation",
"variants": [
{ "variantKey": "ai-sdk-v3", "label": "v3" },
{ "variantKey": "ai-sdk-v4", "label": "v4" },
{ "variantKey": "ai-sdk-v5", "label": "v5" },
{ "variantKey": "ai-sdk-v6", "label": "v6" }
]
},
{
"scenarioDirName": "claude-agent-sdk-instrumentation",
"label": "Claude Agent SDK Instrumentation",
"metadataScenario": "claude-agent-sdk-traces",
"variants": [
{
"variantKey": "claude-agent-sdk-v0.1",
"label": "v0.1"
},
{
"variantKey": "claude-agent-sdk-v0.2.76",
"label": "v0.2.76"
},
{
"variantKey": "claude-agent-sdk-v0.2.79",
"label": "v0.2.79"
},
{
"variantKey": "claude-agent-sdk-v0.2.81",
"label": "v0.2.81"
}
]
}
]
116 changes: 95 additions & 21 deletions e2e/helpers/scenario-harness.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { spawn } from "node:child_process";
import { randomUUID } from "node:crypto";
import { appendFile, mkdir } from "node:fs/promises";
import { createRequire } from "node:module";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
Expand Down Expand Up @@ -34,6 +35,22 @@ const DENO_COMMAND = process.platform === "win32" ? "deno.exe" : "deno";
const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000;
const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = path.resolve(HELPERS_DIR, "../..");
const RUN_CONTEXT_DIR_ENV = "BRAINTRUST_E2E_RUN_CONTEXT_DIR";

/** Identifies which runtime launched a scenario; stored in each run-context record. */
type ScenarioRunner = "deno" | "node" | "tsx";

/** Optional caller-supplied context attached to a scenario run. */
interface ScenarioRunContext {
  /** Key naming the variant being run; copied verbatim into the run-context record. */
variantKey?: string;
}

/**
 * One scenario run as persisted to the run-context file: each record is
 * serialized with `JSON.stringify` and appended as a single line.
 */
interface ScenarioRunContextRecord {
  /** Entry file for the run: the caller's `entry` option or the runner's default entry. */
entry: string;
  /** Runtime used to launch the scenario. */
runner: ScenarioRunner;
  /** Base name of the scenario directory. */
scenarioDirName: string;
  /** Test run id of the harness that executed the scenario. */
testRunId: string;
  /** ISO-8601 timestamp taken when the record is created. */
timestamp: string;
  /** Variant key from the caller's run context, when one was supplied. */
variantKey?: string;
}

function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
Expand Down Expand Up @@ -175,6 +192,30 @@ function createTestRunId(): string {
return `e2e-${randomUUID()}`;
}

/**
 * Resolves the directory that run-context records should be written to.
 *
 * @returns The whitespace-trimmed value of the environment variable named by
 * `RUN_CONTEXT_DIR_ENV`, or `null` when that variable is unset, empty, or
 * contains only whitespace.
 */
function getRunContextDir(): string | null {
  const configuredDir = process.env[RUN_CONTEXT_DIR_ENV];
  const trimmedDir = configuredDir === undefined ? "" : configuredDir.trim();
  return trimmedDir.length > 0 ? trimmedDir : null;
}

/**
 * Persists a single scenario run-context record.
 *
 * Does nothing when no run-context directory is configured. Otherwise the
 * directory is created if needed (recursively) and the record is appended as
 * one JSON line to `run-context-<pid>.ndjson` inside it, where `<pid>` is the
 * id of the current process.
 *
 * @param record - The run description to serialize and append.
 */
async function recordScenarioRunContext(
  record: ScenarioRunContextRecord,
): Promise<void> {
  const targetDir = getRunContextDir();
  if (!targetDir) {
    return;
  }

  await mkdir(targetDir, { recursive: true });

  const fileName = `run-context-${process.pid}.ndjson`;
  const line = `${JSON.stringify(record)}\n`;
  await appendFile(path.join(targetDir, fileName), line, "utf8");
}

function getTestServerEnv(
testRunId: string,
server: { apiKey: string; url: string },
Expand Down Expand Up @@ -289,6 +330,7 @@ export function resolveScenarioDir(importMetaUrl: string): string {
export async function runScenarioDir(options: {
env?: Record<string, string>;
entry?: string;
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}): Promise<ScenarioResult> {
Expand All @@ -302,6 +344,7 @@ export async function runNodeScenarioDir(options: {
env?: Record<string, string>;
entry?: string;
nodeArgs?: string[];
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}): Promise<ScenarioResult> {
Expand All @@ -317,6 +360,7 @@ export async function runDenoScenarioDir(options: {
args?: string[];
entry?: string;
env?: Record<string, string>;
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}): Promise<ScenarioResult> {
Expand Down Expand Up @@ -358,19 +402,22 @@ interface ScenarioHarness {
args?: string[];
entry?: string;
env?: Record<string, string>;
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}) => Promise<ScenarioResult>;
runNodeScenarioDir: (options: {
entry?: string;
env?: Record<string, string>;
nodeArgs?: string[];
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}) => Promise<ScenarioResult>;
runScenarioDir: (options: {
entry?: string;
env?: Record<string, string>;
runContext?: ScenarioRunContext;
scenarioDir: string;
timeoutMs?: number;
}) => Promise<ScenarioResult>;
Expand All @@ -393,6 +440,27 @@ export async function withScenarioHarness(
server,
prodForwarding?.projectName ?? "",
);
/**
 * Executes a scenario via `run` and writes a run-context record for it.
 *
 * The record is written whether `run` resolves or rejects, so scenarios that
 * fail are still described in the run-context file. When `run` rejects, its
 * error is always the one propagated to the caller; a failure to write the
 * record in that case is ignored so it cannot mask the scenario error. When
 * `run` resolves, a failure to write the record is propagated.
 *
 * @param options - The caller's scenario options; only `entry`, `runContext`
 * and `scenarioDir` are read here.
 * @param runner - Runtime label stored in the record.
 * @param defaultEntry - Entry file name recorded when `options.entry` is unset.
 * @param run - Thunk that actually launches the scenario.
 * @returns The result produced by `run`.
 */
const runWithContext = async (
  options: {
    entry?: string;
    runContext?: ScenarioRunContext;
    scenarioDir: string;
  },
  runner: ScenarioRunner,
  defaultEntry: string,
  run: () => Promise<ScenarioResult>,
): Promise<ScenarioResult> => {
  // Built lazily so the timestamp reflects the moment of recording (after the
  // scenario has finished), matching the record written on the success path.
  const recordRun = (): Promise<void> =>
    recordScenarioRunContext({
      entry: options.entry ?? defaultEntry,
      runner,
      scenarioDirName: path.basename(options.scenarioDir),
      testRunId,
      timestamp: new Date().toISOString(),
      variantKey: options.runContext?.variantKey,
    });

  let result: ScenarioResult;
  try {
    result = await run();
  } catch (runError: unknown) {
    try {
      await recordRun();
    } catch {
      // Deliberately ignored: the scenario failure is the error worth surfacing.
    }
    throw runError;
  }

  await recordRun();
  return result;
};

try {
await body({
Expand All @@ -404,29 +472,35 @@ export async function withScenarioHarness(
filterItems(server.requests.slice(after), predicate),
),
runDenoScenarioDir: (options) =>
runDenoScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
runWithContext(options, "deno", "runner.case.ts", async () =>
runDenoScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
),
runNodeScenarioDir: (options) =>
runNodeScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
runWithContext(options, "node", "scenario.mjs", async () =>
runNodeScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
),
runScenarioDir: (options) =>
runScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
runWithContext(options, "tsx", "scenario.ts", async () =>
runScenarioDir({
...options,
env: {
...testEnv,
...(options.env ?? {}),
},
}),
),
testRunEvents: (predicate) =>
filterItems(
server.events,
Expand Down
2 changes: 2 additions & 0 deletions e2e/scenarios/ai-sdk-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ type RunAISDKScenario = (harness: {
runNodeScenarioDir: (options: {
entry: string;
nodeArgs: string[];
runContext?: { variantKey: string };
scenarioDir: string;
timeoutMs: number;
}) => Promise<unknown>;
runScenarioDir: (options: {
entry: string;
runContext?: { variantKey: string };
scenarioDir: string;
timeoutMs: number;
}) => Promise<unknown>;
Expand Down
2 changes: 2 additions & 0 deletions e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ for (const scenario of aiSDKScenarios) {
runScenario: async ({ runScenarioDir }) => {
await runScenarioDir({
entry: scenario.wrapperEntry,
runContext: { variantKey: scenario.snapshotName },
scenarioDir,
timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
});
Expand All @@ -63,6 +64,7 @@ for (const scenario of aiSDKScenarios) {
await runNodeScenarioDir({
entry: scenario.autoEntry,
nodeArgs: ["--import", "braintrust/hook.mjs"],
runContext: { variantKey: scenario.snapshotName },
scenarioDir,
timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
});
Expand Down
4 changes: 4 additions & 0 deletions e2e/scenarios/anthropic-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs";

type RunAnthropicScenario = (harness: {
runNodeScenarioDir: (options: {
entry?: string;
nodeArgs: string[];
runContext?: { variantKey: string };
scenarioDir: string;
timeoutMs: number;
}) => Promise<unknown>;
runScenarioDir: (options: {
entry?: string;
runContext?: { variantKey: string };
scenarioDir: string;
timeoutMs: number;
}) => Promise<unknown>;
Expand Down
2 changes: 2 additions & 0 deletions e2e/scenarios/anthropic-instrumentation/scenario.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ for (const scenario of anthropicScenarios) {
runScenario: async ({ runScenarioDir }) => {
await runScenarioDir({
entry: scenario.wrapperEntry,
runContext: { variantKey: scenario.snapshotName },
scenarioDir,
timeoutMs: TIMEOUT_MS,
});
Expand All @@ -86,6 +87,7 @@ for (const scenario of anthropicScenarios) {
await runNodeScenarioDir({
entry: scenario.autoEntry,
nodeArgs: ["--import", "braintrust/hook.mjs"],
runContext: { variantKey: scenario.snapshotName },
scenarioDir,
timeoutMs: TIMEOUT_MS,
});
Expand Down
Loading
Loading