Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 289 additions & 0 deletions e2e/scenarios/ai-sdk-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,38 @@ function findGenerateTrace(events: CapturedLogEvent[]) {
return { child, operation, parent };
}

function findGenerateTextTraceForOperation(
events: CapturedLogEvent[],
operationSpanName: string,
) {
const operation = findLatestSpan(events, operationSpanName);
const parent = findParentSpan(events, "generateText", operation?.span.id);
const child = findLatestModelSpan(events, parent?.span.id, "doGenerate");

return { child, operation, parent };
}

function findOutputObjectTrace(events: CapturedLogEvent[]) {
return findGenerateTextTraceForOperation(
events,
"ai-sdk-output-object-operation",
);
}

function findAttachmentTrace(events: CapturedLogEvent[]) {
return findGenerateTextTraceForOperation(
events,
"ai-sdk-attachment-operation",
);
}

function findDenyOutputOverrideTrace(events: CapturedLogEvent[]) {
return findGenerateTextTraceForOperation(
events,
"ai-sdk-deny-output-override-operation",
);
}

function findStreamTrace(events: CapturedLogEvent[]) {
const operation = findLatestSpan(events, "ai-sdk-stream-operation");
const parent = findParentSpan(events, "streamText", operation?.span.id);
Expand Down Expand Up @@ -233,6 +265,125 @@ function findAgentStreamTrace(
};
}

function operationName(
event: CapturedLogEvent | undefined,
): string | undefined {
const metadata = event?.row.metadata;
if (!isRecord(metadata)) {
return undefined;
}

return typeof metadata.operation === "string"
? metadata.operation
: undefined;
}

function hasPromptLikeInput(input: unknown): boolean {
if (!isRecord(input)) {
return false;
}

return input.prompt !== undefined || input.messages !== undefined;
}

function hasSemanticOutput(
output: unknown,
keys: string[],
allowNonEmptyString = true,
): boolean {
if (allowNonEmptyString && typeof output === "string") {
return output.length > 0;
}

if (!isRecord(output)) {
return false;
}

return keys.some((key) => key in output);
}

function toolNamesFromInput(input: unknown): string[] {
if (!isRecord(input)) {
return [];
}

const tools = input.tools;
if (isRecord(tools)) {
return Object.keys(tools);
}

if (!Array.isArray(tools)) {
return [];
}

return tools
.map((tool) => {
if (!isRecord(tool)) {
return undefined;
}

const maybeName = tool.name ?? tool.toolName;
return typeof maybeName === "string" ? maybeName : undefined;
})
.filter((name): name is string => typeof name === "string");
}

function extractOutputRecord(
event: CapturedLogEvent | undefined,
): Record<string, unknown> | undefined {
return isRecord(event?.output) ? event.output : undefined;
}

function extractFinishReason(
event: CapturedLogEvent | undefined,
): string | undefined {
const output = extractOutputRecord(event);
const outputFinishReason = output?.finishReason;
if (typeof outputFinishReason === "string") {
return outputFinishReason;
}

const metadata = event?.row.metadata;
if (!isRecord(metadata)) {
return undefined;
}

return typeof metadata.finish_reason === "string"
? metadata.finish_reason
: undefined;
}

function extractFileAttachmentReference(
input: unknown,
): Record<string, unknown> | undefined {
if (!isRecord(input) || !Array.isArray(input.messages)) {
return undefined;
}

for (const message of input.messages) {
if (!isRecord(message) || !Array.isArray(message.content)) {
continue;
}

for (const part of message.content) {
if (!isRecord(part) || part.type !== "file") {
continue;
}

const data = part.data;
if (isRecord(data) && isRecord(data.reference)) {
return data.reference;
}

if (isRecord(data) && data.type === "braintrust_attachment") {
return data;
}
}
}

return undefined;
}

function normalizeAISDKContext(value: unknown): Json {
const context = isRecord(value) ? value : {};
return {
Expand Down Expand Up @@ -493,7 +644,10 @@ export function defineAISDKInstrumentationAssertions(options: {
name: string;
runScenario: RunAISDKScenario;
snapshotName: string;
supportsAttachmentScenario: boolean;
supportsDenyOutputOverrideScenario: boolean;
supportsGenerateObject: boolean;
supportsOutputObjectScenario: boolean;
supportsStreamObject: boolean;
supportsToolExecution: boolean;
testFileUrl: string;
Expand Down Expand Up @@ -545,6 +699,19 @@ export function defineAISDKInstrumentationAssertions(options: {
completion_tokens: expect.any(Number),
prompt_tokens: expect.any(Number),
});
expect(trace.parent?.metrics?.completion_tokens).toBeUndefined();
expect(trace.parent?.metrics?.prompt_tokens).toBeUndefined();
expect(trace.parent?.metrics?.tokens).toBeUndefined();
expect(operationName(trace.operation)).toBe("generate");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
expect(
hasSemanticOutput(trace.parent?.output, [
"_output",
"text",
"steps",
"toolCalls",
]),
).toBe(true);
});

test("captures trace for streamText()", testConfig, () => {
Expand All @@ -561,8 +728,81 @@ export function defineAISDKInstrumentationAssertions(options: {
completion_tokens: expect.any(Number),
prompt_tokens: expect.any(Number),
});
expect(trace.parent?.metrics?.completion_tokens).toBeUndefined();
expect(trace.parent?.metrics?.prompt_tokens).toBeUndefined();
expect(trace.parent?.metrics?.tokens).toBeUndefined();
expect(operationName(trace.operation)).toBe("stream");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
expect(
hasSemanticOutput(trace.parent?.output, [
"_output",
"text",
"steps",
"toolCalls",
]),
).toBe(true);
expect(extractFinishReason(trace.parent)).toEqual(expect.any(String));
const output = extractOutputRecord(trace.parent);
expect(output).toBeDefined();
if (output) {
const finalText = output.text ?? output._output;
expect(typeof finalText).toBe("string");
expect(String(finalText).length).toBeGreaterThan(0);
}
});

if (options.supportsOutputObjectScenario) {
test(
"captures Output.object schema on generateText()",
testConfig,
() => {
const root = findLatestSpan(events, ROOT_NAME);
const trace = findOutputObjectTrace(events);

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("output-object");
const input = isRecord(trace.parent?.input)
? trace.parent.input
: null;
expect(input).toBeTruthy();
expect(isRecord(input?.output)).toBe(true);

const outputInput = isRecord(input?.output) ? input.output : null;
expect(outputInput).toBeTruthy();
expect("response_format" in (outputInput ?? {})).toBe(true);
if (isRecord(outputInput?.response_format)) {
expect(typeof outputInput.response_format.type).toBe("string");
expect(outputInput.response_format.schema).toBeDefined();
}
},
);
}

if (options.supportsAttachmentScenario) {
test(
"captures file attachment normalization in input",
testConfig,
() => {
const root = findLatestSpan(events, ROOT_NAME);
const trace = findAttachmentTrace(events);

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("attachment");
const attachmentRef = extractFileAttachmentReference(
trace.parent?.input,
);
expect(attachmentRef).toBeDefined();
expect(attachmentRef).toMatchObject({
content_type: "text/plain",
key: expect.any(String),
type: "braintrust_attachment",
});
},
);
}

test("captures trace for generateText() with tools", testConfig, () => {
const root = findLatestSpan(events, ROOT_NAME);
const trace = findToolTrace(events);
Expand All @@ -571,12 +811,17 @@ export function defineAISDKInstrumentationAssertions(options: {
expectAISDKParentSpan(trace.parent);
expect(trace.parent?.input).toBeDefined();
expect(trace.parent?.output).toBeDefined();
expect(operationName(trace.operation)).toBe("tool");
expect(toolNamesFromInput(trace.parent?.input)).toContain("get_weather");

if (options.supportsToolExecution) {
expect(trace.modelChildren.length).toBeGreaterThanOrEqual(2);
expect(trace.toolSpans.length).toBeGreaterThanOrEqual(1);
expect(trace.toolSpans[0]?.input).toBeDefined();
expect(trace.toolSpans[0]?.output).toBeDefined();
expect(collectToolCallNames(trace.parent?.output)).toContain(
"get_weather",
);
} else {
expect(trace.modelChildren.length).toBeGreaterThanOrEqual(1);
expect(collectToolCallNames(trace.parent?.output)).toContain(
Expand All @@ -592,6 +837,15 @@ export function defineAISDKInstrumentationAssertions(options: {

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("generate-object");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
const generateObjectInput = isRecord(trace.parent?.input)
? trace.parent.input
: undefined;
expect(isRecord(generateObjectInput?.schema)).toBe(true);
if (isRecord(generateObjectInput?.schema)) {
expect(generateObjectInput.schema.type).toBe("object");
}
expect(trace.parent?.output).toMatchObject({
object: { city: "Paris" },
});
Expand All @@ -608,6 +862,15 @@ export function defineAISDKInstrumentationAssertions(options: {

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("stream-object");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
const streamObjectInput = isRecord(trace.parent?.input)
? trace.parent.input
: undefined;
expect(isRecord(streamObjectInput?.schema)).toBe(true);
if (isRecord(streamObjectInput?.schema)) {
expect(streamObjectInput.schema.type).toBe("object");
}
if (trace.parent?.metrics?.time_to_first_token !== undefined) {
expect(trace.parent.metrics.time_to_first_token).toEqual(
expect.any(Number),
Expand Down Expand Up @@ -636,6 +899,8 @@ export function defineAISDKInstrumentationAssertions(options: {

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("agent-generate");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
expect(trace.parent?.output).toBeDefined();
expect(trace.modelChildren.length).toBeGreaterThanOrEqual(1);
expect(trace.latestChild?.output).toBeDefined();
Expand All @@ -647,6 +912,8 @@ export function defineAISDKInstrumentationAssertions(options: {

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("agent-stream");
expect(hasPromptLikeInput(trace.parent?.input)).toBe(true);
expect(trace.parent?.metrics?.time_to_first_token).toEqual(
expect.any(Number),
);
Expand All @@ -655,6 +922,28 @@ export function defineAISDKInstrumentationAssertions(options: {
});
}

if (options.supportsDenyOutputOverrideScenario) {
test(
"captures denyOutputPaths override on instrumentation events",
testConfig,
() => {
const root = findLatestSpan(events, ROOT_NAME);
const trace = findDenyOutputOverrideTrace(events);

expectOperationParentedByRoot(trace.operation, root);
expectAISDKParentSpan(trace.parent);
expect(operationName(trace.operation)).toBe("deny-output-override");

const output = extractOutputRecord(trace.parent);
expect(output).toBeDefined();
if (output) {
expect([undefined, "<omitted>"]).toContain(output.text);
expect([undefined, "<omitted>"]).toContain(output._output);
}
},
);
}

test("matches the shared span snapshot", testConfig, async () => {
await expect(
formatJsonFileSnapshot(
Expand Down
Loading
Loading