@@ -23,6 +23,11 @@ export type ToolRun = {
2323 output : string ;
2424} ;
2525
/**
 * An image produced by a tool, expressed as an `image_url` content part.
 *
 * - `type` is always the literal `"image_url"`.
 * - `image_url.url` holds the image location; within this module it is
 *   populated with a base64 `data:` URL built from the tool's image block.
 * - `image_url.detail` is always the literal `"auto"`.
 */
export type ToolImagePart = {
  type: "image_url";
  image_url: { url: string; detail: "auto" };
};
30+
2631export interface NormalizedToolCall {
2732 id : string ;
2833 name : string ;
@@ -47,6 +52,7 @@ export interface ExecuteToolCallsParams {
/**
 * Aggregate outcome of executing a batch of tool calls.
 *
 * NOTE(review): this appears to be the shape of the `summary` carried by the
 * final `"complete"` event of `executeToolCalls` — confirm against the event
 * type declaration.
 */
export interface ToolCallExecutionResult {
  /** `role: "tool"` messages, one per tool call, carrying the textual output. */
  toolMessages: ChatCompletionMessageParam[];
  /** Record of each executed tool: its name, cleaned parameters and output text. */
  toolRuns: ToolRun[];
  /** Image parts extracted from the content blocks returned by the tools. */
  toolImages: ToolImagePart[];
  /**
   * Optional final answer text and whether it was interrupted.
   * NOTE(review): the producer of this field is not visible here — confirm
   * when it is set.
   */
  finalAnswer?: { text: string; interrupted: boolean };
}
5258
@@ -64,6 +70,17 @@ const serverMap = (servers: McpServerConfig[]): Map<string, McpServerConfig> =>
6470 return map ;
6571} ;
6672
/**
 * Converts one content block returned by a tool into a `ToolImagePart`.
 *
 * A block is accepted only when it is a non-null object whose `type` is
 * `"image"` and whose `data` and `mimeType` are both strings. `data` is
 * embedded verbatim as the base64 payload of a `data:` URL.
 *
 * @param block - Untyped content block; anything that is not an image block is rejected.
 * @returns The corresponding image part, or `undefined` when `block` is not an image block.
 */
function toToolImagePart(block: unknown): ToolImagePart | undefined {
  if (typeof block !== "object" || block === null) return undefined;
  const { type, data, mimeType } = block as Record<string, unknown>;
  if (type !== "image" || typeof data !== "string" || typeof mimeType !== "string") {
    return undefined;
  }
  return {
    type: "image_url",
    // The data URL must contain no whitespace around `;base64,` or after the
    // payload, otherwise consumers may fail to parse the media type or decode it.
    image_url: { url: `data:${mimeType};base64,${data}`, detail: "auto" },
  };
}
83+
6784export async function * executeToolCalls ( {
6885 calls,
6986 mapping,
@@ -78,6 +95,7 @@ export async function* executeToolCalls({
7895 const effectiveTimeoutMs = toolTimeoutMs ?? getMcpToolTimeoutMs ( ) ;
7996 const toolMessages : ChatCompletionMessageParam [ ] = [ ] ;
8097 const toolRuns : ToolRun [ ] = [ ] ;
98+ const toolImages : ToolImagePart [ ] = [ ] ;
8199 const serverLookup = serverMap ( servers ) ;
82100 // Pre-emit call + ETA updates and prepare tasks
83101 type TaskResult = {
@@ -335,7 +353,14 @@ export async function* executeToolCalls({
335353 const name = prepared [ r . index ] . call . name ;
336354 const id = prepared [ r . index ] . call . id ;
337355 if ( ! r . error ) {
338- const output = r . output ?? "" ;
356+ let output = r . output ?? "" ;
357+ // Extract any image content blocks returned by the MCP tool
358+ const imageParts = ( r . blocks ?? [ ] ) . map ( toToolImagePart ) . filter ( Boolean ) as ToolImagePart [ ] ;
359+ toolImages . push ( ...imageParts ) ;
360+ // If output is empty but images were returned, provide placeholder text
361+ if ( output === "" && imageParts . length > 0 ) {
362+ output = "Tool returned image(s)." ;
363+ }
339364 toolRuns . push ( { name, parameters : r . paramsClean , output } ) ;
340365 // For the LLM follow-up call, we keep only the textual output
341366 toolMessages . push ( { role : "tool" , tool_call_id : id , content : output } ) ;
@@ -345,5 +370,5 @@ export async function* executeToolCalls({
345370 }
346371 }
347372
348- yield { type : "complete" , summary : { toolMessages, toolRuns } } ;
373+ yield { type : "complete" , summary : { toolMessages, toolRuns, toolImages } } ;
349374}
0 commit comments