Skip to content

Commit 1838605

Browse files
7418 and claude committed
fix: send images as multimodal vision content instead of file path references
Images were being saved to disk and referenced by text path in the prompt, causing Claude to hallucinate about image contents since it never actually saw the image data.

Now images are sent as base64 content blocks via the SDK's multimodal SDKUserMessage, enabling Claude's vision capability. Non-image files continue to be saved to disk for Read tool access.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9926ce5 commit 1838605

3 files changed

Lines changed: 58 additions & 19 deletions

File tree

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "codepilot",
3-
"version": "0.6.2",
3+
"version": "0.6.3",
44
"private": true,
55
"main": "dist-electron/main.js",
66
"scripts": {

src/lib/claude-client.ts

Lines changed: 55 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -366,24 +366,63 @@ export function streamClaude(options: ClaudeStreamOptions): ReadableStream<strin
366366
}
367367
};
368368

369-
// Build the prompt: save all file attachments to disk and reference them in the prompt.
370-
// Claude Code's built-in Read tool can read images (converts to base64 internally),
371-
// PDFs, and text files — this is more reliable than constructing SDK multimodal messages.
372-
let finalPrompt: string = prompt;
369+
// Build the prompt with file attachments.
370+
// Images → sent as multimodal base64 content blocks (vision).
371+
// Non-image files → saved to disk and referenced via Read tool.
372+
let finalPrompt: string | AsyncIterable<SDKUserMessage> = prompt;
373373

374374
if (files && files.length > 0) {
375-
const workDir = workingDirectory || process.cwd();
376-
const savedPaths = saveUploadedFiles(files, workDir);
377-
const fileReferences = savedPaths
378-
.map((p, i) => {
379-
const f = files[i];
380-
if (isImageFile(f.type)) {
381-
return `[User attached image: ${p} (${f.name})]`;
382-
}
383-
return `[User attached file: ${p} (${f.name})]`;
384-
})
385-
.join('\n');
386-
finalPrompt = `${fileReferences}\n\nPlease read the attached file(s) above using your Read tool, then respond to the user's message:\n\n${prompt}`;
375+
const imageFiles = files.filter(f => isImageFile(f.type));
376+
const nonImageFiles = files.filter(f => !isImageFile(f.type));
377+
378+
// Save non-image files to disk for Read tool access
379+
let textPrompt = prompt;
380+
if (nonImageFiles.length > 0) {
381+
const workDir = workingDirectory || process.cwd();
382+
const savedPaths = saveUploadedFiles(nonImageFiles, workDir);
383+
const fileReferences = savedPaths
384+
.map((p, i) => `[User attached file: ${p} (${nonImageFiles[i].name})]`)
385+
.join('\n');
386+
textPrompt = `${fileReferences}\n\nPlease read the attached file(s) above using your Read tool, then respond to the user's message:\n\n${prompt}`;
387+
}
388+
389+
// If there are images, build a multimodal SDKUserMessage
390+
if (imageFiles.length > 0) {
391+
const contentBlocks: Array<
392+
| { type: 'image'; source: { type: 'base64'; media_type: string; data: string } }
393+
| { type: 'text'; text: string }
394+
> = [];
395+
396+
for (const img of imageFiles) {
397+
contentBlocks.push({
398+
type: 'image',
399+
source: {
400+
type: 'base64',
401+
media_type: img.type || 'image/png',
402+
data: img.data,
403+
},
404+
});
405+
}
406+
407+
contentBlocks.push({ type: 'text', text: textPrompt });
408+
409+
const userMessage: SDKUserMessage = {
410+
type: 'user',
411+
message: {
412+
role: 'user',
413+
content: contentBlocks,
414+
},
415+
parent_tool_use_id: null,
416+
session_id: sdkSessionId || '',
417+
};
418+
419+
// Create a single-message async iterable
420+
finalPrompt = (async function* () {
421+
yield userMessage;
422+
})();
423+
} else {
424+
finalPrompt = textPrompt;
425+
}
387426
}
388427

389428
const conversation = query({

0 commit comments

Comments
 (0)