diff --git a/.changeset/popular-hounds-boil.md b/.changeset/popular-hounds-boil.md new file mode 100644 index 000000000000..c7d37c027305 --- /dev/null +++ b/.changeset/popular-hounds-boil.md @@ -0,0 +1,7 @@ +--- +'@ai-sdk/provider': patch +'@ai-sdk/google': patch +'ai': patch +--- + +fix(google): add thought signature to gemini 3 pro image parts diff --git a/examples/ai-core/src/generate-text/google-image-multi-step.ts b/examples/ai-core/src/generate-text/google-image-multi-step.ts new file mode 100644 index 000000000000..15b254b903eb --- /dev/null +++ b/examples/ai-core/src/generate-text/google-image-multi-step.ts @@ -0,0 +1,31 @@ +import { google } from '@ai-sdk/google'; +import { generateText } from 'ai'; + +import { presentImages } from '../lib/present-image'; +import { run } from '../lib/run'; + +import 'dotenv/config'; + +run(async () => { + const step1 = await generateText({ + model: google('gemini-3-pro-image-preview'), + prompt: + 'Create an image of Los Angeles where all car infrastructure has been replaced with bike infrastructure, trains, pedestrian zones, and parks. The image should be photorealistic and vibrant.', + }); + + await presentImages(step1.files); + + const step2 = await generateText({ + model: google('gemini-3-pro-image-preview'), + messages: [ + ...step1.response.messages, + { + role: 'user', + content: + 'Now create a variation of the image, but in the style of a watercolor painting.', + }, + ], + }); + + await presentImages(step2.files); +}); diff --git a/packages/ai/src/generate-text/generate-text.ts b/packages/ai/src/generate-text/generate-text.ts index d65732e26caa..b9a82bb39bc1 100644 --- a/packages/ai/src/generate-text/generate-text.ts +++ b/packages/ai/src/generate-text/generate-text.ts @@ -884,6 +884,9 @@ function asContent({ return { type: 'file' as const, file: new DefaultGeneratedFile(part), + ...(part.providerMetadata != null + ? { providerMetadata: part.providerMetadata } + : {}), }; } diff --git a/packages/google/src/convert-to-google-generative-ai-messages.test.ts b/packages/google/src/convert-to-google-generative-ai-messages.test.ts index d5065a1ed9ed..bec79540a2c2 100644 --- a/packages/google/src/convert-to-google-generative-ai-messages.test.ts +++ b/packages/google/src/convert-to-google-generative-ai-messages.test.ts @@ -323,19 +323,6 @@ describe('assistant messages', () => { }); }); - it('should throw error for non-PNG images in assistant messages', async () => { - expect(() => - convertToGoogleGenerativeAIMessages([ - { - role: 'assistant', - content: [ - { type: 'file', data: 'AAECAw==', mediaType: 'image/jpeg' }, - ], - }, - ]), - ).toThrow('Only PNG images are supported in assistant messages'); - }); - it('should throw error for URL file data in assistant messages', async () => { expect(() => convertToGoogleGenerativeAIMessages([ diff --git a/packages/google/src/convert-to-google-generative-ai-messages.ts b/packages/google/src/convert-to-google-generative-ai-messages.ts index b0b9bbeff1b1..377278de031b 100644 --- a/packages/google/src/convert-to-google-generative-ai-messages.ts +++ b/packages/google/src/convert-to-google-generative-ai-messages.ts @@ -107,13 +107,6 @@ export function convertToGoogleGenerativeAIMessages( } case 'file': { - if (part.mediaType !== 'image/png') { - throw new UnsupportedFunctionalityError({ - functionality: - 'Only PNG images are supported in assistant messages', - }); - } - if (part.data instanceof URL) { throw new UnsupportedFunctionalityError({ functionality: @@ -126,6 +119,7 @@ export function convertToGoogleGenerativeAIMessages( mimeType: part.mediaType, data: convertToBase64(part.data), }, + thoughtSignature, }; } diff --git a/packages/google/src/google-generative-ai-language-model.test.ts b/packages/google/src/google-generative-ai-language-model.test.ts index 7adfbe677bb3..1f837ce24859 100644 --- a/packages/google/src/google-generative-ai-language-model.test.ts +++ b/packages/google/src/google-generative-ai-language-model.test.ts @@ -1410,6 +1410,7 @@ describe('doGenerate', () => { { "data": "base64encodedimagedata", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { @@ -1420,6 +1421,7 @@ describe('doGenerate', () => { { "data": "anotherbase64encodedimagedata", "mediaType": "image/png", + "providerMetadata": undefined, "type": "file", }, ] @@ -1472,11 +1474,13 @@ describe('doGenerate', () => { { "data": "imagedata1", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { "data": "imagedata2", "mediaType": "image/png", + "providerMetadata": undefined, "type": "file", }, ] @@ -1591,11 +1595,13 @@ describe('doGenerate', () => { { "data": "validimagedata", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { "data": "pdfdata", "mediaType": "application/pdf", + "providerMetadata": undefined, "type": "file", }, ] diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index 6e06f6cc0a6e..862b12246bc9 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -271,6 +271,9 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { type: 'file' as const, data: part.inlineData.data, mediaType: part.inlineData.mimeType, + providerMetadata: part.thoughtSignature + ? { google: { thoughtSignature: part.thoughtSignature } } + : undefined, }); } } @@ -809,6 +812,7 @@ const getContentSchema = () => mimeType: z.string(), data: z.string(), }), + thoughtSignature: z.string().nullish(), }), z.object({ executableCode: z diff --git a/packages/provider/src/language-model/v2/language-model-v2-file.ts b/packages/provider/src/language-model/v2/language-model-v2-file.ts index 60f1120fd87b..b91f39e2f34f 100644 --- a/packages/provider/src/language-model/v2/language-model-v2-file.ts +++ b/packages/provider/src/language-model/v2/language-model-v2-file.ts @@ -1,3 +1,5 @@ +import { SharedV2ProviderMetadata } from '../../shared'; + /** A file that has been generated by the model. Generated files as base64 encoded strings or binary data. @@ -22,4 +24,9 @@ as base64 encoded strings. If the API returns binary data, the file data should be returned as binary data. */ data: string | Uint8Array; + + /** +Optional provider-specific metadata for the file part. + */ + providerMetadata?: SharedV2ProviderMetadata; };