From ff49919680250db73eb50627c498049db2119e45 Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 01:29:57 +0000 Subject: [PATCH 1/6] feat(go/plugins/compat_oai): Add media parts support --- go/plugins/compat_oai/generate.go | 32 ++++++++++++++++++++++++++-- go/samples/compat_oai/openai/main.go | 28 ++++++++++++++++++++---- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/go/plugins/compat_oai/generate.go b/go/plugins/compat_oai/generate.go index 2b39c3bea8..7b86568619 100644 --- a/go/plugins/compat_oai/generate.go +++ b/go/plugins/compat_oai/generate.go @@ -107,9 +107,35 @@ func (g *ModelGenerator) WithMessages(messages []*ai.Message) *ModelGenerator { ) oaiMessages = append(oaiMessages, tm) } - default: + case ai.RoleUser: + parts := []openai.ChatCompletionContentPartUnionParam{} + + // append all text parts first oaiMessages = append(oaiMessages, openai.UserMessage(content)) + + for _, p := range msg.Content { + if p.IsMedia() { + part := openai.ImageContentPart( + openai.ChatCompletionContentPartImageImageURLParam{ + URL: p.Text, + }) + parts = append(parts, part) + continue + } + } + if len(parts) > 0 { + oaiMessages = append(oaiMessages, openai.ChatCompletionMessageParamUnion{ + OfUser: &openai.ChatCompletionUserMessageParam{ + Content: openai.ChatCompletionUserMessageParamContentUnion{OfArrayOfContentParts: parts}, + }, + }) + } + + default: + // ignore parts from not supported roles + continue } + } g.messages = oaiMessages return g @@ -213,7 +239,9 @@ func (g *ModelGenerator) Generate(ctx context.Context, handleChunk func(context. func (g *ModelGenerator) concatenateContent(parts []*ai.Part) string { content := "" for _, part := range parts { - content += part.Text + if part.IsText() { + content += part.Text + } } return content } diff --git a/go/samples/compat_oai/openai/main.go b/go/samples/compat_oai/openai/main.go index 5618690ec7..5c4f318b67 100644 --- a/go/samples/compat_oai/openai/main.go +++ b/go/samples/compat_oai/openai/main.go @@ -42,11 +42,11 @@ func main() { prompt := fmt.Sprintf("tell me a joke about %s", subject) config := &openai.ChatCompletionNewParams{Temperature: openai.Float(0.5), MaxTokens: openai.Int(100)} - foo, err := genkit.Generate(ctx, g, ai.WithModel(gpt4o), ai.WithPrompt(prompt), ai.WithConfig(config)) + resp, err := genkit.Generate(ctx, g, ai.WithModel(gpt4o), ai.WithPrompt(prompt), ai.WithConfig(config)) if err != nil { return "", err } - return fmt.Sprintf("foo: %s", foo.Text()), nil + return fmt.Sprintf("resp: %s", resp.Text()), nil }) genkit.DefineFlow(g, "defined-model", func(ctx context.Context, subject string) (string, error) { @@ -56,11 +56,31 @@ func main() { } prompt := fmt.Sprintf("tell me a joke about %s", subject) config := &openai.ChatCompletionNewParams{Temperature: openai.Float(0.5)} - foo, err := genkit.Generate(ctx, g, ai.WithModel(gpt4oMini), ai.WithPrompt(prompt), ai.WithConfig(config)) + resp, err := genkit.Generate(ctx, g, ai.WithModel(gpt4oMini), ai.WithPrompt(prompt), ai.WithConfig(config)) if err != nil { return "", err } - return fmt.Sprintf("foo: %s", foo.Text()), nil + return resp.Text(), nil + }) + + genkit.DefineFlow(g, "media", func(ctx context.Context, subject string) (string, error) { + gpt4oMini := oai.Model(g, "gpt-4o-mini") + if err != nil { + return "", err + } + config := &openai.ChatCompletionNewParams{Temperature: openai.Float(0.5)} + resp, err := genkit.Generate(ctx, g, + ai.WithModel(gpt4oMini), + ai.WithConfig(config), + ai.WithMessages(ai.NewUserMessage(ai.NewTextPart("Hi, I'll provide you a quick request in the following message")), + ai.NewUserMessage( + ai.NewTextPart("can you tell me which animal is in the provider image?"), + ai.NewMediaPart("image/jpg", "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg"), + ))) + if err != nil { + return "", err + } + return resp.Text(), nil }) mux := http.NewServeMux() From 5ea83194189318dd4a5e929b55e54f0b9225dbc5 Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 01:34:56 +0000 Subject: [PATCH 2/6] fix: typo --- go/samples/compat_oai/openai/main.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go/samples/compat_oai/openai/main.go b/go/samples/compat_oai/openai/main.go index 5c4f318b67..1c13920d6c 100644 --- a/go/samples/compat_oai/openai/main.go +++ b/go/samples/compat_oai/openai/main.go @@ -72,9 +72,10 @@ func main() { resp, err := genkit.Generate(ctx, g, ai.WithModel(gpt4oMini), ai.WithConfig(config), - ai.WithMessages(ai.NewUserMessage(ai.NewTextPart("Hi, I'll provide you a quick request in the following message")), + ai.WithMessages( + ai.NewUserMessage(ai.NewTextPart("Hi, I'll provide you a quick request in the following message")), ai.NewUserMessage( - ai.NewTextPart("can you tell me which animal is in the provider image?"), + ai.NewTextPart("can you tell me which animal is in the provided image?"), ai.NewMediaPart("image/jpg", "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg"), ))) if err != nil { From bd168fe9961877f30616ba38f28eb7640ab164ef Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 04:58:10 +0000 Subject: [PATCH 3/6] misc --- go/plugins/compat_oai/generate.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/go/plugins/compat_oai/generate.go b/go/plugins/compat_oai/generate.go index 7b86568619..5d66424537 100644 --- a/go/plugins/compat_oai/generate.go +++ b/go/plugins/compat_oai/generate.go @@ -108,11 +108,10 @@ func (g *ModelGenerator) WithMessages(messages []*ai.Message) *ModelGenerator { oaiMessages = append(oaiMessages, tm) } case ai.RoleUser: - parts := []openai.ChatCompletionContentPartUnionParam{} - - // append all text parts first + // content includes all the user text parts oaiMessages = append(oaiMessages, openai.UserMessage(content)) + parts := []openai.ChatCompletionContentPartUnionParam{} for _, p := range msg.Content { if p.IsMedia() { part := openai.ImageContentPart( @@ -130,7 +129,6 @@ func (g *ModelGenerator) WithMessages(messages []*ai.Message) *ModelGenerator { }, }) } - default: // ignore parts from not supported roles continue From 3d850f61220b2b493bc47b611ad22541885155c5 Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 05:04:11 +0000 Subject: [PATCH 4/6] update generate.go --- go/plugins/compat_oai/generate.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/go/plugins/compat_oai/generate.go b/go/plugins/compat_oai/generate.go index 5d66424537..48de5cbfba 100644 --- a/go/plugins/compat_oai/generate.go +++ b/go/plugins/compat_oai/generate.go @@ -108,7 +108,6 @@ func (g *ModelGenerator) WithMessages(messages []*ai.Message) *ModelGenerator { oaiMessages = append(oaiMessages, tm) } case ai.RoleUser: - // content includes all the user text parts oaiMessages = append(oaiMessages, openai.UserMessage(content)) parts := []openai.ChatCompletionContentPartUnionParam{} @@ -237,9 +236,7 @@ func (g *ModelGenerator) Generate(ctx context.Context, handleChunk func(context. func (g *ModelGenerator) concatenateContent(parts []*ai.Part) string { content := "" for _, part := range parts { - if part.IsText() { - content += part.Text - } + content += part.Text } return content } From 0dd21c7c7eb7f284a69280ece7d31c66e53269d0 Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 16:34:51 +0000 Subject: [PATCH 5/6] remove donkey taking a sunbath --- go/plugins/googlegenai/googleai_live_test.go | 10 +++++----- go/plugins/vertexai/modelgarden/anthropic_live_test.go | 10 +++++----- go/samples/compat_oai/openai/main.go | 2 +- go/samples/prompts/main.go | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/go/plugins/googlegenai/googleai_live_test.go b/go/plugins/googlegenai/googleai_live_test.go index e4edb8775d..bac6d1a612 100644 --- a/go/plugins/googlegenai/googleai_live_test.go +++ b/go/plugins/googlegenai/googleai_live_test.go @@ -331,8 +331,8 @@ func TestGoogleAILive(t *testing.T) { if err != nil { t.Fatal(err) } - if !strings.Contains(strings.ToLower(resp.Text()), "donkey") { - t.Fatalf("image detection failed, want: donkey, got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "lizard") { + t.Fatalf("image detection failed, want: lizard, got: %s", resp.Text()) } }) t.Run("media content", func(t *testing.T) { @@ -357,11 +357,11 @@ func TestGoogleAILive(t *testing.T) { t.Fatal(err) } resp, err := genkit.Generate(ctx, g, - ai.WithSystem("You are a pirate expert in TV Shows, your response should include the name of the character in the image provided"), + ai.WithSystem("You are an excellent animal detector, the user will provide you a request with an image, identify which animal is in there"), ai.WithMessages( ai.NewUserMessage( ai.NewTextPart("do you know who's in the image?"), - ai.NewDataPart("data:image/png;base64,"+i), + ai.NewDataPart("data:image/jpg;base64,"+i), ), ), ) @@ -567,7 +567,7 @@ func TestCacheHelper(t *testing.T) { func fetchImgAsBase64() (string, error) { // CC0 license image - imgUrl := "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg" + imgUrl := "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg" resp, err := http.Get(imgUrl) if err != nil { return "", err diff --git a/go/plugins/vertexai/modelgarden/anthropic_live_test.go b/go/plugins/vertexai/modelgarden/anthropic_live_test.go index 0487e92f19..5525b6b420 100644 --- a/go/plugins/vertexai/modelgarden/anthropic_live_test.go +++ b/go/plugins/vertexai/modelgarden/anthropic_live_test.go @@ -100,14 +100,14 @@ func TestAnthropicLive(t *testing.T) { ai.WithModel(m), ai.WithMessages( ai.NewUserMessage( - ai.NewTextPart("do you know who's in the image?"), - ai.NewMediaPart("", "data:image/jpeg;base64,"+i)))) + ai.NewTextPart("do you know which animal is in the image?"), + ai.NewMediaPart("", "data:image/jpg;base64,"+i)))) if err != nil { t.Fatal(err) } - if !strings.Contains(resp.Text(), "donkey") { - t.Fatalf("it should've said donkey but got: %s", resp.Text()) + if !strings.Contains(resp.Text(), "lizard") { + t.Fatalf("it should've said lizard but got: %s", resp.Text()) } }) @@ -209,7 +209,7 @@ func TestAnthropicLive(t *testing.T) { func fetchImgAsBase64() (string, error) { // CC0 license image - imgUrl := "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg" + imgUrl := "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg" resp, err := http.Get(imgUrl) if err != nil { return "", err diff --git a/go/samples/compat_oai/openai/main.go b/go/samples/compat_oai/openai/main.go index 1c13920d6c..3901ff3e10 100644 --- a/go/samples/compat_oai/openai/main.go +++ b/go/samples/compat_oai/openai/main.go @@ -76,7 +76,7 @@ func main() { ai.NewUserMessage(ai.NewTextPart("Hi, I'll provide you a quick request in the following message")), ai.NewUserMessage( ai.NewTextPart("can you tell me which animal is in the provided image?"), - ai.NewMediaPart("image/jpg", "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg"), + ai.NewMediaPart("image/jpg", "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg"), ))) if err != nil { return "", err diff --git a/go/samples/prompts/main.go b/go/samples/prompts/main.go index 08e217a73f..fd2caf4fd5 100644 --- a/go/samples/prompts/main.go +++ b/go/samples/prompts/main.go @@ -340,7 +340,7 @@ func PromptWithMediaType(ctx context.Context, g *genkit.Genkit) { resp, err := prompt.Execute(ctx, ai.WithModelName("vertexai/gemini-2.0-flash"), - ai.WithInput(map[string]any{"imageUrl": "data:image/png;base64," + img}), + ai.WithInput(map[string]any{"imageUrl": "data:image/jpg;base64," + img}), ) if err != nil { log.Fatal(err) @@ -349,7 +349,7 @@ func PromptWithMediaType(ctx context.Context, g *genkit.Genkit) { } func fetchImgAsBase64() (string, error) { - imgUrl := "https://pd.w.org/2025/05/64268380a8c42af85.63713105-2048x1152.jpg" + imgUrl := "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg" resp, err := http.Get(imgUrl) if err != nil { return "", err From 8760a8ec7ebbe54813c5fce38db1f7e426d08f4d Mon Sep 17 00:00:00 2001 From: Hugo Aguirre Parra Date: Tue, 15 Jul 2025 17:03:48 +0000 Subject: [PATCH 6/6] update live tests --- go/plugins/googlegenai/googleai_live_test.go | 16 +++++++-------- go/plugins/googlegenai/models.go | 9 --------- go/plugins/googlegenai/vertexai_live_test.go | 20 +++++++++---------- .../modelgarden/anthropic_live_test.go | 9 ++++----- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/go/plugins/googlegenai/googleai_live_test.go b/go/plugins/googlegenai/googleai_live_test.go index bac6d1a612..a61c384fe4 100644 --- a/go/plugins/googlegenai/googleai_live_test.go +++ b/go/plugins/googlegenai/googleai_live_test.go @@ -178,7 +178,7 @@ func TestGoogleAILive(t *testing.T) { } }) t.Run("tool with thinking", func(t *testing.T) { - m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash-preview-04-17") + m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash") resp, err := genkit.Generate(ctx, g, ai.WithConfig(&genai.GenerateContentConfig{ ThinkingConfig: &genai.ThinkingConfig{ @@ -331,8 +331,8 @@ func TestGoogleAILive(t *testing.T) { if err != nil { t.Fatal(err) } - if !strings.Contains(strings.ToLower(resp.Text()), "lizard") { - t.Fatalf("image detection failed, want: lizard, got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "cat") { + t.Fatalf("image detection failed, want: cat, got: %s", resp.Text()) } }) t.Run("media content", func(t *testing.T) { @@ -368,8 +368,8 @@ func TestGoogleAILive(t *testing.T) { if err != nil { t.Fatal(err) } - if !strings.Contains(resp.Text(), "donkey") { - t.Fatalf("image detection failed, want: donkey, got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "cat") { + t.Fatalf("image detection failed, want: cat, got: %s", resp.Text()) } }) t.Run("image generation", func(t *testing.T) { @@ -435,7 +435,7 @@ func TestGoogleAILive(t *testing.T) { } }) t.Run("thinking", func(t *testing.T) { - m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash-preview-04-17") + m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash") resp, err := genkit.Generate(ctx, g, ai.WithConfig(genai.GenerateContentConfig{ Temperature: genai.Ptr[float32](0.4), @@ -457,7 +457,7 @@ func TestGoogleAILive(t *testing.T) { } }) t.Run("thinking disabled", func(t *testing.T) { - m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash-preview-04-17") + m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash") resp, err := genkit.Generate(ctx, g, ai.WithConfig(genai.GenerateContentConfig{ Temperature: genai.Ptr[float32](0.4), @@ -567,7 +567,7 @@ func TestCacheHelper(t *testing.T) { func fetchImgAsBase64() (string, error) { // CC0 license image - imgUrl := "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg" + imgUrl := "https://pd.w.org/2025/07/896686fbbcd9990c9.84605288-2048x1365.jpg" resp, err := http.Get(imgUrl) if err != nil { return "", err diff --git a/go/plugins/googlegenai/models.go b/go/plugins/googlegenai/models.go index 96166a3936..127bf9b586 100644 --- a/go/plugins/googlegenai/models.go +++ b/go/plugins/googlegenai/models.go @@ -28,7 +28,6 @@ const ( gemini20FlashPrevImageGen = "gemini-2.0-flash-preview-image-generation" gemini25Flash = "gemini-2.5-flash" - gemini25FlashPreview0417 = "gemini-2.5-flash-preview-04-17" gemini25FlashLite = "gemini-2.5-flash-lite" gemini25FlashLitePrev0617 = "gemini-2.5-flash-lite-preview-06-17" @@ -66,7 +65,6 @@ var ( gemini25Flash, gemini25FlashLite, gemini25Pro, - gemini25FlashPreview0417, gemini25FlashLitePrev0617, gemini25ProExp0325, gemini25ProPreview0325, @@ -90,7 +88,6 @@ var ( gemini25Flash, gemini25FlashLite, gemini25Pro, - gemini25FlashPreview0417, gemini25FlashLitePrev0617, gemini25ProExp0325, gemini25ProPreview0325, @@ -192,12 +189,6 @@ var ( Supports: &Multimodal, Stage: ai.ModelStageStable, }, - gemini25FlashPreview0417: { - Label: "Gemini 2.5 Flash Preview 04-17", - Versions: []string{}, - Supports: &Multimodal, - Stage: ai.ModelStageUnstable, - }, gemini25ProExp0325: { Label: "Gemini 2.5 Pro Exp 03-25", Versions: []string{}, diff --git a/go/plugins/googlegenai/vertexai_live_test.go b/go/plugins/googlegenai/vertexai_live_test.go index 170f43d58a..5b696de223 100644 --- a/go/plugins/googlegenai/vertexai_live_test.go +++ b/go/plugins/googlegenai/vertexai_live_test.go @@ -216,16 +216,16 @@ func TestVertexAILive(t *testing.T) { ai.WithSystem("You are a pirate expert in animals, your response should include the name of the animal in the provided image"), ai.WithMessages( ai.NewUserMessage( - ai.NewTextPart("do you know who's in the image?"), - ai.NewMediaPart("image/png", "data:image/png;base64,"+i), + ai.NewTextPart("do you know which animal is in the image?"), + ai.NewMediaPart("image/jpg", "data:image/jpg;base64,"+i), ), ), ) if err != nil { t.Fatal(err) } - if !strings.Contains(resp.Text(), "donkey") { - t.Fatalf("image detection failed, want: donkey, got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "cat") { + t.Fatalf("image detection failed, want: cat, got: %s", resp.Text()) } }) t.Run("media content", func(t *testing.T) { @@ -250,24 +250,24 @@ func TestVertexAILive(t *testing.T) { t.Fatal(err) } resp, err := genkit.Generate(ctx, g, - ai.WithSystem("You are a pirate expert in TV Shows, your response should include the name of the character in the image provided"), + ai.WithSystem("You are a pirate expert in animals, your response should include the name of the animal in the image provided"), ai.WithMessages( ai.NewUserMessage( - ai.NewTextPart("do you know who's in the image?"), - ai.NewDataPart("data:image/png;base64,"+i), + ai.NewTextPart("do you know which animal is in the image?"), + ai.NewDataPart("data:image/jpg;base64,"+i), ), ), ) if err != nil { t.Fatal(err) } - if !strings.Contains(resp.Text(), "donkey") { - t.Fatalf("image detection failed, want: donkey, got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "cat") { + t.Fatalf("image detection failed, want: cat, got: %s", resp.Text()) } }) t.Run("image generation", func(t *testing.T) { if location != "global" { - t.Skip("image generation in Vertex AI is only supported in region: global") + t.Skipf("image generation in Vertex AI is only supported in region: global, got: %s", location) } m := googlegenai.VertexAIModel(g, "gemini-2.0-flash-preview-image-generation") resp, err := genkit.Generate(ctx, g, diff --git a/go/plugins/vertexai/modelgarden/anthropic_live_test.go b/go/plugins/vertexai/modelgarden/anthropic_live_test.go index 5525b6b420..847f9a4719 100644 --- a/go/plugins/vertexai/modelgarden/anthropic_live_test.go +++ b/go/plugins/vertexai/modelgarden/anthropic_live_test.go @@ -101,13 +101,12 @@ func TestAnthropicLive(t *testing.T) { ai.WithMessages( ai.NewUserMessage( ai.NewTextPart("do you know which animal is in the image?"), - ai.NewMediaPart("", "data:image/jpg;base64,"+i)))) + ai.NewMediaPart("", "data:image/jpeg;base64,"+i)))) if err != nil { t.Fatal(err) } - - if !strings.Contains(resp.Text(), "lizard") { - t.Fatalf("it should've said lizard but got: %s", resp.Text()) + if !strings.Contains(strings.ToLower(resp.Text()), "cat") { + t.Fatalf("want: cat, got: %s", resp.Text()) } }) @@ -209,7 +208,7 @@ func TestAnthropicLive(t *testing.T) { func fetchImgAsBase64() (string, error) { // CC0 license image - imgUrl := "https://pd.w.org/2025/07/58268765f177911d4.13750400-2048x1365.jpg" + imgUrl := "https://pd.w.org/2025/07/896686fbbcd9990c9.84605288-2048x1365.jpg" resp, err := http.Get(imgUrl) if err != nil { return "", err