From fb193825036cb2876ba1e022118f76087bbd13a8 Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Thu, 13 Nov 2025 23:28:20 +1300 Subject: [PATCH 1/7] feat: Add ImageConfig to gemini completion API. --- rig-core/src/providers/gemini/completion.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rig-core/src/providers/gemini/completion.rs b/rig-core/src/providers/gemini/completion.rs index 6db25c870..cbb72a375 100644 --- a/rig-core/src/providers/gemini/completion.rs +++ b/rig-core/src/providers/gemini/completion.rs @@ -1362,6 +1362,8 @@ pub mod gemini_api_types { /// Configuration for thinking/reasoning. #[serde(skip_serializing_if = "Option::is_none")] pub thinking_config: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub image_config: Option, } impl Default for GenerationConfig { @@ -1380,6 +1382,7 @@ pub mod gemini_api_types { response_logprobs: None, logprobs: None, thinking_config: None, + image_config: None, } } } @@ -1390,6 +1393,13 @@ pub mod gemini_api_types { pub thinking_budget: u32, pub include_thoughts: Option, } + + #[derive(Debug, Deserialize, Serialize)] + #[serde(rename_all = "camelCase")] + pub struct ImageConfig { + pub aspect_ratio: Option, + } + /// The Schema object allows the definition of input and output data types. These types can be objects, but also /// primitives and arrays. Represents a select subset of an OpenAPI 3.0 schema object. /// From [Gemini API Reference](https://ai.google.dev/api/caching#Schema) From 4a2f0d7f0fb42bf6562b4c84e3f7ef8c6e80acfe Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Fri, 14 Nov 2025 11:57:16 +1300 Subject: [PATCH 2/7] feat: Add gemini support for images in assistant completions. 
--- rig-bedrock/src/types/assistant_content.rs | 3 + rig-core/src/completion/message.rs | 15 +++++ .../src/providers/anthropic/completion.rs | 22 ++++--- rig-core/src/providers/cohere/completion.rs | 58 +++++++++-------- rig-core/src/providers/galadriel.rs | 5 ++ rig-core/src/providers/gemini/completion.rs | 64 ++++++++++++++++--- rig-core/src/providers/groq.rs | 5 ++ .../src/providers/huggingface/completion.rs | 5 ++ rig-core/src/providers/mistral/completion.rs | 6 ++ rig-core/src/providers/ollama.rs | 39 ++++++----- .../src/providers/openai/completion/mod.rs | 5 ++ .../src/providers/openai/responses_api/mod.rs | 11 ++++ rig-eternalai/src/providers/eternalai.rs | 3 + 13 files changed, 183 insertions(+), 58 deletions(-) diff --git a/rig-bedrock/src/types/assistant_content.rs b/rig-bedrock/src/types/assistant_content.rs index cd9c28f0a..a03b9168a 100644 --- a/rig-bedrock/src/types/assistant_content.rs +++ b/rig-bedrock/src/types/assistant_content.rs @@ -149,6 +149,9 @@ impl TryFrom for aws_bedrock::ContentBlock { aws_bedrock::ReasoningContentBlock::ReasoningText(reasoning_text_block), )) } + AssistantContent::Image(_) => Err(CompletionError::ProviderError( + "AWS Bedrock does not support image content in assistant messages".to_owned(), + )), } } } diff --git a/rig-core/src/completion/message.rs b/rig-core/src/completion/message.rs index 719311023..b52d4ae25 100644 --- a/rig-core/src/completion/message.rs +++ b/rig-core/src/completion/message.rs @@ -65,6 +65,7 @@ pub enum AssistantContent { Text(Text), ToolCall(ToolCall), Reasoning(Reasoning), + Image(Image), } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] @@ -591,6 +592,20 @@ impl AssistantContent { AssistantContent::Text(text.into().into()) } + /// Helper constructor to make creating assistant image content easier. 
+ pub fn image_base64( + data: impl Into, + media_type: Option, + detail: Option, + ) -> Self { + AssistantContent::Image(Image { + data: DocumentSourceKind::Base64(data.into()), + media_type, + detail, + additional_params: None, + }) + } + /// Helper constructor to make creating assistant tool call content easier. pub fn tool_call( id: impl Into, diff --git a/rig-core/src/providers/anthropic/completion.rs b/rig-core/src/providers/anthropic/completion.rs index da0109b59..b4c6d32e4 100644 --- a/rig-core/src/providers/anthropic/completion.rs +++ b/rig-core/src/providers/anthropic/completion.rs @@ -386,25 +386,31 @@ impl TryFrom for DocumentFormat { } } -impl From for Content { - fn from(text: message::AssistantContent) -> Self { +impl TryFrom for Content { + type Error = MessageError; + fn try_from(text: message::AssistantContent) -> Result { match text { - message::AssistantContent::Text(message::Text { text }) => Content::Text { text }, + message::AssistantContent::Text(message::Text { text }) => Ok(Content::Text { text }), + message::AssistantContent::Image(_) => { + return Err(MessageError::ConversionError( + "Anthropic currently doesn't support images.".into(), + )); + } message::AssistantContent::ToolCall(message::ToolCall { id, function, .. }) => { - Content::ToolUse { + Ok(Content::ToolUse { id, name: function.name, input: function.arguments, - } + }) } message::AssistantContent::Reasoning(Reasoning { reasoning, signature, .. - }) => Content::Thinking { + }) => Ok(Content::Thinking { thinking: reasoning.first().cloned().unwrap_or(String::new()), signature, - }, + }), } } } @@ -515,7 +521,7 @@ impl TryFrom for Message { }, message::Message::Assistant { content, .. 
} => Message { - content: content.map(|content| content.into()), + content: content.try_map(|content| content.try_into())?, role: Role::Assistant, }, }) diff --git a/rig-core/src/providers/cohere/completion.rs b/rig-core/src/providers/cohere/completion.rs index d43df8e7e..f73c1e88d 100644 --- a/rig-core/src/providers/cohere/completion.rs +++ b/rig-core/src/providers/cohere/completion.rs @@ -393,32 +393,40 @@ impl TryFrom for Vec { message::Message::Assistant { content, .. } => { let mut text_content = vec![]; let mut tool_calls = vec![]; - content.into_iter().for_each(|content| match content { - message::AssistantContent::Text(message::Text { text }) => { - text_content.push(AssistantContent::Text { text }); - } - message::AssistantContent::ToolCall(message::ToolCall { - id, - function: - message::ToolFunction { - name, arguments, .. - }, - .. - }) => { - tool_calls.push(ToolCall { - id: Some(id), - r#type: Some(ToolType::Function), - function: Some(ToolCallFunction { - name, - arguments: serde_json::to_value(arguments).unwrap_or_default(), - }), - }); - } - message::AssistantContent::Reasoning(Reasoning { reasoning, .. }) => { - let thinking = reasoning.join("\n"); - text_content.push(AssistantContent::Thinking { thinking }); + + for content in content.into_iter() { + match content { + message::AssistantContent::Text(message::Text { text }) => { + text_content.push(AssistantContent::Text { text }); + } + message::AssistantContent::ToolCall(message::ToolCall { + id, + function: + message::ToolFunction { + name, arguments, .. + }, + .. + }) => { + tool_calls.push(ToolCall { + id: Some(id), + r#type: Some(ToolType::Function), + function: Some(ToolCallFunction { + name, + arguments: serde_json::to_value(arguments).unwrap_or_default(), + }), + }); + } + message::AssistantContent::Reasoning(Reasoning { reasoning, .. 
}) => { + let thinking = reasoning.join("\n"); + text_content.push(AssistantContent::Thinking { thinking }); + } + message::AssistantContent::Image(_) => { + return Err(message::MessageError::ConversionError( + "Cohere currently doesn't support images.".to_owned(), + )); + } } - }); + } vec![Message::Assistant { content: text_content, diff --git a/rig-core/src/providers/galadriel.rs b/rig-core/src/providers/galadriel.rs index 71d87d437..236d34b54 100644 --- a/rig-core/src/providers/galadriel.rs +++ b/rig-core/src/providers/galadriel.rs @@ -382,6 +382,11 @@ impl TryFrom for Message { "Galadriel currently doesn't support reasoning.".into(), )); } + message::AssistantContent::Image(_) => { + return Err(MessageError::ConversionError( + "Galadriel currently doesn't support images.".into(), + )); + } } } diff --git a/rig-core/src/providers/gemini/completion.rs b/rig-core/src/providers/gemini/completion.rs index cbb72a375..f82513043 100644 --- a/rig-core/src/providers/gemini/completion.rs +++ b/rig-core/src/providers/gemini/completion.rs @@ -29,8 +29,8 @@ pub const GEMINI_1_0_PRO: &str = "gemini-1.0-pro"; use self::gemini_api_types::Schema; use crate::http_client::HttpClientExt; -use crate::message::Reasoning; use crate::models; +use crate::message::{self, MimeType, Reasoning}; use crate::providers::gemini::completion::gemini_api_types::{ AdditionalParameters, FunctionCallingMode, ToolConfig, }; @@ -388,6 +388,24 @@ impl TryFrom for completion::CompletionResponse { + let mime_type = message::MediaType::from_mime_type(&inline_data.mime_type); + + match mime_type { + Some(message::MediaType::Image(media_type)) => { + message::AssistantContent::image_base64( + &inline_data.data, + Some(media_type), + Some(message::ImageDetail::default()), + ) + } + _ => { + return Err(CompletionError::ResponseError(format!( + "Unsupported media type {mime_type:?}" + ))); + } + } + } PartKind::FunctionCall(function_call) => { completion::AssistantContent::tool_call( &function_call.name, @@ 
-595,7 +613,10 @@ pub mod gemini_api_types { }, message::Message::Assistant { content, .. } => Content { role: Some(Role::Model), - parts: content.into_iter().map(|content| content.into()).collect(), + parts: content + .into_iter() + .map(|content| content.try_into()) + .collect::, _>>()?, }, }) } @@ -1014,20 +1035,47 @@ pub mod gemini_api_types { } } - impl From for Part { - fn from(content: message::AssistantContent) -> Self { + impl TryFrom for Part { + type Error = message::MessageError; + + fn try_from(content: message::AssistantContent) -> Result { match content { - message::AssistantContent::Text(message::Text { text }) => text.into(), - message::AssistantContent::ToolCall(tool_call) => tool_call.into(), + message::AssistantContent::Text(message::Text { text }) => Ok(text.into()), + message::AssistantContent::Image(message::Image { + data, media_type, .. + }) => match media_type { + Some(media_type) => match media_type { + message::ImageMediaType::JPEG + | message::ImageMediaType::PNG + | message::ImageMediaType::WEBP + | message::ImageMediaType::HEIC + | message::ImageMediaType::HEIF => { + let part = PartKind::try_from((media_type, data))?; + Ok(Part { + thought: Some(false), + thought_signature: None, + part, + additional_params: None, + }) + } + _ => Err(message::MessageError::ConversionError(format!( + "Unsupported image media type {media_type:?}" + ))), + }, + None => Err(message::MessageError::ConversionError( + "Media type for image is required for Gemini".to_string(), + )), + }, + message::AssistantContent::ToolCall(tool_call) => Ok(tool_call.into()), message::AssistantContent::Reasoning(message::Reasoning { reasoning, .. 
}) => { - Part { + Ok(Part { thought: Some(true), thought_signature: None, part: PartKind::Text( reasoning.first().cloned().unwrap_or_else(|| "".to_string()), ), additional_params: None, - } + }) } } } diff --git a/rig-core/src/providers/groq.rs b/rig-core/src/providers/groq.rs index b0a5e48dd..49d0bbee9 100644 --- a/rig-core/src/providers/groq.rs +++ b/rig-core/src/providers/groq.rs @@ -204,6 +204,11 @@ impl TryFrom for Message { groq_reasoning = Some(reasoning.first().cloned().unwrap_or(String::new())); } + message::AssistantContent::Image(_) => { + return Err(MessageError::ConversionError( + "Groq currently doesn't support images.".into(), + )); + } } } diff --git a/rig-core/src/providers/huggingface/completion.rs b/rig-core/src/providers/huggingface/completion.rs index 8cd620976..75804e738 100644 --- a/rig-core/src/providers/huggingface/completion.rs +++ b/rig-core/src/providers/huggingface/completion.rs @@ -369,6 +369,11 @@ impl TryFrom for Vec { message::AssistantContent::Reasoning(_) => { unimplemented!("Reasoning is not supported on HuggingFace via Rig"); } + message::AssistantContent::Image(_) => { + unimplemented!( + "Image content is not supported on HuggingFace via Rig" + ); + } } (texts, tools) }, diff --git a/rig-core/src/providers/mistral/completion.rs b/rig-core/src/providers/mistral/completion.rs index ef432c24c..b9be84bbe 100644 --- a/rig-core/src/providers/mistral/completion.rs +++ b/rig-core/src/providers/mistral/completion.rs @@ -130,6 +130,9 @@ impl TryFrom for Vec { message::AssistantContent::Reasoning(_) => { unimplemented!("Reasoning content is not currently supported on Mistral via Rig"); } + message::AssistantContent::Image(_) => { + unimplemented!("Image content is not currently supported on Mistral via Rig"); + } } (texts, tools) }, @@ -589,6 +592,9 @@ where message::AssistantContent::Reasoning(_) => { unimplemented!("Reasoning is not supported on Mistral via Rig") } + message::AssistantContent::Image(_) => { + unimplemented!("Image
content is not supported on Mistral via Rig") + } } } diff --git a/rig-core/src/providers/ollama.rs b/rig-core/src/providers/ollama.rs index a52761298..c9d19ff07 100644 --- a/rig-core/src/providers/ollama.rs +++ b/rig-core/src/providers/ollama.rs @@ -844,24 +844,29 @@ impl TryFrom for Vec { } InternalMessage::Assistant { content, .. } => { let mut thinking: Option = None; - let (text_content, tool_calls) = content.into_iter().fold( - (Vec::new(), Vec::new()), - |(mut texts, mut tools), content| { - match content { - crate::message::AssistantContent::Text(text) => texts.push(text.text), - crate::message::AssistantContent::ToolCall(tool_call) => { - tools.push(tool_call) - } - crate::message::AssistantContent::Reasoning( - crate::message::Reasoning { reasoning, .. }, - ) => { - thinking = - Some(reasoning.first().cloned().unwrap_or(String::new())); - } + let mut text_content = Vec::new(); + let mut tool_calls = Vec::new(); + + for content in content.into_iter() { + match content { + crate::message::AssistantContent::Text(text) => { + text_content.push(text.text) } - (texts, tools) - }, - ); + crate::message::AssistantContent::ToolCall(tool_call) => { + tool_calls.push(tool_call) + } + crate::message::AssistantContent::Reasoning( + crate::message::Reasoning { reasoning, .. }, + ) => { + thinking = Some(reasoning.first().cloned().unwrap_or(String::new())); + } + crate::message::AssistantContent::Image(_) => { + return Err(crate::message::MessageError::ConversionError( + "Ollama currently doesn't support images.".into(), + )); + } + } + } // `OneOrMany` ensures at least one `AssistantContent::Text` or `ToolCall` exists, // so either `content` or `tool_calls` will have some content. 
diff --git a/rig-core/src/providers/openai/completion/mod.rs b/rig-core/src/providers/openai/completion/mod.rs index 2c54a3234..1c1bd603e 100644 --- a/rig-core/src/providers/openai/completion/mod.rs +++ b/rig-core/src/providers/openai/completion/mod.rs @@ -474,6 +474,11 @@ impl TryFrom> for Vec { message::AssistantContent::Reasoning(_) => { unimplemented!("The OpenAI Completions API doesn't support reasoning!"); } + message::AssistantContent::Image(_) => { + unimplemented!( + "The OpenAI Completions API doesn't support image content in assistant messages!" + ); + } } (texts, tools) }, diff --git a/rig-core/src/providers/openai/responses_api/mod.rs b/rig-core/src/providers/openai/responses_api/mod.rs index 5897d43cc..f2254e698 100644 --- a/rig-core/src/providers/openai/responses_api/mod.rs +++ b/rig-core/src/providers/openai/responses_api/mod.rs @@ -395,6 +395,12 @@ impl TryFrom for Vec { }), }); } + crate::message::AssistantContent::Image(_) => { + return Err(CompletionError::ProviderError( + "Assistant image content is not supported in OpenAI Responses API" + .to_string(), + )); + } } } @@ -1480,6 +1486,11 @@ impl TryFrom for Vec { name: None, status: (ToolStatus::Completed), }]), + crate::message::AssistantContent::Image(_) => { + Err(MessageError::ConversionError( + "Assistant image content is not supported in OpenAI Responses API".into(), + )) + } } } } diff --git a/rig-eternalai/src/providers/eternalai.rs b/rig-eternalai/src/providers/eternalai.rs index 4fe968a1a..31cce31e1 100644 --- a/rig-eternalai/src/providers/eternalai.rs +++ b/rig-eternalai/src/providers/eternalai.rs @@ -711,6 +711,9 @@ impl completion::CompletionModel for CompletionModel { arguments: tc.function.arguments.clone(), }) } + AssistantContent::Image(_) => { + unimplemented!("Image content is currently unimplemented on Eternal AI. If you need this, please open a ticket!") + } AssistantContent::Reasoning(_) => { unimplemented!("Reasoning is currently unimplemented on Eternal AI. 
If you need this, please open a ticket!") } From 73604eab073e7693517509f38168a1eadbe726a8 Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Fri, 14 Nov 2025 22:41:34 +1300 Subject: [PATCH 3/7] fix: Fix linting. --- rig-core/src/providers/anthropic/completion.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rig-core/src/providers/anthropic/completion.rs b/rig-core/src/providers/anthropic/completion.rs index b4c6d32e4..355aba50e 100644 --- a/rig-core/src/providers/anthropic/completion.rs +++ b/rig-core/src/providers/anthropic/completion.rs @@ -391,11 +391,9 @@ impl TryFrom for Content { fn try_from(text: message::AssistantContent) -> Result { match text { message::AssistantContent::Text(message::Text { text }) => Ok(Content::Text { text }), - message::AssistantContent::Image(_) => { - return Err(MessageError::ConversionError( - "Anthropic currently doesn't support images.".into(), - )); - } + message::AssistantContent::Image(_) => Err(MessageError::ConversionError( + "Anthropic currently doesn't support images.".to_string(), + )), message::AssistantContent::ToolCall(message::ToolCall { id, function, .. }) => { Ok(Content::ToolUse { id, @@ -458,7 +456,7 @@ impl TryFrom for Message { data, media_type, .. }) => { let media_type = media_type.ok_or(MessageError::ConversionError( - "Image media type is required for Claude API".into(), + "Image media type is required for Claude API".to_string(), ))?; let source = match data { From 27afbaa43021f7c28fe35e1ab4b8f4b1a690143f Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Mon, 24 Nov 2025 10:34:23 +1300 Subject: [PATCH 4/7] chore: Fix formatting. 
--- rig-core/src/providers/gemini/completion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rig-core/src/providers/gemini/completion.rs b/rig-core/src/providers/gemini/completion.rs index f82513043..24596bf34 100644 --- a/rig-core/src/providers/gemini/completion.rs +++ b/rig-core/src/providers/gemini/completion.rs @@ -29,8 +29,8 @@ pub const GEMINI_1_0_PRO: &str = "gemini-1.0-pro"; use self::gemini_api_types::Schema; use crate::http_client::HttpClientExt; -use crate::models; use crate::message::{self, MimeType, Reasoning}; +use crate::models; use crate::providers::gemini::completion::gemini_api_types::{ AdditionalParameters, FunctionCallingMode, ToolConfig, }; From bf727fc66f2ed0cf2ba94cbaa7bd085c7e2ed651 Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Mon, 24 Nov 2025 10:46:17 +1300 Subject: [PATCH 5/7] chore: Fix missing match arm. --- .../src/providers/openrouter/completion.rs | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/rig-core/src/providers/openrouter/completion.rs b/rig-core/src/providers/openrouter/completion.rs index 27d857d56..223c7cad8 100644 --- a/rig-core/src/providers/openrouter/completion.rs +++ b/rig-core/src/providers/openrouter/completion.rs @@ -226,19 +226,24 @@ impl TryFrom> for Vec { type Error = message::MessageError; fn try_from(value: OneOrMany) -> Result { - let (text_content, tool_calls, reasoning) = value.into_iter().fold( - (Vec::new(), Vec::new(), None), - |(mut texts, mut tools, mut reasoning), content| { - match content { - message::AssistantContent::Text(text) => texts.push(text), - message::AssistantContent::ToolCall(tool_call) => tools.push(tool_call), - message::AssistantContent::Reasoning(r) => { - reasoning = r.reasoning.into_iter().next(); - } + let mut text_content = Vec::new(); + let mut tool_calls = Vec::new(); + let mut reasoning = None; + + for content in value.into_iter() { + match content { + message::AssistantContent::Text(text) => 
text_content.push(text), + message::AssistantContent::ToolCall(tool_call) => tool_calls.push(tool_call), + message::AssistantContent::Reasoning(r) => { + reasoning = r.reasoning.into_iter().next(); } - (texts, tools, reasoning) - }, - ); + message::AssistantContent::Image(_) => { + return Err(Self::Error::ConversionError( + "OpenRouter currently doesn't support images.".to_string(), + )); + } + } + } // `OneOrMany` ensures at least one `AssistantContent::Text` or `ToolCall` exists, // so either `content` or `tool_calls` will have some content. From 6d11153c0105bcf37bdbe216040840a5d3105f0c Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Mon, 24 Nov 2025 10:53:04 +1300 Subject: [PATCH 6/7] feat: Add missing imageSize config. --- rig-core/src/providers/gemini/completion.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rig-core/src/providers/gemini/completion.rs b/rig-core/src/providers/gemini/completion.rs index 24596bf34..e60f6d825 100644 --- a/rig-core/src/providers/gemini/completion.rs +++ b/rig-core/src/providers/gemini/completion.rs @@ -1445,7 +1445,10 @@ pub mod gemini_api_types { #[derive(Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub struct ImageConfig { + #[serde(skip_serializing_if = "Option::is_none")] pub aspect_ratio: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub image_size: Option, } /// The Schema object allows the definition of input and output data types. These types can be objects, but also From 6067839f2b1e911692f4462f12afb003133db2e1 Mon Sep 17 00:00:00 2001 From: Erlend Powell Date: Mon, 24 Nov 2025 10:57:11 +1300 Subject: [PATCH 7/7] chore: Use consistent style. 
--- rig-core/src/providers/openrouter/completion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rig-core/src/providers/openrouter/completion.rs b/rig-core/src/providers/openrouter/completion.rs index 223c7cad8..499935033 100644 --- a/rig-core/src/providers/openrouter/completion.rs +++ b/rig-core/src/providers/openrouter/completion.rs @@ -239,7 +239,7 @@ impl TryFrom> for Vec { } message::AssistantContent::Image(_) => { return Err(Self::Error::ConversionError( - "OpenRouter currently doesn't support images.".to_string(), + "OpenRouter currently doesn't support images.".into(), )); } }