Skip to content

Commit 8eee6d0

Browse files
committed
feat: rewrite OpenRouter provider to support image data response from MCP tools, add example for puppeteer MCP server
1 parent 0ce34fd commit 8eee6d0

File tree

6 files changed

+286
-101
lines changed

6 files changed

+286
-101
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

huly-coder.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ web_search:
8282
# type: sse
8383
# url: http://127.0.0.1:8080/sse
8484
# protocol_version: 2024-11-05
85+
#
86+
# # Web browser automation (note: works only with the OpenRouter provider)
87+
# puppeteer:
88+
# transport:
89+
# type: stdio
90+
# command: "C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker.exe"
91+
# args: ["run", "-i", "--rm", "--init", "-e", "DOCKER_CONTAINER=true", "mcp/puppeteer"]
92+
# protocol_version: 2024-11-05
8593

8694
#---------------------------------------
8795
# AI Assistant Personality Configuration

src/agent/mod.rs

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ use rig::completion::CompletionModel;
4141
use rig::completion::CompletionResponse;
4242
use rig::embeddings::EmbeddingsBuilder;
4343
use rig::message::AssistantContent;
44+
use rig::message::ImageMediaType;
4445
use rig::message::Message;
4546
use rig::message::ToolCall;
4647
use rig::message::ToolResultContent;
@@ -214,7 +215,7 @@ impl Agent {
214215
};
215216

216217
let mut system_prompt_addons = Vec::default();
217-
for server_config in mcp_config.servers.values() {
218+
for (server_id, server_config) in mcp_config.servers.iter() {
218219
match &server_config.transport {
219220
McpClientTransport::Stdio(config) => {
220221
let transport = mcp_core::transport::ClientStdioTransport::new(
@@ -229,8 +230,18 @@ impl Agent {
229230
.unwrap_or(ProtocolVersion::V2025_03_26),
230231
)
231232
.build();
232-
mcp_client.open().await?;
233-
mcp_client.initialize().await?;
233+
mcp_client.open().await.with_context(|| {
234+
format!(
235+
"Failed to open MCP client {} with command {}",
236+
server_id, config.command
237+
)
238+
})?;
239+
mcp_client.initialize().await.with_context(|| {
240+
format!(
241+
"Failed to initialize MCP client {} with command {}",
242+
server_id, config.command
243+
)
244+
})?;
234245
let tools_list_res = mcp_client.list_tools(None, None).await?;
235246

236247
agent_builder = tools_list_res
@@ -610,7 +621,7 @@ impl AgentContext {
610621
.iter()
611622
.map(|t| match t {
612623
ToolResultContent::Text(text) => count_tokens(&text.text),
613-
_ => 0,
624+
ToolResultContent::Image(img) => count_tokens(&img.data),
614625
})
615626
.sum::<u32>(),
616627
_ => 0,
@@ -806,10 +817,43 @@ async fn process_messages(mut ctx: AgentContext, mut agent: Box<dyn HulyAgent>)
806817
_ => {}
807818
}
808819
}
820+
let tool_result_content: OneOrMany<ToolResultContent> = {
821+
// Due to the incomplete rig_mcp implementation, we try to detect image data in the response and split the message
822+
if tool_result.contains("|image-data:") {
823+
let mut parts = tool_result.split("|image-data:");
824+
let text = parts.next().unwrap();
825+
let image_data = parts.next().unwrap();
826+
let mut image_parts = image_data.split(";base64,");
827+
let image_type = image_parts.next().unwrap();
828+
let image_data = image_parts.next().unwrap();
829+
tracing::info!("image type: '{}'", image_type);
830+
OneOrMany::many([
831+
ToolResultContent::text(text),
832+
ToolResultContent::image(
833+
image_data,
834+
None, //.Some(rig::message::ContentFormat::Base64),
835+
match image_type {
836+
"image/png" => Some(ImageMediaType::PNG),
837+
"image/jpeg" => Some(ImageMediaType::JPEG),
838+
"image/gif" => Some(ImageMediaType::GIF),
839+
"image/webp" => Some(ImageMediaType::WEBP),
840+
"image/heic" => Some(ImageMediaType::HEIC),
841+
"image/heif" => Some(ImageMediaType::HEIF),
842+
"image/svg+xml" => Some(ImageMediaType::SVG),
843+
_ => Some(ImageMediaType::PNG),
844+
},
845+
None,
846+
),
847+
])
848+
.unwrap()
849+
} else {
850+
OneOrMany::one(ToolResultContent::text(tool_result))
851+
}
852+
};
809853
let result_message = Message::User {
810854
content: OneOrMany::one(UserContent::tool_result(
811855
tool_call.id.clone(),
812-
OneOrMany::one(ToolResultContent::text(tool_result)),
856+
tool_result_content,
813857
)),
814858
};
815859
ctx.add_message(ctx.add_env_message(result_message).await)

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ fn init_logger() {
6060
.with_target("ort", tracing::Level::WARN)
6161
.with_target("tokenizers", tracing::Level::WARN)
6262
.with_target("process_wrap", tracing::Level::INFO)
63+
.with_target("mcp_core::transport::client", tracing::Level::INFO)
6364
.with_default(tracing::Level::DEBUG),
6465
),
6566
)

src/providers/openrouter/completion.rs

Lines changed: 156 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use super::client::{ApiErrorResponse, ApiResponse, Client, Usage};
77

88
use rig::{
99
completion::{self, CompletionError, CompletionRequest},
10+
message::{ImageMediaType, MimeType},
1011
providers::openai::Message,
1112
OneOrMany,
1213
};
@@ -117,6 +118,71 @@ pub struct CompletionModel {
117118
pub model: String,
118119
}
119120

121+
fn user_text_to_json(content: rig::message::UserContent) -> serde_json::Value {
122+
match content {
123+
rig::message::UserContent::Text(text) => json!({
124+
"role": "user",
125+
"content": text.text,
126+
}),
127+
_ => unreachable!(),
128+
}
129+
}
130+
131+
fn user_content_to_json(
132+
content: rig::message::UserContent,
133+
) -> Result<serde_json::Value, CompletionError> {
134+
match content {
135+
rig::message::UserContent::Text(text) => Ok(json!({
136+
"type": "text",
137+
"text": text.text
138+
})),
139+
rig::message::UserContent::Image(image) => Ok(json!({
140+
"type": "image_url",
141+
"image_url": {
142+
"url": format!("data:{};base64,{}", image.media_type.unwrap_or(ImageMediaType::PNG).to_mime_type(), image.data),
143+
}
144+
})),
145+
rig::message::UserContent::Audio(_) => Err(CompletionError::RequestError(
146+
"Audio is not supported".into(),
147+
)),
148+
rig::message::UserContent::Document(_) => Err(CompletionError::RequestError(
149+
"Document is not supported".into(),
150+
)),
151+
rig::message::UserContent::ToolResult(_) => unreachable!(),
152+
}
153+
}
154+
155+
fn tool_content_to_json(
156+
content: Vec<rig::message::UserContent>,
157+
) -> Result<serde_json::Value, CompletionError> {
158+
let mut str_content = String::new();
159+
let mut tool_id = String::new();
160+
161+
for content in content.into_iter() {
162+
match content {
163+
rig::message::UserContent::ToolResult(tool_result) => {
164+
tool_id = tool_result.id;
165+
str_content = tool_result
166+
.content
167+
.iter()
168+
.map(|c| match c {
169+
rig::message::ToolResultContent::Text(text) => text.text.clone(),
170+
// ignore image content
171+
_ => "".to_string(),
172+
})
173+
.collect::<Vec<_>>()
174+
.join("");
175+
}
176+
_ => unreachable!(),
177+
}
178+
}
179+
Ok(json!({
180+
"role": "tool",
181+
"content": str_content,
182+
"tool_call_id": tool_id,
183+
}))
184+
}
185+
120186
impl CompletionModel {
121187
pub fn new(client: Client, model: &str) -> Self {
122188
Self {
@@ -130,64 +196,103 @@ impl CompletionModel {
130196
completion_request: CompletionRequest,
131197
) -> Result<Value, CompletionError> {
132198
// Add preamble to chat history (if available)
133-
let mut full_history: Vec<Message> = match &completion_request.preamble {
134-
Some(preamble) => vec![Message::system(preamble)],
199+
let mut full_history: Vec<serde_json::Value> = match &completion_request.preamble {
200+
Some(preamble) => vec![json!({
201+
"role": "system",
202+
"content": preamble,
203+
})],
135204
None => vec![],
136205
};
137206

138207
// Convert existing chat history
139-
let chat_history: Vec<Message> = completion_request
140-
.chat_history
141-
.into_iter()
142-
.map(|message| message.try_into())
143-
.collect::<Result<Vec<Vec<Message>>, _>>()?
144-
.into_iter()
145-
.flatten()
146-
.collect();
147-
148-
// Combine all messages into a single history
149-
full_history.extend(chat_history);
150-
let messages: Vec<Value> = full_history
151-
.into_iter()
152-
.map(|ref m| match m {
153-
Message::Assistant {
154-
content,
155-
refusal: _,
156-
audio: _,
157-
name: _,
158-
tool_calls,
159-
} => {
160-
if !tool_calls.is_empty() {
161-
json!({
162-
"role": "assistant",
163-
"content": null,
164-
"tool_calls": tool_calls,
165-
})
208+
for message in completion_request.chat_history.into_iter() {
209+
match message {
210+
rig::message::Message::User { content } => {
211+
if content.len() == 1
212+
&& matches!(content.first(), rig::message::UserContent::Text(_))
213+
{
214+
full_history.push(user_text_to_json(content.first()));
215+
} else if content
216+
.iter()
217+
.any(|c| matches!(c, rig::message::UserContent::ToolResult(_)))
218+
{
219+
let (tool_content, user_content) =
220+
content.into_iter().partition::<Vec<_>, _>(|c| {
221+
matches!(c, rig::message::UserContent::ToolResult(_))
222+
});
223+
full_history.push(tool_content_to_json(tool_content.clone())?);
224+
for tool_content in tool_content.into_iter() {
225+
match tool_content {
226+
rig::message::UserContent::ToolResult(result) => {
227+
for tool_result_content in result.content.into_iter() {
228+
match tool_result_content {
229+
rig::message::ToolResultContent::Image(image) => {
230+
full_history.push(json!({
231+
"role": "user",
232+
"content": [{
233+
"type": "image_url",
234+
"image_url": {
235+
"url": format!("data:{};base64,{}", image.media_type.unwrap_or(ImageMediaType::PNG).to_mime_type(), image.data),
236+
}
237+
}]
238+
}));
239+
}
240+
_ => {}
241+
}
242+
}
243+
}
244+
_ => unreachable!(),
245+
}
246+
}
247+
if !user_content.is_empty() {
248+
if user_content.len() == 1 {
249+
full_history
250+
.push(user_text_to_json(user_content.first().unwrap().clone()));
251+
} else {
252+
let user_content = user_content
253+
.into_iter()
254+
.map(user_content_to_json)
255+
.collect::<Result<Vec<_>, _>>()?;
256+
full_history
257+
.push(json!({ "role": "user", "content": user_content}));
258+
}
259+
}
166260
} else {
167-
json!({
168-
"role": "assistant",
169-
"content": match content.first().unwrap() {
170-
AssistantContent::Text { text } => text,
171-
_ => "",
172-
},
173-
})
261+
let content = content
262+
.into_iter()
263+
.map(user_content_to_json)
264+
.collect::<Result<Vec<_>, _>>()?;
265+
full_history.push(json!({ "role": "user", "content": content}));
174266
}
175267
}
176-
Message::ToolResult {
177-
tool_call_id,
178-
content,
179-
} => {
180-
let content = json!(content.first());
181-
let text = content.as_object().unwrap().get("text").unwrap();
182-
json!({
183-
"role": "tool",
184-
"content": text,
185-
"tool_call_id": tool_call_id,
186-
})
268+
rig::message::Message::Assistant { content } => {
269+
for content in content {
270+
match content {
271+
rig::message::AssistantContent::Text(text) => {
272+
full_history.push(json!({
273+
"role": "assistant",
274+
"content": text.text
275+
}));
276+
}
277+
rig::message::AssistantContent::ToolCall(tool_call) => {
278+
full_history.push(json!({
279+
"role": "assistant",
280+
"content": null,
281+
"tool_calls": [{
282+
"id": tool_call.id,
283+
"type": "function",
284+
"function": {
285+
"name": tool_call.function.name,
286+
"arguments": tool_call.function.arguments.to_string()
287+
}
288+
}]
289+
}));
290+
}
291+
}
292+
}
187293
}
188-
_ => json!(m),
189-
})
190-
.collect();
294+
};
295+
}
191296

192297
let tools = completion_request
193298
.tools
@@ -201,7 +306,7 @@ impl CompletionModel {
201306
.collect::<Vec<_>>();
202307
let request = json!({
203308
"model": self.model,
204-
"messages": messages,
309+
"messages": full_history,
205310
"tools": tools,
206311
"temperature": completion_request.temperature,
207312
});

0 commit comments

Comments
 (0)