From 2eb50d3360f939a5fde0250d64ca1befa19989fd Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Thu, 30 Oct 2025 14:11:16 +0100 Subject: [PATCH 1/2] save --- src/llm/io_processing/mistral/tool_parser.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index c6ca6b7a56..7e6eaf8fe1 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -38,8 +38,12 @@ void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vectorbotTokenId && !immediateParsingEnabled) { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Begin of tools token has not been found in the model output. Exiting parser."); - return; + if (parsedOutput.content.size() >= 2 && parsedOutput.content[0] == '[' && parsedOutput.content[1] == '{') { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Model output starts with '[{' but begin of tools token is missing. Proceeding with parsing."); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Begin of tools token or '[{' has not been found in the model output. Exiting parser."); + return; + } } rapidjson::Document toolsDoc; From 255c2a50cee4110b86e86f09c129c1fd14fb5d8a Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Thu, 30 Oct 2025 16:56:56 +0100 Subject: [PATCH 2/2] save --- .../mistral_output_parser_test.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index 727dd0319e..4ab822915b 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -79,6 +79,19 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) { } } +TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall_MissingToolCallStartTag) { + std::string testInput = "[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + TEST_F(MistralOutputParserTest, ParseToolCallOutputWithThreeToolCalls) { std::string input = "[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}," "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}}," @@ -152,7 +165,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } @@ -163,7 +176,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSing auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } @@ -179,7 +192,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsC std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); // Same expected content as tokenizer does not add special tokens - EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n [{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); + EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); }