Skip to content

Commit e288b6d

Browse files
committed
Add fix for Qwen3Coder tool parser quote handling
* Improve tests to verify if produced output is proper JSON
1 parent 43b00ca commit e288b6d

File tree

2 files changed

+83
-3
lines changed

2 files changed

+83
-3
lines changed

src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,20 @@ static const ParametersTypeMap_t parseToolSchema(const std::string& functionName
127127
return result;
128128
}
129129

130-
// helper function to escape \n
130+
static std::string escapeQuotes(const std::string& input) {
131+
std::string output;
132+
output.reserve(input.size());
133+
for (char c : input) {
134+
switch (c) {
135+
case '"':
136+
output += "\\\"";
137+
break;
138+
default:
139+
output += c;
140+
}
141+
}
142+
return output;
143+
}
131144
static std::string escapeString(const std::string& input) {
132145
std::string output;
133146
output.reserve(input.size());
@@ -150,7 +163,7 @@ static std::string setCorrectValueType(std::string& inputValue, const std::strin
150163
return inputValue;
151164
}
152165
if (paramIt->second == ParameterType::STRING) {
153-
inputValue = "\"" + inputValue + "\"";
166+
inputValue = "\"" + escapeQuotes(inputValue) + "\"";
154167
return inputValue;
155168
}
156169
if (paramIt->second == ParameterType::BOOLEAN) {
@@ -236,6 +249,8 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) {
236249
if (paramIt == this->toolsParametersTypeMap.end()) {
237250
SPDLOG_DEBUG("Tool schema not found for tool: {}, leaving parameter: {} as string", this->currentFunction.name, this->currentParameterName);
238251
} else {
252+
// we don't want to escape entry/exit " for string parameters
253+
// auto escaped = escapeString(parameterValue);
239254
parameterValue = escapeString(setCorrectValueType(parameterValue, this->currentParameterName, paramIt->second));
240255
}
241256
auto res = this->currentFunction.parameters.try_emplace(this->currentParameterName, parameterValue);
@@ -356,6 +371,7 @@ std::optional<rapidjson::Document> Qwen3CoderToolParser::sendFullDelta(std::opti
356371
// now we need to add string toolCall.arguments to argumentsWrapper under "arguments" key
357372
rapidjson::Value toolCallsString(rapidjson::kStringType);
358373
toolCallsString.SetString(toolCall.arguments.c_str(), allocator);
374+
SPDLOG_TRACE("Tool call arguments string: {}", toolCall.arguments);
359375
argumentsWrapper.AddMember("arguments", toolCallsString, allocator);
360376
auto currentDelta = wrapDelta(argumentsWrapper, this->toolCallIndex);
361377
SPDLOG_DEBUG("First delta doc: {}", documentToString(currentDelta));

src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,49 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) {
612612
{"value1</parameter></function></tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":3,"function":{"arguments":"{\"arg1\": \"value1\"}"}}]}})"},
613613
{"NOTHING IMPORTANT HERE", ov::genai::GenerationFinishReason::NONE, std::nullopt},
614614
{"part of bfcl 'draft'.\n\n<function=cd>\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":4,"function":{"name":"cd"}}]}})"},
615-
{"\n<parameter=folder>\nResearchDocs\n</parameter>\n</function>\n</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":4,"function":{"arguments":"{\"folder\": \"ResearchDocs\"}"}}]}})"}};
615+
{"\n<parameter=folder>\nResearchDocs\n</parameter>\n</function>\n</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":4,"function":{"arguments":"{\"folder\": \"ResearchDocs\"}"}}]}})"},
616+
// example from cds:
617+
{R"(
618+
<tool_call>
619+
<function=string_tool>
620+
<parameter=arg1>
621+
)",
622+
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":5,"function":{"name":"string_tool"}}]}})"},
623+
{R"(FUNCTION FC_CreateJsonPayload : STRING
624+
VAR_INPUT
625+
Value1 : REAL;
626+
Value2 : INT;
627+
Value3 : BOOL;
628+
Value4 : STRING(100);
629+
END_VAR
630+
VAR_OUTPUT
631+
JsonPayload : STRING(1000);
632+
END_VAR
633+
VAR
634+
TempStr : STRING(100);
635+
END_VAR
636+
637+
JsonPayload := '{';
638+
JsonPayload := JsonPayload + '"value1":' + REAL_TO_STRING(Value1, '', 2) + ',';
639+
JsonPayload := JsonPayload + '"value2":' + INT_TO_STRING(Value2) + ',';
640+
JsonPayload := JsonPayload + '"value3":' + BOOL_TO_STRING(Value3) + ',';
641+
JsonPayload := JsonPayload + '"value4":"' + Value4 + '"';
642+
JsonPayload := JsonPayload + '}';
643+
644+
END_FUNCTION</parameter>
645+
</function>
646+
</tool_call>)",
647+
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":5,"function":{"arguments":"{\"arg1\": \"FUNCTION FC_CreateJsonPayload : STRING\\nVAR_INPUT\\n Value1 : REAL;\\n Value2 : INT;\\n Value3 : BOOL;\\n Value4 : STRING(100);\\nEND_VAR\\nVAR_OUTPUT\\n JsonPayload : STRING(1000);\\nEND_VAR\\nVAR\\n TempStr : STRING(100);\\nEND_VAR\\n\\n JsonPayload := '{';\\n JsonPayload := JsonPayload + '\\\"value1\\\":' + REAL_TO_STRING(Value1, '', 2) + ',';\\n JsonPayload := JsonPayload + '\\\"value2\\\":' + INT_TO_STRING(Value2) + ',';\\n JsonPayload := JsonPayload + '\\\"value3\\\":' + BOOL_TO_STRING(Value3) + ',';\\n JsonPayload := JsonPayload + '\\\"value4\\\":\\\"' + Value4 + '\\\"';\\n JsonPayload := JsonPayload + '}';\\n\\nEND_FUNCTION\"}"}}]}})"},
648+
{"<tool_call><function=string_tool><parameter=arg1>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":6,"function":{"name":"string_tool"}}]}})"},
649+
{R"(
650+
if __name__ == "__main__":
651+
addresses = {}
652+
addresses["Hodor"] = """The door"""
653+
addresses["Arya"] = "Winterfell"
654+
for name, address in addresses.items():
655+
print(f'{name} lives at {address}')
656+
</parameter></function></tool_call>)",
657+
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":6,"function":{"arguments":"{\"arg1\": \"if __name__ == \\\"__main__\\\":\\n addresses = {}\\n addresses[\\\"Hodor\\\"] = \\\"\\\"\\\"The door\\\"\\\"\\\"\\n addresses[\\\"Arya\\\"] = \\\"Winterfell\\\"\\n for name, address in addresses.items():\\n print(f'{name} lives at {address}')\"}"}}]}})"}};
616658
for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) {
617659
i++;
618660
std::optional<rapidjson::Document> doc = outputParser->parseChunk(chunk, true, ov::genai::GenerationFinishReason::NONE);
@@ -649,6 +691,28 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) {
649691
SPDLOG_ERROR("Expected:\n{}", expected);
650692
SPDLOG_ERROR("Got:\n{}", docStr);
651693
EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk;
694+
// now we do final check
695+
// we want to ensure that if we extract from expectedDelta["delta"]["tool_calls"][0]["function"]["arguments"] which as a string
696+
// and then try to read this string as a json
697+
if (expectedDelta.value().find("arguments") == std::string::npos) {
698+
SPDLOG_TRACE("No arguments to check for delta:\n{}", expectedDelta.value());
699+
continue; // no arguments to check
700+
}
701+
auto docJsonIt = doc->FindMember("delta");
702+
ASSERT_NE(docJsonIt, doc->MemberEnd());
703+
auto toolCallsIt = docJsonIt->value.FindMember("tool_calls");
704+
ASSERT_NE(toolCallsIt, docJsonIt->value.MemberEnd());
705+
for (auto& toolCall : toolCallsIt->value.GetArray()) {
706+
auto functionIt = toolCall.FindMember("function");
707+
ASSERT_NE(functionIt, toolCall.MemberEnd());
708+
auto argumentsIt = functionIt->value.FindMember("arguments");
709+
ASSERT_NE(argumentsIt, functionIt->value.MemberEnd());
710+
const std::string& argumentsStr = argumentsIt->value.GetString();
711+
rapidjson::Document argsDoc;
712+
argsDoc.Parse(argumentsStr.c_str()); // now check for errors
713+
EXPECT_FALSE(argsDoc.HasParseError()) << "Arguments is not valid JSON for chunk: " << chunk << "\nArguments string:\n"
714+
<< argumentsStr;
715+
}
652716
}
653717
} else {
654718
EXPECT_TRUE(false) << "Mismatch between expectedDelta and doc for id: " << i << " chunk:\n"

0 commit comments

Comments
 (0)