// Escapes raw parameter text so it can be placed between double quotes and
// form a valid JSON string literal.
//
// The caller (setCorrectValueType) wraps the returned text in '"' itself, so
// the enclosing quotes are intentionally NOT added or escaped here.
//
// Escaped characters:
//   "            -> \"
//   \            -> \\   (otherwise input such as  C:\dir  or a trailing '\'
//                         yields an invalid escape / swallows the closing quote)
//   \n \r \t \b \f -> their two-character JSON escapes
//   other bytes below 0x20 -> \u00XX (JSON forbids raw control characters)
// All remaining bytes, including UTF-8 multi-byte sequences, are copied as-is.
//
// @param input  raw, unescaped parameter value
// @return       escaped copy of input; input is not modified
static std::string escapeQuotes(const std::string& input) {
    static constexpr char kHexDigits[] = "0123456789abcdef";
    std::string output;
    output.reserve(input.size());
    for (const char c : input) {
        switch (c) {
        case '"':
            output += "\\\"";
            break;
        case '\\':
            output += "\\\\";
            break;
        case '\n':
            output += "\\n";
            break;
        case '\r':
            output += "\\r";
            break;
        case '\t':
            output += "\\t";
            break;
        case '\b':
            output += "\\b";
            break;
        case '\f':
            output += "\\f";
            break;
        default: {
            // Compare as unsigned: plain char may be signed, and UTF-8 bytes
            // (>= 0x80) must not be mistaken for control characters.
            const auto byte = static_cast<unsigned char>(c);
            if (byte < 0x20) {
                output += "\\u00";
                output += kHexDigits[byte >> 4];
                output += kHexDigits[byte & 0x0F];
            } else {
                output += c;
            }
            break;
        }
        }
    }
    return output;
}
Qwen3CoderToolParser::sendFullDelta(std::opti // now we need to add string toolCall.arguments to argumentsWrapper under "arguments" key rapidjson::Value toolCallsString(rapidjson::kStringType); toolCallsString.SetString(toolCall.arguments.c_str(), allocator); + SPDLOG_TRACE("Tool call arguments string: {}", toolCall.arguments); argumentsWrapper.AddMember("arguments", toolCallsString, allocator); auto currentDelta = wrapDelta(argumentsWrapper, this->toolCallIndex); SPDLOG_DEBUG("First delta doc: {}", documentToString(currentDelta)); diff --git a/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp b/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp index daf908a00d..559baa8264 100644 --- a/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp +++ b/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp @@ -612,7 +612,49 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) { {"value1", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":3,"function":{"arguments":"{\"arg1\": \"value1\"}"}}]}})"}, {"NOTHING IMPORTANT HERE", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"part of bfcl 'draft'.\n\n\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":4,"function":{"name":"cd"}}]}})"}, - {"\n\nResearchDocs\n\n\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":4,"function":{"arguments":"{\"folder\": \"ResearchDocs\"}"}}]}})"}}; + {"\n\nResearchDocs\n\n\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":4,"function":{"arguments":"{\"folder\": \"ResearchDocs\"}"}}]}})"}, + // example from cds: + {R"( + + + +)", + ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":5,"function":{"name":"string_tool"}}]}})"}, + {R"(FUNCTION FC_CreateJsonPayload : STRING +VAR_INPUT + Value1 : REAL; + Value2 : INT; + Value3 : BOOL; + 
Value4 : STRING(100); +END_VAR +VAR_OUTPUT + JsonPayload : STRING(1000); +END_VAR +VAR + TempStr : STRING(100); +END_VAR + + JsonPayload := '{'; + JsonPayload := JsonPayload + '"value1":' + REAL_TO_STRING(Value1, '', 2) + ','; + JsonPayload := JsonPayload + '"value2":' + INT_TO_STRING(Value2) + ','; + JsonPayload := JsonPayload + '"value3":' + BOOL_TO_STRING(Value3) + ','; + JsonPayload := JsonPayload + '"value4":"' + Value4 + '"'; + JsonPayload := JsonPayload + '}'; + +END_FUNCTION + +)", + ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":5,"function":{"arguments":"{\"arg1\": \"FUNCTION FC_CreateJsonPayload : STRING\\nVAR_INPUT\\n Value1 : REAL;\\n Value2 : INT;\\n Value3 : BOOL;\\n Value4 : STRING(100);\\nEND_VAR\\nVAR_OUTPUT\\n JsonPayload : STRING(1000);\\nEND_VAR\\nVAR\\n TempStr : STRING(100);\\nEND_VAR\\n\\n JsonPayload := '{';\\n JsonPayload := JsonPayload + '\\\"value1\\\":' + REAL_TO_STRING(Value1, '', 2) + ',';\\n JsonPayload := JsonPayload + '\\\"value2\\\":' + INT_TO_STRING(Value2) + ',';\\n JsonPayload := JsonPayload + '\\\"value3\\\":' + BOOL_TO_STRING(Value3) + ',';\\n JsonPayload := JsonPayload + '\\\"value4\\\":\\\"' + Value4 + '\\\"';\\n JsonPayload := JsonPayload + '}';\\n\\nEND_FUNCTION\"}"}}]}})"}, + {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":6,"function":{"name":"string_tool"}}]}})"}, + {R"( +if __name__ == "__main__": + addresses = {} + addresses["Hodor"] = """The door""" + addresses["Arya"] = "Winterfell" + for name, address in addresses.items(): + print(f'{name} lives at {address}') +)", + ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":6,"function":{"arguments":"{\"arg1\": \"if __name__ == \\\"__main__\\\":\\n addresses = {}\\n addresses[\\\"Hodor\\\"] = \\\"\\\"\\\"The door\\\"\\\"\\\"\\n addresses[\\\"Arya\\\"] = \\\"Winterfell\\\"\\n for name, address in addresses.items():\\n print(f'{name} lives at 
{address}')\"}"}}]}})"}}; for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) { i++; std::optional doc = outputParser->parseChunk(chunk, true, ov::genai::GenerationFinishReason::NONE); @@ -649,6 +691,28 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) { SPDLOG_ERROR("Expected:\n{}", expected); SPDLOG_ERROR("Got:\n{}", docStr); EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + // now we do final check + // we want to ensure that if we extract from expectedDelta["delta"]["tool_calls"][0]["function"]["arguments"] which as a string + // and then try to read this string as a json + if (expectedDelta.value().find("arguments") == std::string::npos) { + SPDLOG_TRACE("No arguments to check for delta:\n{}", expectedDelta.value()); + continue; // no arguments to check + } + auto docJsonIt = doc->FindMember("delta"); + ASSERT_NE(docJsonIt, doc->MemberEnd()); + auto toolCallsIt = docJsonIt->value.FindMember("tool_calls"); + ASSERT_NE(toolCallsIt, docJsonIt->value.MemberEnd()); + for (const auto& toolCall : toolCallsIt->value.GetArray()) { + auto functionIt = toolCall.FindMember("function"); + ASSERT_NE(functionIt, toolCall.MemberEnd()); + auto argumentsIt = functionIt->value.FindMember("arguments"); + ASSERT_NE(argumentsIt, functionIt->value.MemberEnd()); + const std::string& argumentsStr = argumentsIt->value.GetString(); + rapidjson::Document argsDoc; + argsDoc.Parse(argumentsStr.c_str()); // now check for errors + EXPECT_FALSE(argsDoc.HasParseError()) << "Arguments is not valid JSON for chunk: " << chunk << "\nArguments string:\n" + << argumentsStr; + } } } else { EXPECT_TRUE(false) << "Mismatch between expectedDelta and doc for id: " << i << " chunk:\n"