-
-
Notifications
You must be signed in to change notification settings - Fork 6.6k
fix(nvidia_nim): extract <think> reasoning blocks from content in transform_response #24276
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,21 @@ | ||
| """ | ||
| Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer | ||
| Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer | ||
|
|
||
| This is OpenAI compatible | ||
| This is OpenAI compatible | ||
|
|
||
| This file only contains param mapping logic | ||
|
|
||
| API calling is done using the OpenAI SDK with an api_base | ||
| """ | ||
| from typing import Any, List, Optional, cast | ||
|
|
||
| import httpx | ||
|
|
||
| from litellm.litellm_core_utils.prompt_templates.common_utils import ( | ||
| _parse_content_for_reasoning, | ||
| ) | ||
| from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig | ||
| from litellm.types.utils import ModelResponse | ||
|
|
||
|
|
||
| class NvidiaNimConfig(OpenAIGPTConfig): | ||
|
|
@@ -108,3 +116,64 @@ def map_openai_params( | |
| elif param in supported_openai_params: | ||
| optional_params[param] = value | ||
| return optional_params | ||
|
|
||
| def transform_response( # type: ignore[override] | ||
| self, | ||
| model: str, | ||
| raw_response: httpx.Response, | ||
| model_response: ModelResponse, | ||
| logging_obj: Any, | ||
| request_data: dict, | ||
| messages: List[Any], | ||
| optional_params: dict, | ||
| litellm_params: dict, | ||
| encoding: Any, | ||
| api_key: Optional[str] = None, | ||
| json_mode: Optional[bool] = None, | ||
| ) -> ModelResponse: | ||
| """ | ||
| Override transform_response to extract <think>…</think> reasoning blocks | ||
| produced by NVIDIA NIM reasoning models (e.g. minimax/minimax-m1). | ||
|
|
||
| NIM forwards the raw model output as a plain OpenAI-compatible response: | ||
| the <think> block appears inside ``choices[0].message.content`` and | ||
| ``reasoning_content`` is absent. The parent class's | ||
| ``_extract_reasoning_content`` helper already calls | ||
| ``_parse_content_for_reasoning``, but only after the response has been | ||
| deserialized. We do the same pre-processing here on the raw JSON so | ||
| that the upstream consumer always sees a clean split between | ||
| ``reasoning_content`` and ``content``. | ||
| """ | ||
| # Let the parent class build the base ModelResponse first. | ||
| result = super().transform_response( | ||
| model=model, | ||
| raw_response=raw_response, | ||
| model_response=model_response, | ||
| logging_obj=logging_obj, | ||
| request_data=request_data, | ||
| messages=messages, | ||
| optional_params=optional_params, | ||
| litellm_params=litellm_params, | ||
| encoding=encoding, | ||
| api_key=api_key, | ||
| json_mode=json_mode, | ||
| ) | ||
|
|
||
| # Post-process: if reasoning_content is still None but the content | ||
| # contains <think>…</think> raw tags, extract them now. | ||
| for choice in getattr(result, "choices", []): | ||
| message = getattr(choice, "message", None) | ||
| if message is None: | ||
| continue | ||
| if getattr(message, "reasoning_content", None) is not None: | ||
| # Already parsed — nothing to do. | ||
| continue | ||
| content = getattr(message, "content", None) | ||
| if not isinstance(content, str): | ||
| continue | ||
| reasoning, stripped_content = _parse_content_for_reasoning(content) | ||
| if reasoning is not None: | ||
| message.reasoning_content = reasoning | ||
| message.content = stripped_content | ||
|
|
||
| return result | ||
|
Comment on lines
+120
to
+179
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This PR claims to fix issue #24253, but no tests are included. Per the project's review standards, a fix claiming to resolve a reported issue should include passing tests as evidence. Please add a unit test similar to the following pattern: def test_nvidia_nim_transform_response_extracts_reasoning():
from unittest.mock import MagicMock
import json, httpx
from litellm.llms.nvidia_nim.chat.transformation import NvidiaNimConfig
config = NvidiaNimConfig()
fake_body = {
"id": "test-id",
"object": "chat.completion",
"created": 1234567890,
"model": "minimax/minimax-m1",
"choices": [{"index": 0, "message": {"role": "assistant", "content": "<think>The user just sent ping...</think>\n\npong"}, "finish_reason": "stop"}],
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
}
raw_response = MagicMock(spec=httpx.Response)
raw_response.text = json.dumps(fake_body)
raw_response.json.return_value = fake_body
raw_response.status_code = 200
raw_response.headers = {}
from litellm import ModelResponse
result = config.transform_response(
model="minimax/minimax-m1",
raw_response=raw_response,
model_response=ModelResponse(),
logging_obj=MagicMock(),
request_data={},
messages=[],
optional_params={},
litellm_params={},
encoding=None,
)
assert result.choices[0].message.reasoning_content == "The user just sent ping..."
    assert result.choices[0].message.content == "\n\npong"
Rule Used: What: Ensure that any PR claiming to fix an issue ... (source) |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`cast` import: `cast` is imported from `typing` but never referenced anywhere in this file. Remove it to keep the import block clean.