Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions litellm/llms/nvidia_nim/chat/transformation.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
"""
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

This is OpenAI compatible
This is OpenAI compatible

This file only contains param mapping logic

API calling is done using the OpenAI SDK with an api_base
"""
from typing import Any, List, Optional, cast
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Unused cast import

cast is imported from typing but never referenced anywhere in this file. Remove it to keep the import block clean.

Suggested change
from typing import Any, List, Optional, cast
from typing import Any, List, Optional


import httpx

from litellm.litellm_core_utils.prompt_templates.common_utils import (
_parse_content_for_reasoning,
)
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.types.utils import ModelResponse


class NvidiaNimConfig(OpenAIGPTConfig):
Expand Down Expand Up @@ -108,3 +116,64 @@ def map_openai_params(
elif param in supported_openai_params:
optional_params[param] = value
return optional_params

def transform_response(  # type: ignore[override]
    self,
    model: str,
    raw_response: httpx.Response,
    model_response: ModelResponse,
    logging_obj: Any,
    request_data: dict,
    messages: List[Any],
    optional_params: dict,
    litellm_params: dict,
    encoding: Any,
    api_key: Optional[str] = None,
    json_mode: Optional[bool] = None,
) -> ModelResponse:
    """
    Build the base ModelResponse via the parent class, then split out any
    raw ``<think>…</think>`` reasoning block that NVIDIA NIM reasoning
    models (e.g. minimax/minimax-m1) leave inside
    ``choices[*].message.content``.

    NIM returns a plain OpenAI-compatible payload with no separate
    ``reasoning_content`` field, so after deserialization we re-parse the
    message content with ``_parse_content_for_reasoning`` and, when a
    reasoning block is found, move it into ``message.reasoning_content``
    while leaving only the stripped text in ``message.content``.
    Messages that already carry ``reasoning_content``, or whose content is
    not a plain string, are left untouched.
    """
    # Delegate deserialization and all standard handling to the parent.
    base_response = super().transform_response(
        model=model,
        raw_response=raw_response,
        model_response=model_response,
        logging_obj=logging_obj,
        request_data=request_data,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        encoding=encoding,
        api_key=api_key,
        json_mode=json_mode,
    )

    # Second pass: extract <think> blocks that survived deserialization.
    for choice in getattr(base_response, "choices", []):
        msg = getattr(choice, "message", None)
        if msg is None:
            continue
        if getattr(msg, "reasoning_content", None) is not None:
            # Upstream already populated reasoning_content — leave as-is.
            continue
        raw_content = getattr(msg, "content", None)
        if not isinstance(raw_content, str):
            # Non-string content (e.g. None or structured parts) — skip.
            continue
        extracted_reasoning, remaining_content = _parse_content_for_reasoning(
            raw_content
        )
        if extracted_reasoning is None:
            continue
        msg.reasoning_content = extracted_reasoning
        msg.content = remaining_content

    return base_response
Comment on lines +120 to +179
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 No unit test for the new reasoning extraction

This PR claims to fix issue #24253 but the tests/llm_translation/test_nvidia_nim.py file has no new test that covers the transform_response reasoning-extraction path. Without a mock-based test (e.g. constructing a fake httpx.Response whose body contains <think>…</think> and asserting reasoning_content / content are correctly split), there is no automated verification that the fix works and no regression guard.

Per the project's review standards, a fix claiming to resolve a reported issue should include passing tests as evidence. Please add a unit test similar to the following pattern:

def test_nvidia_nim_transform_response_extracts_reasoning():
    """A raw NIM body with a <think> block is split into reasoning_content / content."""
    import json

    import httpx
    from unittest.mock import MagicMock

    from litellm import ModelResponse
    from litellm.llms.nvidia_nim.chat.transformation import NvidiaNimConfig

    # OpenAI-compatible payload whose assistant content embeds a <think> block.
    response_payload = {
        "id": "test-id",
        "object": "chat.completion",
        "created": 1234567890,
        "model": "minimax/minimax-m1",
        "choices": [{"index": 0, "message": {"role": "assistant", "content": "<think>The user just sent ping...</think>\n\npong"}, "finish_reason": "stop"}],
        "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
    }

    # Fake httpx.Response carrying the payload both as text and parsed JSON.
    mocked_http_response = MagicMock(spec=httpx.Response)
    mocked_http_response.text = json.dumps(response_payload)
    mocked_http_response.json.return_value = response_payload
    mocked_http_response.status_code = 200
    mocked_http_response.headers = {}

    transformed = NvidiaNimConfig().transform_response(
        model="minimax/minimax-m1",
        raw_response=mocked_http_response,
        model_response=ModelResponse(),
        logging_obj=MagicMock(),
        request_data={},
        messages=[],
        optional_params={},
        litellm_params={},
        encoding=None,
    )

    first_message = transformed.choices[0].message
    assert first_message.reasoning_content == "The user just sent ping..."
    assert first_message.content == "\n\npong"

Rule Used: What: Ensure that any PR claiming to fix an issue ... (source)

6 changes: 6 additions & 0 deletions litellm/llms/openrouter/chat/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ def transform_request(
Returns:
dict: The transformed request. Sent as the body of the API call.
"""
# OpenRouter expects the actual model ID (e.g. "mistralai/mistral-7b-instruct"),
# not the litellm-internal "openrouter/<model>" form. Strip the provider
# prefix so it is never sent to the API.
if model.startswith("openrouter/"):
model = model[len("openrouter/"):]

if self._supports_cache_control_in_content(model):
messages = self._move_cache_control_to_content(messages)

Expand Down
Loading