Add anthropic endpoint #21341
Base: main
Review comment:

Doc changes persist.
File: vllm/entrypoints/openai/api_server.py
@@ -106,13 +106,54 @@
```python
from vllm.v1.metrics.prometheus import get_prometheus_registry
from vllm.version import __version__ as VLLM_VERSION

from fastapi import APIRouter, Request, HTTPException
from uuid import uuid4
from .schemas import AnthropicMessagesRequest, AnthropicMessagesResponse
```
Review comment:

The import path for the Anthropic schemas appears to be incorrect. The new schemas are defined in `vllm/entrypoints/openai/protocol_anthropic.py`, not in a `schemas` module, so this import will fail at startup.

Suggested change:
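(The suggestion body was lost in extraction; given the file this PR adds, it would presumably read:)

```python
from .protocol_anthropic import (AnthropicMessagesRequest,
                                 AnthropicMessagesResponse)
```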
```python
prometheus_multiproc_dir: tempfile.TemporaryDirectory

# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
logger = init_logger('vllm.entrypoints.openai.api_server')

_running_tasks: set[asyncio.Task] = set()

router = APIRouter()


@router.post("/v1/messages")
async def anthropic_messages(request: Request):
    body = await request.json()

    # Validate Anthropic headers and fields
    api_key = request.headers.get("x-api-key")
    version = request.headers.get("anthropic-version")
    if not api_key or not version:
        raise HTTPException(status_code=400,
                            detail="Missing required Anthropic headers.")

    # Convert messages to prompt
    prompt = convert_messages_to_prompt(body["messages"])

    # Call existing vLLM generation logic
    llm_response = await vllm_generate(
        model=body["model"],
        prompt=prompt,
        max_tokens=body.get("max_tokens", 1024)
    )

    # Return response in Anthropic format
    output = {
        "id": f"msg_{uuid4().hex[:24]}",
        "type": "message",
        "role": "assistant",
        "content": [{"type": "text", "text": llm_response["text"]}],
        "model": body["model"],
        "stop_reason": llm_response.get("stop_reason", "end_turn"),
        "stop_sequence": None,
        "usage": {
            "input_tokens": llm_response["prompt_tokens"],
            "output_tokens": llm_response["completion_tokens"],
        }
    }
    return output
```
Review comment on lines +122 to +155:
This endpoint implementation has several critical issues that will prevent it from working correctly: `vllm_generate` is not defined anywhere, the request body is parsed by hand instead of being validated against the new `AnthropicMessagesRequest` schema, and error responses from the engine are never handled.

I suggest rewriting this function to address these points by routing through the existing `completion` handler. You will need to add the following imports at the top of the file:

```python
from vllm.entrypoints.openai.protocol import (CompletionRequest,
                                              CompletionResponse,
                                              ErrorResponse)
from vllm.entrypoints.openai.tool_parsers.utils import (
    convert_messages_to_prompt)
```

Here is the suggested implementation for the `anthropic_messages` endpoint:

```python
@router.post("/v1/messages", response_model=AnthropicMessagesResponse)
async def anthropic_messages(anthropic_request: AnthropicMessagesRequest,
                             raw_request: Request):
    # Validate Anthropic headers
    api_key = raw_request.headers.get("x-api-key")
    version = raw_request.headers.get("anthropic-version")
    if not api_key or not version:
        raise HTTPException(status_code=400,
                            detail="Missing required Anthropic headers.")

    # Convert messages to prompt
    prompt = convert_messages_to_prompt(anthropic_request.messages)

    # Create a vLLM CompletionRequest
    completion_request = CompletionRequest(
        model=anthropic_request.model,
        prompt=prompt,
        max_tokens=anthropic_request.max_tokens,
        stream=False,  # This endpoint is non-streaming.
    )

    # Get the completion handler and call it
    completion_handler = completion(raw_request)
    if completion_handler is None:
        raise HTTPException(status_code=500,
                            detail="Completion handler is not available.")
    result = await completion_handler.create_completion(completion_request,
                                                        raw_request)
    if isinstance(result, ErrorResponse):
        raise HTTPException(status_code=result.code, detail=result.message)
    assert isinstance(result, CompletionResponse)

    # Return response in Anthropic format
    return AnthropicMessagesResponse(
        id=f"msg_{uuid4().hex[:24]}",
        type="message",
        role="assistant",
        content=[{
            "type": "text",
            "text": result.choices[0].text
        }],
        model=anthropic_request.model,
        stop_reason=result.choices[0].finish_reason,
        stop_sequence=None,
        usage={
            "input_tokens": result.usage.prompt_tokens,
            "output_tokens": result.usage.completion_tokens,
        })
```
The new code sits just above the existing `lifespan` hook:

```python
@asynccontextmanager
async def lifespan(app: FastAPI):
```
Review comment:

Sorry to ask you to move this again, but could it instead be moved to …?
File: vllm/entrypoints/openai/protocol_anthropic.py (new file)

@@ -0,0 +1,23 @@
```python
from pydantic import BaseModel
from typing import List, Dict, Any, Optional


class AnthropicMessageBlock(BaseModel):
    role: str  # "user" | "assistant"
    content: Any


class AnthropicMessagesRequest(BaseModel):
    model: str
    messages: List[AnthropicMessageBlock]
    max_tokens: int
    system: Optional[str] = None
    # Add further optional fields per API docs


class AnthropicMessagesResponse(BaseModel):
    id: str
    type: str = "message"
    role: str = "assistant"
    content: List[Dict[str, Any]]
    model: str
    stop_reason: Optional[str]
    stop_sequence: Optional[str]
    usage: Dict[str, int]
```

CI reports check failures on lines 2 and 22 of this file.
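As a quick sanity check of these models, one could validate a sample payload through Pydantic; a sketch assuming Pydantic v2 (`model_validate`) and a made-up payload, with the classes above in scope:

```python
payload = {
    "model": "placeholder-model",  # hypothetical model name
    "max_tokens": 64,
    "messages": [{"role": "user", "content": "Hello"}],
}

# Nested dicts are coerced into AnthropicMessageBlock instances.
request = AnthropicMessagesRequest.model_validate(payload)
assert request.messages[0].role == "user"
```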
File: vllm/entrypoints/openai/tool_parsers/utils.py
@@ -122,3 +122,14 @@ def consume_space(i: int, s: str) -> int:
```python
    while i < len(s) and s[i].isspace():
        i += 1
    return i


def convert_messages_to_prompt(messages):
    # Converts an Anthropic-style conversation to a plain prompt string.
    prompt = ""
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"Human: {msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"Assistant: {msg['content']}\n"
    return prompt
```
Review comment on lines +126 to +134:
This function is not robust enough for the Anthropic API and has several issues: it raises a `KeyError` when a message is missing `role` or `content`, and it assumes `content` is always a string, even though the Anthropic API also allows a list of content blocks, which would be interpolated here as a raw Python list repr.

I suggest a more robust implementation that handles these cases gracefully. You will need to add `List`, `Dict`, and `Any` from `typing` to this file's imports:

```python
def convert_messages_to_prompt(messages: List[Dict[str, Any]]) -> str:
    # Converts an Anthropic-style conversation to a plain prompt string.
    prompt = ""
    for msg in messages:
        role = msg.get("role")
        content = msg.get("content")
        if role == "user":
            role_str = "Human"
        elif role == "assistant":
            role_str = "Assistant"
        else:
            # Skip unknown roles
            continue
        text_content = ""
        if isinstance(content, str):
            text_content = content
        elif isinstance(content, list):
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    text_content += block.get("text", "")
        if text_content:
            prompt += f"{role_str}: {text_content}\n"
    return prompt
```
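To illustrate the difference, a quick check of the suggested version with both string and block-style content (the message values here are made up; the original version would render the block-style content as a raw list repr):

```python
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant",
     "content": [{"type": "text", "text": "Hi there."}]},
    {"role": "system", "content": "ignored"},  # unknown role, skipped
]
print(convert_messages_to_prompt(messages))
# Human: Hello
# Assistant: Hi there.
```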
Review comment on lines +126 to +134:

Not sure this is a tool parser.