Commit 9dad57e

Author: chen zhang
Message: enable deep <think> mode for local ollama
Parent: 4c67ded


1 file changed: api/websocket_wiki.py (26 additions, 8 deletions)
@@ -403,7 +403,10 @@ async def handle_websocket_chat(websocket: WebSocket):
             conversation_history += f"<turn>\n<user>{turn.user_query.query_str}</user>\n<assistant>{turn.assistant_response.response_str}</assistant>\n</turn>\n"

         # Create the prompt with context
-        prompt = f"/no_think {system_prompt}\n\n"
+        if request.provider == "ollama":
+            prompt = f"/think {system_prompt}\n\n"
+        else:
+            prompt = f"/no_think {system_prompt}\n\n"

         if conversation_history:
             prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"
@@ -428,7 +431,7 @@ async def handle_websocket_chat(websocket: WebSocket):
         model_config = get_model_config(request.provider, request.model)["model_kwargs"]

         if request.provider == "ollama":
-            prompt += " /no_think"
+            prompt += " /think"

             model = OllamaClient()
             model_kwargs = {
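Taken together with the first hunk, an Ollama-bound prompt now carries the switch at both ends. Illustrative only, with a placeholder system prompt and query standing in for the handler's real inputs:

system_prompt = "You are a helpful assistant."  # placeholder
prompt = f"/think {system_prompt}\n\n"          # from the first hunk
prompt += "<query>\nWhat does this repo do?\n</query>\n\nAssistant: "
prompt += " /think"                             # from this hunk
print(prompt)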
@@ -527,11 +530,18 @@ async def handle_websocket_chat(websocket: WebSocket):
                 # Get the response and handle it properly using the previously created api_kwargs
                 response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                 # Handle streaming response from Ollama
+                think = False
                 async for chunk in response:
                     text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk)
                     if text and not text.startswith('model=') and not text.startswith('created_at='):
-                        text = text.replace('<think>', '').replace('</think>', '')
-                        await websocket.send_text(text)
+                        if text == '<think>':
+                            think = True
+                            logger.info("think enabled")
+                        elif text == '</think>':
+                            think = False
+                        # skip <think>.....</think> in output
+                        if not think:
+                            await websocket.send_text(text)
                 # Explicitly close the WebSocket connection after the response is complete
                 await websocket.close()
             elif request.provider == "openrouter":
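The gating above assumes Ollama delivers each tag as its own chunk; a tag split across chunks would slip past the equality checks. A standalone sketch of the same idea (the helper name is hypothetical); note that using elif for the send branch also keeps the literal '</think>' chunk out of the output, which the committed `if not think` check would still forward since think has just been reset:

from typing import Iterable, Iterator

def filter_think(tokens: Iterable[str]) -> Iterator[str]:
    # Drop every chunk from a standalone '<think>' through its '</think>'.
    think = False
    for text in tokens:
        if text == '<think>':
            think = True
        elif text == '</think>':
            think = False
        elif not think:
            yield text

# Only the chunks outside the think block survive:
print(list(filter_think(['<think>', 'step 1...', '</think>', 'Hi', '!'])))
# -> ['Hi', '!']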
@@ -614,7 +624,10 @@ async def handle_websocket_chat(websocket: WebSocket):
                 logger.warning("Token limit exceeded, retrying without context")
                 try:
                     # Create a simplified prompt without context
-                    simplified_prompt = f"/no_think {system_prompt}\n\n"
+                    if request.provider == "ollama":
+                        simplified_prompt = f"/think {system_prompt}\n\n"
+                    else:
+                        simplified_prompt = f"/no_think {system_prompt}\n\n"
                     if conversation_history:
                         simplified_prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"

@@ -626,7 +639,7 @@ async def handle_websocket_chat(websocket: WebSocket):
                     simplified_prompt += f"<query>\n{query}\n</query>\n\nAssistant: "

                     if request.provider == "ollama":
-                        simplified_prompt += " /no_think"
+                        simplified_prompt += " /think"

                     # Create new api_kwargs with the simplified prompt
                     fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
@@ -639,10 +652,15 @@ async def handle_websocket_chat(websocket: WebSocket):
                         fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                         # Handle streaming fallback_response from Ollama
-                        async for chunk in fallback_response:
+                        think = False
+                        async for chunk in response:
                             text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk)
                             if text and not text.startswith('model=') and not text.startswith('created_at='):
-                                text = text.replace('<think>', '').replace('</think>', '')
+                                if text == '<think>':
+                                    think = True
+                                elif text == '</think>':
+                                    think = False
+                                if not think:
                                     await websocket.send_text(text)
                     elif request.provider == "openrouter":
                         try:
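End to end, the Ollama branch now streams chunks and forwards only the non-think ones over the WebSocket (note that, as committed, the fallback loop iterates response rather than the freshly created fallback_response). A client-side sketch of the same streaming shape against a local Ollama server, which emits one NDJSON object per chunk; the model tag is again an assumption:

import json
import requests

think = False
with requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "qwen3:8b", "prompt": "/think What is 2 + 2?", "stream": True},
    stream=True,
    timeout=300,
) as resp:
    for line in resp.iter_lines():  # one NDJSON object per streamed chunk
        if not line:
            continue
        text = json.loads(line).get("response", "")
        if text == '<think>':
            think = True
        elif text == '</think>':
            think = False
        elif text and not think:
            print(text, end="", flush=True)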
