@@ -211,6 +211,7 @@ def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
211
211
response = self ._invoke_bedrock (chat_request , stream = True )
212
212
message_id = self .generate_message_id ()
213
213
stream = response .get ("stream" )
214
+ self .think_emitted = False
214
215
for chunk in stream :
215
216
stream_response = self ._create_response_stream (
216
217
model_id = chat_request .model , message_id = message_id , chunk = chunk
@@ -235,6 +236,7 @@ def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
235
236
236
237
# return a [DONE] message at the end.
237
238
yield self .stream_response_to_bytes ()
239
+ self .think_emitted = False # Cleanup
238
240
239
241
def _parse_system_prompts (self , chat_request : ChatRequest ) -> list [dict [str , str ]]:
240
242
"""Create system prompts.
@@ -498,6 +500,9 @@ def _create_response(
498
500
message .content = c ["text" ]
499
501
else :
500
502
logger .warning ("Unknown tag in message content " + "," .join (c .keys ()))
503
+ if message .reasoning_content :
504
+ message .content = f"<think>{ message .reasoning_content } </think>{ message .content } "
505
+ message .reasoning_content = None
501
506
502
507
response = ChatResponse (
503
508
id = message_id ,
@@ -566,11 +571,19 @@ def _create_response_stream(
566
571
content = delta ["text" ],
567
572
)
568
573
elif "reasoningContent" in delta :
569
- # ignore "signature" in the delta.
570
574
if "text" in delta ["reasoningContent" ]:
571
- message = ChatResponseMessage (
572
- reasoning_content = delta ["reasoningContent" ]["text" ],
573
- )
575
+ content = delta ["reasoningContent" ]["text" ]
576
+ if not self .think_emitted :
577
+ # Port of "content_block_start" with "thinking"
578
+ content = "<think>" + content
579
+ self .think_emitted = True
580
+ message = ChatResponseMessage (content = content )
581
+ elif "signature" in delta ["reasoningContent" ]:
582
+ # Port of "signature_delta"
583
+ if self .think_emitted :
584
+ message = ChatResponseMessage (content = "\n </think> \n \n " )
585
+ else :
586
+ return None # Ignore signature if no <think> started
574
587
else :
575
588
# tool use
576
589
index = chunk ["contentBlockDelta" ]["contentBlockIndex" ] - 1
0 commit comments