@@ -452,10 +452,11 @@ async def async_request_openai_chat_completions(
452452 "model" : request_func_input .model ,
453453 "messages" : [{"role" : "user" , "content" : content_body }],
454454 "max_tokens" : request_func_input .output_len ,
455- "stream" : True ,
455+ "stream" : request_func_input . stream ,
456456 "ignore_eos" : request_func_input .ignore_eos ,
457- "stream_options" : {"include_usage" : True },
458457 }
458+ if request_func_input .stream :
459+ payload ["stream_options" ] = {"include_usage" : True }
459460 apply_sampling_params (payload , request_func_input , always_top_p = False )
460461 if request_func_input .logprobs is not None :
461462 payload ["logprobs" ] = True
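For reference, the payload construction after this hunk behaves roughly as in the sketch below. The `RequestFuncInput` dataclass is an illustrative stand-in for the benchmark's request descriptor (only the fields used above are modeled), and `build_chat_payload` is a hypothetical helper; the real code builds the dict inline. The key point is that `stream_options` is only meaningful on streaming requests, so it is now attached conditionally rather than always.

```python
from dataclasses import dataclass


@dataclass
class RequestFuncInput:
    """Illustrative stand-in for the benchmark's request descriptor."""
    model: str
    output_len: int
    stream: bool = True
    ignore_eos: bool = False


def build_chat_payload(request_func_input: RequestFuncInput, content_body) -> dict:
    payload = {
        "model": request_func_input.model,
        "messages": [{"role": "user", "content": content_body}],
        "max_tokens": request_func_input.output_len,
        # Previously hardcoded to True; now honors the per-request flag.
        "stream": request_func_input.stream,
        "ignore_eos": request_func_input.ignore_eos,
    }
    # stream_options is only valid on streaming requests, so gate it
    # on the new per-request stream flag instead of always sending it.
    if request_func_input.stream:
        payload["stream_options"] = {"include_usage": True}
    return payload
```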
@@ -501,12 +502,14 @@ async def async_request_openai_chat_completions(
             else:
                 timestamp = time.perf_counter()
                 data = json.loads(chunk)
+                delta = None
+                content = None
+                reasoning_content = None
+                if request_func_input.stream and len(data["choices"]) > 0:
+                    delta = data["choices"][0]["delta"]
+                    content = delta.get("content", None)
+                    reasoning_content = delta.get("reasoning_content", None)
 
-                delta = data["choices"][0]["delta"] if len(data["choices"]) > 0 else None
-                content = delta.get("content", None) if delta is not None else None
-                reasoning_content = (
-                    delta.get("reasoning_content", None) if delta is not None else None
-                )
                 if (content is not None or reasoning_content is not None) and not (
                     ttft == 0.0 and (content == '' or reasoning_content == '')
                 ):
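This second hunk replaces the unconditional delta lookup with one gated on the stream flag, so a non-streaming response body (which carries `message` rather than `delta` in its choices) no longer trips over a missing `"delta"` key. A minimal sketch of the extracted logic, assuming the same chunk format (the helper name is hypothetical; the real code inlines this in the response loop):

```python
import json


def extract_delta_fields(chunk: str, stream: bool):
    """Return (content, reasoning_content) from one response chunk.

    Both fields stay None for non-streaming responses or empty choice
    lists, mirroring the guarded lookup introduced above.
    """
    data = json.loads(chunk)
    content = None
    reasoning_content = None
    if stream and len(data["choices"]) > 0:
        delta = data["choices"][0]["delta"]
        content = delta.get("content", None)
        reasoning_content = delta.get("reasoning_content", None)
    return content, reasoning_content


# Streaming chunk: content is extracted.
print(extract_delta_fields('{"choices": [{"delta": {"content": "hi"}}]}', stream=True))
# Non-streaming body: both fields stay None instead of raising KeyError on "delta".
print(extract_delta_fields('{"choices": [{"message": {"content": "hi"}}]}', stream=False))
```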