Skip to content

Commit a328f78

Browse files
authored
[FEAT] Support --disable-stream in openai chat completion (#111)
Support --disable-stream in openai chat completion
1 parent 99a3350 commit a328f78

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

src/flexible_inference_benchmark/engine/backend_functions.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -452,10 +452,11 @@ async def async_request_openai_chat_completions(
452452
"model": request_func_input.model,
453453
"messages": [{"role": "user", "content": content_body}],
454454
"max_tokens": request_func_input.output_len,
455-
"stream": True,
455+
"stream": request_func_input.stream,
456456
"ignore_eos": request_func_input.ignore_eos,
457-
"stream_options": {"include_usage": True},
458457
}
458+
if request_func_input.stream:
459+
payload["stream_options"] = {"include_usage": True}
459460
apply_sampling_params(payload, request_func_input, always_top_p=False)
460461
if request_func_input.logprobs is not None:
461462
payload["logprobs"] = True
@@ -501,12 +502,14 @@ async def async_request_openai_chat_completions(
501502
else:
502503
timestamp = time.perf_counter()
503504
data = json.loads(chunk)
505+
delta = None
506+
content = None
507+
reasoning_content = None
508+
if request_func_input.stream and len(data["choices"]) > 0:
509+
delta = data["choices"][0]["delta"]
510+
content = delta.get("content", None)
511+
reasoning_content = delta.get("reasoning_content", None)
504512

505-
delta = data["choices"][0]["delta"] if len(data["choices"]) > 0 else None
506-
content = delta.get("content", None) if delta is not None else None
507-
reasoning_content = (
508-
delta.get("reasoning_content", None) if delta is not None else None
509-
)
510513
if (content is not None or reasoning_content is not None) and not (
511514
ttft == 0.0 and (content == '' or reasoning_content == '')
512515
):

0 commit comments

Comments
 (0)