@@ -126,7 +126,7 @@ async def async_stream_infer(self,
126126 int: The number of the output tokens.
127127 """
128128 if len (input_ids ) > self .max_input_len :
129- yield EngineOutput (ResponseType .INPUT_LENGTH_ERROR , [], 0 )
129+ yield EngineOutput (ResponseType .INPUT_LENGTH_ERROR , [])
130130 return
131131 gen_config = gen_config or GenerationConfig ()
132132 sampling_param = SamplingParam .from_gen_config (gen_config = gen_config )
@@ -158,7 +158,6 @@ async def async_stream_infer(self,
158158 logger .debug (f'session[{ session_id } ] success: num_out_ids={ num_ids } .' )
159159 yield EngineOutput (resp .type ,
160160 token_ids [output_offset :],
161- num_ids ,
162161 cache_block_ids = cache_block_ids ,
163162 req_metrics = req_metrics ,
164163 logprobs = logprobs )
@@ -171,15 +170,14 @@ async def async_stream_infer(self,
171170 logger .debug (f'session[{ session_id } ] finish: num_out_ids={ num_ids } .' )
172171 yield EngineOutput (resp .type ,
173172 token_ids [output_offset :],
174- num_ids ,
175173 logits = logits ,
176174 cache_block_ids = cache_block_ids ,
177175 req_metrics = req_metrics ,
178176 logprobs = logprobs )
179177 break
180178 else :
181179 logger .debug (f'session[{ session_id } ] failed.' )
182- yield EngineOutput (resp .type , [], 0 )
180+ yield EngineOutput (resp .type , [])
183181 break
184182
185183 async def async_infer (self ,
0 commit comments