@@ -361,16 +361,6 @@ async def send_request_to_service(client: httpx.AsyncClient,
361361 aborted_requests = proxy_state .aquire_aborted_prefiller_requests (
362362 prefiller_id )
363363 req_data = req_data .copy ()
364- # req_data['kv_transfer_params'] = {
365- # "do_remote_decode": True,
366- # "do_remote_prefill": False,
367- # "remote_engine_id": None,
368- # "remote_block_ids": None,
369- # "remote_host": None,
370- # "remote_port": None,
371- # "aborted_request": list(aborted_requests),
372- # "metaserver": f"http://{global_args.host}:{global_args.port}/v1/metaserver"
373- # }
374364 req_data ["stream" ] = False
375365 req_data ["max_tokens" ] = 1
376366 if "stream_options" in req_data :
@@ -474,33 +464,10 @@ async def _handle_completions(api: str, request: Request):
474464 req_data = await request .json ()
475465 req_body = await request .body ()
476466 request_length = len (req_body )
477- # prefiller_score = proxy_state.calculate_prefill_scores(request_length)
478- # logger.debug(
479- # f"Request length: {request_length}, Prefiller score: {prefiller_score}"
480- # )
481467 request_id = await proxy_state .next_req_id ()
482468 request_id_api = get_api_request_id (api , request_id )
483469 proxy_state .req_data_dict [request_id_api ] = (req_data , request_length ,
484470 api )
485- # # Select prefiller
486- # prefiller_idx = proxy_state.select_prefiller(prefiller_score)
487- # prefiller = proxy_state.prefillers[prefiller_idx]
488- # result_future = asyncio.Future() # type: ignore
489- # proxy_state.req_id_future[request_id_api] = result_future
490- # # Send request to prefiller
491- # asyncio.get_running_loop().create_task(send_request_to_service(
492- # prefiller.client,
493- # prefiller_idx,
494- # api,
495- # req_data,
496- # request_id,
497- # max_retries=global_args.max_retries,
498- # base_delay=global_args.retry_delay))
499- # proxy_state.release_prefiller(prefiller_idx, prefiller_score)
500-
501- # response = await result_future
502- # del proxy_state.req_id_future[request_id_api]
503- # req_data["kv_transfer_params"] = response
504471 req_data ['kv_transfer_params' ] = {
505472 "do_remote_decode" :
506473 False ,
@@ -530,18 +497,11 @@ async def generate_stream():
530497 request_id = request_id ,
531498 max_retries = global_args .max_retries ,
532499 base_delay = global_args .retry_delay ):
533- # if not released_kv and chunk:
534- # proxy_state.release_prefiller_kv(
535- # prefiller_idx, prefiller_score)
536- # released_kv = True
537500 yield chunk
538501 except Exception as e :
539502 logger .error (
540503 f"Error during streaming from decoder { decoder .url } : { str (e )} the aborted request { request_id } will be routing to the target prefiller when new request is ready to dispatch to it"
541504 )
542- # proxy_state.abort_prefiller_request(prefiller_idx, request_id)
543- # proxy_state.release_prefiller_kv(prefiller_idx,
544- # prefiller_score)
545505
546506 # After streaming done, release tokens
547507 proxy_state .release_decoder (decoder_idx , decoder_score )
@@ -587,9 +547,6 @@ async def metaserver(request: Request):
587547 request_id = kv_transfer_params ["request_id" ]
588548 assert request_id in proxy_state .req_data_dict
589549 req_data , request_length , api = proxy_state .req_data_dict [request_id ]
590- # output_prompt = proxy_state.tokenizer.decode(kv_transfer_params["token_ids"])
591- # req_data["prompt"] = output_prompt
592- # del kv_transfer_params['token_ids']
593550 request_id = get_origin_request_id (api , request_id )
594551 req_data ["kv_transfer_params" ] = kv_transfer_params
595552 prefiller_score = proxy_state .calculate_prefill_scores (request_length )
@@ -612,8 +569,6 @@ async def metaserver(request: Request):
612569 base_delay = global_args .retry_delay )
613570 proxy_state .release_prefiller (prefiller_idx , prefiller_score )
614571
615- # del req_data["prompt"]
616-
617572 except Exception as e :
618573 logger .error (f"Post metaserver failed with: { str (e )} " )
619574
@@ -622,4 +577,4 @@ async def metaserver(request: Request):
622577 global global_args
623578 global_args = parse_args ()
624579 import uvicorn
625- uvicorn .run (app , host = global_args .host , port = global_args .port )
580+ uvicorn .run (app , host = global_args .host , port = global_args .port )
0 commit comments