
Commit 07251ed

add ut for layerwise_connector
Signed-off-by: wangxiaoteng <[email protected]>
1 parent dd8ea3e commit 07251ed

File tree: 6 files changed, +544 −694 lines


examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py

Lines changed: 1 addition & 46 deletions
@@ -361,16 +361,6 @@ async def send_request_to_service(client: httpx.AsyncClient,
     aborted_requests = proxy_state.aquire_aborted_prefiller_requests(
         prefiller_id)
     req_data = req_data.copy()
-    # req_data['kv_transfer_params'] = {
-    #     "do_remote_decode": True,
-    #     "do_remote_prefill": False,
-    #     "remote_engine_id": None,
-    #     "remote_block_ids": None,
-    #     "remote_host": None,
-    #     "remote_port": None,
-    #     "aborted_request": list(aborted_requests),
-    #     "metaserver": f"http://{global_args.host}:{global_args.port}/v1/metaserver"
-    # }
     req_data["stream"] = False
     req_data["max_tokens"] = 1
     if "stream_options" in req_data:
@@ -474,33 +464,10 @@ async def _handle_completions(api: str, request: Request):
     req_data = await request.json()
     req_body = await request.body()
     request_length = len(req_body)
-    # prefiller_score = proxy_state.calculate_prefill_scores(request_length)
-    # logger.debug(
-    #     f"Request length: {request_length}, Prefiller score: {prefiller_score}"
-    # )
     request_id = await proxy_state.next_req_id()
     request_id_api = get_api_request_id(api, request_id)
     proxy_state.req_data_dict[request_id_api] = (req_data, request_length,
                                                  api)
-    # # Select prefiller
-    # prefiller_idx = proxy_state.select_prefiller(prefiller_score)
-    # prefiller = proxy_state.prefillers[prefiller_idx]
-    # result_future = asyncio.Future()  # type: ignore
-    # proxy_state.req_id_future[request_id_api] = result_future
-    # # Send request to prefiller
-    # asyncio.get_running_loop().create_task(send_request_to_service(
-    #     prefiller.client,
-    #     prefiller_idx,
-    #     api,
-    #     req_data,
-    #     request_id,
-    #     max_retries=global_args.max_retries,
-    #     base_delay=global_args.retry_delay))
-    # proxy_state.release_prefiller(prefiller_idx, prefiller_score)
-
-    # response = await result_future
-    # del proxy_state.req_id_future[request_id_api]
-    # req_data["kv_transfer_params"] = response
     req_data['kv_transfer_params'] = {
         "do_remote_decode":
         False,
@@ -530,18 +497,11 @@ async def generate_stream():
                     request_id=request_id,
                     max_retries=global_args.max_retries,
                     base_delay=global_args.retry_delay):
-                # if not released_kv and chunk:
-                #     proxy_state.release_prefiller_kv(
-                #         prefiller_idx, prefiller_score)
-                #     released_kv = True
                 yield chunk
         except Exception as e:
             logger.error(
                 f"Error during streaming from decoder {decoder.url}: {str(e)} the aborted request {request_id} will be routing to the target prefiller when new request is ready to dispatch to it"
             )
-            # proxy_state.abort_prefiller_request(prefiller_idx, request_id)
-            # proxy_state.release_prefiller_kv(prefiller_idx,
-            #                                  prefiller_score)
 
         # After streaming done, release tokens
         proxy_state.release_decoder(decoder_idx, decoder_score)
@@ -587,9 +547,6 @@ async def metaserver(request: Request):
         request_id = kv_transfer_params["request_id"]
         assert request_id in proxy_state.req_data_dict
         req_data, request_length, api = proxy_state.req_data_dict[request_id]
-        # output_prompt = proxy_state.tokenizer.decode(kv_transfer_params["token_ids"])
-        # req_data["prompt"] = output_prompt
-        # del kv_transfer_params['token_ids']
         request_id = get_origin_request_id(api, request_id)
         req_data["kv_transfer_params"] = kv_transfer_params
         prefiller_score = proxy_state.calculate_prefill_scores(request_length)
@@ -612,8 +569,6 @@ async def metaserver(request: Request):
             base_delay=global_args.retry_delay)
         proxy_state.release_prefiller(prefiller_idx, prefiller_score)
 
-        # del req_data["prompt"]
-
     except Exception as e:
         logger.error(f"Post metaserver failed with: {str(e)}")
 
@@ -622,4 +577,4 @@ async def metaserver(request: Request):
     global global_args
     global_args = parse_args()
     import uvicorn
-    uvicorn.run(app, host=global_args.host, port=global_args.port)
+    uvicorn.run(app, host=global_args.host, port=global_args.port)
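For context on the callback path this diff keeps, below is a minimal, self-contained sketch of how a worker-side connector might notify the proxy's metaserver endpoint once its KV-transfer metadata is ready. The URL shape comes from the deleted comment (f"http://{global_args.host}:{global_args.port}/v1/metaserver") and the surviving handler reads kv_transfer_params["request_id"]; the helper name, the extra payload fields, and posting the dict directly as the JSON body are illustrative assumptions, not the connector's actual wire format.

import asyncio

import httpx  # already used by the proxy example


async def notify_metaserver(proxy_host: str, proxy_port: int,
                            kv_transfer_params: dict) -> None:
    """Hypothetical helper: POST KV-transfer metadata back to the proxy.

    The proxy's /v1/metaserver handler (see the diff above) expects a
    request_id it previously registered in proxy_state.req_data_dict.
    """
    url = f"http://{proxy_host}:{proxy_port}/v1/metaserver"
    async with httpx.AsyncClient() as client:
        resp = await client.post(url, json=kv_transfer_params)
        resp.raise_for_status()


if __name__ == "__main__":
    # Illustrative payload only; field names beyond "request_id" mirror the
    # commented-out block removed above and may differ from the real connector.
    example_params = {
        "request_id": "cmpl-0",
        "remote_host": "127.0.0.1",
        "remote_port": 5600,
        "remote_block_ids": [0, 1, 2],
    }
    asyncio.run(notify_metaserver("127.0.0.1", 7802, example_params))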

tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py

Lines changed: 0 additions & 3 deletions
@@ -1,8 +1,5 @@
 from __future__ import annotations
 
-import os
-from unittest.mock import patch
-
 import pytest
 from vllm import SamplingParams
 from vllm.config import CompilationConfig, CUDAGraphMode
tests/ut/kv_connector/test_mooncake_connector.py

Lines changed: 1 addition & 1 deletion
@@ -1136,4 +1136,4 @@ def test_device_id_selection_with_physical_devices(self):
 
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
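As a usage note, here is a minimal sketch of exercising this test module from the repository root with the standard-library unittest runner; the module path is taken from the diff above, while the choice of unittest discovery (rather than, say, pytest) is just one option shown for illustration.

# Minimal sketch: run tests/ut/kv_connector/test_mooncake_connector.py via
# unittest discovery, assuming the working directory is the repository root.
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover(
        start_dir="tests/ut/kv_connector",
        pattern="test_mooncake_connector.py",
    )
    unittest.TextTestRunner(verbosity=2).run(suite)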
