Commit 8675b1c

Fix test case
Signed-off-by: Hui Gao <[email protected]>
1 parent df5de48 · commit 8675b1c

File tree

1 file changed (+12, -3 lines)
tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 12 additions & 3 deletions
@@ -3696,10 +3696,14 @@ class TestQwen2_7BInstruct(LlmapiAccuracyTestHarness):
         )
 
     def test_auto_dtype(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6, )
         with LLM(self.MODEL_PATH) as llm:
             task = CnnDailymail(self.MODEL_NAME)
-            task.evaluate(llm,
-                          extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS)
+            task.evaluate(
+                llm,
+                extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS,
+                kv_cache_config=kv_cache_config,
+            )
 
 
 class TestQwen3_4B(LlmapiAccuracyTestHarness):
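
For context, the pattern this hunk applies in test_auto_dtype can be used standalone: cap the KV cache before constructing the model. A minimal sketch, assuming the usual tensorrt_llm imports and a placeholder model name (the test itself uses self.MODEL_PATH):

from tensorrt_llm import LLM
from tensorrt_llm.llmapi import KvCacheConfig

# Let the KV cache claim at most 60% of the GPU memory that is free at
# startup, mirroring the fraction used in the test above.
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

with LLM("Qwen/Qwen2-7B-Instruct", kv_cache_config=kv_cache_config) as llm:
    for output in llm.generate(["The capital of France is"]):
        print(output.outputs[0].text)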
@@ -3744,11 +3748,12 @@ def test_fp8_block_scales(self, tp_size, pp_size, ep_size, attention_dp,
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
-
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         with LLM(f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8",
                  tensor_parallel_size=tp_size,
                  pipeline_parallel_size=pp_size,
                  moe_expert_parallel_size=ep_size,
+                 kv_cache_config=kv_cache_config,
                  **pytorch_config,
                  enable_attention_dp=attention_dp,
                  max_batch_size=64) as llm:
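
For intuition about the 0.5 used here: free_gpu_memory_fraction scales the GPU memory still free when the KV cache is sized, not the device total. A back-of-envelope sketch with illustrative, not measured, numbers:

# Illustrative arithmetic only; the actual accounting is done by the runtime.
free_after_weights_gib = 60.0   # assumed free memory once weights are resident
fraction = 0.5                  # value used by the updated Qwen3 tests
print(f"KV cache budget ~ {free_after_weights_gib * fraction:.1f} GiB")  # ~30.0 GiB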
@@ -3759,9 +3764,11 @@ def test_fp8_block_scales(self, tp_size, pp_size, ep_size, attention_dp,
 
     @skip_pre_hopper
     def test_dummy_load_format(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         llm = LLM(
             f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8",
             load_format="dummy",
+            kv_cache_config=kv_cache_config,
         )
         with llm:
             task = MMLU(self.MODEL_NAME)
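
With load_format="dummy" the checkpoint is not read, so this test exercises engine setup and memory behavior rather than model quality; the assumed semantics, as the name suggests, are that weights are filled with placeholder values. A sketch of the same pattern outside the harness, with a placeholder model name:

from tensorrt_llm import LLM
from tensorrt_llm.llmapi import KvCacheConfig

llm = LLM(
    "Qwen/Qwen3-8B",        # placeholder; the test uses a local FP8 checkpoint
    load_format="dummy",    # skip checkpoint I/O (assumed placeholder weights)
    kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.5),
)
with llm:
    pass  # the real test runs an MMLU task here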
@@ -3786,11 +3793,13 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
 
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         with LLM(f"{llm_models_root()}/Qwen3/Qwen3-8B"
                  if is_cached else "Qwen/Qwen3-8B",
                  tensor_parallel_size=tp_size,
                  pipeline_parallel_size=pp_size,
                  moe_expert_parallel_size=ep_size,
+                 kv_cache_config=kv_cache_config,
                  **pytorch_config,
                  enable_attention_dp=attention_dp) as llm:
             task = CnnDailymail(self.MODEL_NAME)
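
When picking a fraction for a shared CI GPU, it helps to check how much memory is actually free first. A generic PyTorch sketch, not part of this commit:

import torch

# (free, total) memory in bytes for the current CUDA device.
free_b, total_b = torch.cuda.mem_get_info()
print(f"free: {free_b / 2**30:.1f} GiB of {total_b / 2**30:.1f} GiB")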
