We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b084cd2, commit 4bcf8bd (Copy full SHA for 4bcf8bd)
tests/v1/tpu/test_basic.py
@@ -59,7 +59,7 @@ def test_basic(
59
# actually test chunked prompt
60
max_num_batched_tokens=1024,
61
max_model_len=8192,
62
- gpu_memory_utilization=0.95,
+ gpu_memory_utilization=0.7,
63
max_num_seqs=max_num_seqs,
64
tensor_parallel_size=tensor_parallel_size) as vllm_model:
65
vllm_outputs = vllm_model.generate_greedy(example_prompts,
0 commit comments