From c2e829d3f7a0aaa23580bfdf8e81788f9f2fc526 Mon Sep 17 00:00:00 2001
From: nouamanetazi
Date: Fri, 27 Jun 2025 14:05:19 +0000
Subject: [PATCH 1/3] fix vllm evals using dp>1 and tp>1

---
 src/lighteval/models/vllm/vllm_model.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index b1de9d0a7..42208db53 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -402,14 +402,7 @@ def _generate(
         sampling_params.detokenize = False

         if self.data_parallel_size > 1:
-            # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
-            # also seems to only work with decorator and not with ray.remote() fn
-            # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            # Hynek: With the newest vllm, it actually breaks when tensor_parallel_size == 1 and num_gpus not set,
-            # as VLLM complains about no GPUs available.
-            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
+            @ray.remote(num_gpus=self.tensor_parallel_size)
             def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, requests):
                 llm = LLM(**model_args)
                 return llm.generate(prompt_token_ids=requests, sampling_params=sampling_params)

From a9f828abf1fa45cdcc5a4b784072c5e796fd3942 Mon Sep 17 00:00:00 2001
From: nouamanetazi
Date: Fri, 27 Jun 2025 14:05:37 +0000
Subject: [PATCH 2/3] Update vllm dependency version to 0.8.5.post1 in
 pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index abd1897f8..28d91981f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,7 +94,7 @@ nanotron = [
   "tensorboardX"
 ]
 tensorboardX = ["tensorboardX"]
-vllm = ["vllm>=0.8.4", "ray", "more_itertools"]
+vllm = ["vllm>=0.8.5.post1", "ray", "more_itertools"]
 quality = ["ruff>=v0.11.0","pre-commit"]
 tests = ["pytest>=7.4.0","deepdiff"]
 dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"]

From cd73093864be74bd40d73c9c4b91ea15efada95f Mon Sep 17 00:00:00 2001
From: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
Date: Fri, 1 Aug 2025 14:26:35 +0200
Subject: [PATCH 3/3] should fix case

---
 src/lighteval/models/vllm/vllm_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index ad6b62ae3..53aba0c37 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -399,6 +399,7 @@ def _generate(
         sampling_params.detokenize = False

         if self.data_parallel_size > 1:
+
             @ray.remote(num_gpus=self.tensor_parallel_size)
             def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, requests):
                 llm = LLM(**model_args)
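
For context, a minimal self-contained sketch of the data-parallel pattern these patches converge on: each data-parallel replica becomes a Ray task that reserves num_gpus equal to tensor_parallel_size, so Ray schedules one full tensor-parallel GPU group per replica instead of the old special case (num_gpus=1 only when tensor_parallel_size == 1, None otherwise). The model name, prompt token ids, and parallel sizes below are illustrative assumptions, not values taken from lighteval.

# Sketch only: assumes a host with data_parallel_size * tensor_parallel_size GPUs.
import ray
from more_itertools import distribute
from vllm import LLM, SamplingParams

data_parallel_size = 2    # number of independent model replicas
tensor_parallel_size = 2  # GPUs each replica shards the model across

# Reserving num_gpus=tensor_parallel_size makes Ray hold a whole TP group
# per replica, which is what PATCH 1/3 switches lighteval to.
@ray.remote(num_gpus=tensor_parallel_size)
def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, requests):
    llm = LLM(**model_args)
    return llm.generate(prompt_token_ids=requests, sampling_params=sampling_params)

# Hypothetical arguments for illustration.
model_args = {"model": "HuggingFaceTB/SmolLM-135M", "tensor_parallel_size": tensor_parallel_size}
sampling_params = SamplingParams(temperature=0.0, max_tokens=32)
requests = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]  # dummy prompt token ids

# Round-robin the requests over the replicas, run them in parallel, and flatten.
shards = [list(shard) for shard in distribute(data_parallel_size, requests)]
results = ray.get([run_inference_one_model.remote(model_args, sampling_params, s) for s in shards])
outputs = [output for shard_outputs in results for output in shard_outputs]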