diff --git a/.github/workflows/latest_tests.yaml b/.github/workflows/latest_tests.yaml
new file mode 100644
index 000000000..cd0029e39
--- /dev/null
+++ b/.github/workflows/latest_tests.yaml
@@ -0,0 +1,59 @@
+name: Tests on dev branch of vllm and transformers
+
+on:
+  # Run automatically every Saturday at 00:00 UTC
+  schedule:
+    - cron: "0 0 * * 6"
+
+  # Allow manual triggering via GitHub UI
+  workflow_dispatch:
+
+  # Optional: run on pushes to main or release branches
+  push:
+    branches:
+      - main
+      - v*-release
+
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  run_tests:
+    name: Run tests on dev branch of vllm and transformers
+    runs-on: 'aws-g4dn-2xlarge-use1-public-80'
+    steps:
+      - name: Install Git LFS
+        run: |
+          sudo apt-get update && sudo apt-get install -y git-lfs
+          git lfs install
+
+      - name: Install Python development headers
+        run: sudo apt-get update && sudo apt-get install -y python3.10-dev
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: |
+          uv sync --extra dev
+          VLLM_USE_PRECOMPILED=1 uv pip install --upgrade git+https://github.com/vllm-project/vllm.git@main
+          uv pip install --upgrade git+https://github.com/huggingface/transformers.git@main
+
+      - name: run nvidia-smi
+        run: nvidia-smi
+
+      - name: Pip freeze
+        run: uv pip freeze
+
+      - name: Run tests
+        run: |
+          VLLM_WORKER_MULTIPROC_METHOD=spawn uv run pytest --disable-pytest-warnings --runslow tests/slow_tests/test_vllm_model.py
+          uv run pytest --disable-pytest-warnings --runslow tests/slow_tests/test_accelerate_model.py
diff --git a/pyproject.toml b/pyproject.toml
index 411a7b898..744b49645 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,7 @@ dependencies = [
     "GitPython>=3.1.41", # for logging
     "datasets>=4.0.0",
     "pydantic",
-    "numpy>=2", # pinned to avoid incompatibilities
+    "numpy>=2,<2.3", # pinned to avoid incompatibilities
     "hf-xet>=1.1.8", # pinned to avoid failing test suite
     # Prettiness
     "typer",
@@ -98,7 +98,7 @@ nanotron = [
     "tensorboardX"
 ]
 tensorboardX = ["tensorboardX"]
-vllm = ["vllm>=0.10.0,<0.10.2", "ray", "more_itertools"]
+vllm = ["vllm", "ray", "more_itertools"]
 sglang = ["sglang"]
 quality = ["ruff>=v0.11.0","pre-commit"]
 tests = ["pytest>=7.4.0","deepdiff","pip>=25.2"]
diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 969caf8fa..d92d5f223 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -48,6 +48,7 @@
     import ray
     from more_itertools import distribute
     from vllm import LLM, RequestOutput, SamplingParams
+    from vllm.inputs.data import TokensPrompt
     from vllm.distributed.parallel_state import (
         destroy_distributed_environment,
         destroy_model_parallel,
@@ -291,7 +292,7 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
         # Inferring from the tokenizer will cause vllm to bug for models with mismatches between model
         # config and tk config, like mistralai/Mistral-7B-v0.1
         if self._max_length is None:
-            self._max_length = model.llm_engine.model_config.max_seq_len_to_capture
+            self._max_length = model.llm_engine.model_config.max_model_len

         return model

@@ -455,7 +456,7 @@ def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, r
             ]
         else:
             outputs = self.model.generate(
-                prompt_token_ids=inputs,
+                prompts=[TokensPrompt(prompt_token_ids=input) for input in inputs],
                 sampling_params=sampling_params,
                 use_tqdm=True,
             )
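
Note on the last vllm_model.py hunk: newer vllm releases no longer accept the prompt_token_ids= keyword on LLM.generate, so pre-tokenized prompts are passed as TokensPrompt entries via prompts= instead. A minimal sketch of that call pattern follows (not part of the patch; the model name and token ids are placeholders, assuming a recent vllm build):

from vllm import LLM, SamplingParams
from vllm.inputs.data import TokensPrompt

# Placeholder model; any small causal LM served by vllm would do.
llm = LLM(model="facebook/opt-125m")
sampling_params = SamplingParams(temperature=0.0, max_tokens=16)

# Prompts that were already tokenized upstream (dummy token ids here).
token_id_batches = [[1, 100, 200], [1, 300, 400, 500]]
prompts = [TokensPrompt(prompt_token_ids=ids) for ids in token_id_batches]

# Same call shape as the updated code path in vllm_model.py.
outputs = llm.generate(prompts=prompts, sampling_params=sampling_params, use_tqdm=True)
for output in outputs:
    print(output.outputs[0].text)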