Enable v1 metrics tests (#20953)

eicherseiji · web-flow · commit d1fb65bde367 · 2025-07-20T03:22:02.000Z
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -264,6 +264,7 @@ steps:
     - pytest -v -s v1/structured_output
     - pytest -v -s v1/spec_decode
     - pytest -v -s v1/kv_connector/unit
+    - pytest -v -s v1/metrics
     - pytest -v -s v1/test_serial_utils.py
     - pytest -v -s v1/test_utils.py
     - pytest -v -s v1/test_oracle.py
diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+
 import pytest
 import ray
 
+from vllm.config import ModelDType
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
 from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
@@ -27,7 +30,7 @@ def use_v1_only(monkeypatch):
 def test_engine_log_metrics_ray(
     example_prompts,
     model: str,
-    dtype: str,
+    dtype: ModelDType,
     max_tokens: int,
 ) -> None:
     """ Simple smoke test, verifying this can be used without exceptions.
@@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
     class EngineTestActor:
 
         async def run(self):
-            engine_args = AsyncEngineArgs(
-                model=model,
-                dtype=dtype,
-                disable_log_stats=False,
-            )
+            # Set environment variable inside the Ray actor since environment
+            # variables from pytest fixtures don't propagate to Ray actors
+            os.environ['VLLM_USE_V1'] = '1'
+
+            engine_args = AsyncEngineArgs(model=model,
+                                          dtype=dtype,
+                                          disable_log_stats=False,
+                                          enforce_eager=True)
 
             engine = AsyncLLM.from_engine_args(
                 engine_args, stat_loggers=[RayPrometheusStatLogger])
diff --git a/vllm/v1/metrics/ray_wrappers.py b/vllm/v1/metrics/ray_wrappers.py
@@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
     def __init__(self,
                  name: str,
                  documentation: Optional[str] = "",
-                 labelnames: Optional[list[str]] = None):
+                 labelnames: Optional[list[str]] = None,
+                 multiprocess_mode: Optional[str] = ""):
+
+        # All Ray metrics are keyed by WorkerId, so multiprocess modes like
+        # "mostrecent", "all", "sum" do not apply. This logic can be manually
+        # implemented at the observability layer (Prometheus/Grafana).
+        del multiprocess_mode
         labelnames_tuple = tuple(labelnames) if labelnames else None
         self.metric = ray_metrics.Gauge(name=name,
                                         description=documentation,