File tree Expand file tree Collapse file tree 3 files changed +20
-7
lines changed Expand file tree Collapse file tree 3 files changed +20
-7
lines changed Original file line number Diff line number Diff line change @@ -264,6 +264,7 @@ steps:
264
264
- pytest -v -s v1/structured_output
265
265
- pytest -v -s v1/spec_decode
266
266
- pytest -v -s v1/kv_connector/unit
267
+ - pytest -v -s v1/metrics
267
268
- pytest -v -s v1/test_serial_utils.py
268
269
- pytest -v -s v1/test_utils.py
269
270
- pytest -v -s v1/test_oracle.py
Original file line number Diff line number Diff line change 1
1
# SPDX-License-Identifier: Apache-2.0
2
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
+ import os
4
+
3
5
import pytest
4
6
import ray
5
7
8
+ from vllm .config import ModelDType
6
9
from vllm .sampling_params import SamplingParams
7
10
from vllm .v1 .engine .async_llm import AsyncEngineArgs , AsyncLLM
8
11
from vllm .v1 .metrics .ray_wrappers import RayPrometheusStatLogger
@@ -27,7 +30,7 @@ def use_v1_only(monkeypatch):
27
30
def test_engine_log_metrics_ray (
28
31
example_prompts ,
29
32
model : str ,
30
- dtype : str ,
33
+ dtype : ModelDType ,
31
34
max_tokens : int ,
32
35
) -> None :
33
36
""" Simple smoke test, verifying this can be used without exceptions.
@@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
37
40
class EngineTestActor :
38
41
39
42
async def run (self ):
40
- engine_args = AsyncEngineArgs (
41
- model = model ,
42
- dtype = dtype ,
43
- disable_log_stats = False ,
44
- )
43
+ # Set environment variable inside the Ray actor since environment
44
+ # variables from pytest fixtures don't propagate to Ray actors
45
+ os .environ ['VLLM_USE_V1' ] = '1'
46
+
47
+ engine_args = AsyncEngineArgs (model = model ,
48
+ dtype = dtype ,
49
+ disable_log_stats = False ,
50
+ enforce_eager = True )
45
51
46
52
engine = AsyncLLM .from_engine_args (
47
53
engine_args , stat_loggers = [RayPrometheusStatLogger ])
Original file line number Diff line number Diff line change @@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
51
51
def __init__ (self ,
52
52
name : str ,
53
53
documentation : Optional [str ] = "" ,
54
- labelnames : Optional [list [str ]] = None ):
54
+ labelnames : Optional [list [str ]] = None ,
55
+ multiprocess_mode : Optional [str ] = "" ):
56
+
57
+ # All Ray metrics are keyed by WorkerId, so multiprocess modes like
58
+ # "mostrecent", "all", "sum" do not apply. This logic can be manually
59
+ # implemented at the observability layer (Prometheus/Grafana).
60
+ del multiprocess_mode
55
61
labelnames_tuple = tuple (labelnames ) if labelnames else None
56
62
self .metric = ray_metrics .Gauge (name = name ,
57
63
description = documentation ,
You can’t perform that action at this time.
0 commit comments