File tree Expand file tree Collapse file tree 2 files changed +4
-3
lines changed Expand file tree Collapse file tree 2 files changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -53,7 +53,6 @@ def __init__(
53
53
include_finished_set , log_stats )
54
54
self .scheduled_req_ids : set [str ] = set ()
55
55
self .running : list [Request ] = []
56
- self .lock_version = vllm_version_is ("0.9.2" )
57
56
58
57
def schedule (self ) -> SchedulerOutput :
59
58
if self .scheduler_config .chunked_prefill_enabled :
@@ -284,13 +283,13 @@ def skip_cur_request():
284
283
# allow the lower-priority requests to be scheduled.
285
284
req_index += 1
286
285
continue
287
- if self . lock_version :
286
+ if vllm_version_is ( "0.9.2" ) :
288
287
num_draft_tokens = max (
289
288
num_new_tokens + request .num_computed_tokens -
290
289
request .num_tokens , 0 )
291
290
292
291
while True :
293
- if self . lock_version :
292
+ if vllm_version_is ( "0.9.2" ) :
294
293
new_blocks = self .kv_cache_manager .allocate_slots (
295
294
request ,
296
295
num_new_tokens ,
Original file line number Diff line number Diff line change 22
22
import math
23
23
import os
24
24
import shutil
25
+ import functools
25
26
from contextlib import contextmanager , nullcontext
26
27
from enum import Enum
27
28
from threading import Lock
@@ -280,6 +281,7 @@ def adapt_patch(is_global_patch: bool = False):
280
281
from vllm_ascend .patch import worker # noqa: F401
281
282
282
283
284
+ @functools .cache
283
285
def vllm_version_is (target_vllm_version : str ):
284
286
if envs .VLLM_VERSION is not None :
285
287
vllm_version = envs .VLLM_VERSION
You can’t perform that action at this time.
0 commit comments