@@ -33,7 +33,7 @@ def __init__(self):
33
33
if is_flashinfer_available :
34
34
flashinfer_version = flashinfer .__version__
35
35
if flashinfer_version < "0.2.3" :
36
- logger .warning (
36
+ logger .warning_once (
37
37
"FlashInfer version >= 0.2.3 required. "
38
38
"Falling back to default sampling implementation." )
39
39
self .forward = self .forward_native
@@ -46,17 +46,18 @@ def __init__(self):
46
46
# None means False, while in V1, None means True. This is
47
47
# why we use the condition
48
48
# `envs.VLLM_USE_FLASHINFER_SAMPLER is not False` here.
49
- logger .info ("Using FlashInfer for top-p & top-k sampling." )
49
+ logger .info_once (
50
+ "Using FlashInfer for top-p & top-k sampling." )
50
51
self .forward = self .forward_cuda
51
52
else :
52
- logger .warning (
53
+ logger .warning_once (
53
54
"FlashInfer is available, but it is not enabled. "
54
55
"Falling back to the PyTorch-native implementation of "
55
56
"top-p & top-k sampling. For the best performance, "
56
57
"please set VLLM_USE_FLASHINFER_SAMPLER=1." )
57
58
self .forward = self .forward_native
58
59
else :
59
- logger .warning (
60
+ logger .warning_once (
60
61
"FlashInfer is not available. Falling back to the PyTorch-"
61
62
"native implementation of top-p & top-k sampling. For the "
62
63
"best performance, please install FlashInfer." )
@@ -97,9 +98,9 @@ def forward_cuda(
97
98
probs = logits .softmax (dim = - 1 , dtype = torch .float32 )
98
99
return random_sample (probs , generators )
99
100
if generators :
100
- logger .warning ("FlashInfer 0.2.3+ does not support "
101
- "per-request generators. Falling back to "
102
- "PyTorch-native implementation." )
101
+ logger .warning_once ("FlashInfer 0.2.3+ does not support "
102
+ "per-request generators. Falling back to "
103
+ "PyTorch-native implementation." )
103
104
return self .forward_native (logits , generators , k , p )
104
105
# flashinfer sampling functions expect contiguous logits.
105
106
# In flex_attn/triton_attn fp32 inference, logits can be non-contiguous
0 commit comments