@@ -282,7 +282,18 @@ async def should_refresh_models(self) -> bool:
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
+    async def allow_listing_models(self) -> bool:
+        # Respecting the allow_listing_models directive
+        result = self.config.allow_listing_models
+        log.debug(f"VLLM allow_listing_models: {result}")
+        return result
+
     async def list_models(self) -> list[Model] | None:
+        log.debug(f"VLLM list_models called, allow_listing_models={self.config.allow_listing_models}")
+        if not self.config.allow_listing_models:
+            log.debug("VLLM list_models returning None due to allow_listing_models=False")
+            return None
+
         models = []
         async for m in self.client.models.list():
             model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
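For illustration, here is a minimal, self-contained sketch of the gate this hunk introduces: when `allow_listing_models` is false, `list_models` returns `None` without contacting the server, which callers can distinguish from an empty list. The `AdapterConfig` dataclass and the `served_ids` stand-in are hypothetical; only the flag name and the None-vs-list contract come from the diff.

```python
import asyncio
from dataclasses import dataclass


@dataclass
class AdapterConfig:  # hypothetical stand-in for the adapter's real config class
    allow_listing_models: bool = True


async def list_models(config: AdapterConfig, served_ids: list[str]) -> list[str] | None:
    # Mirrors the gate added above: None signals "listing disabled",
    # which is distinct from an empty model list.
    if not config.allow_listing_models:
        return None
    return served_ids


async def demo() -> None:
    assert await list_models(AdapterConfig(allow_listing_models=False), ["m1"]) is None
    assert await list_models(AdapterConfig(allow_listing_models=True), ["m1"]) == ["m1"]


asyncio.run(demo())
```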
@@ -332,24 +343,34 @@ async def _get_model(self, model_id: str) -> Model:
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
-    async def register_model(self, model: Model) -> Model:
-        try:
-            model = await self.register_helper.register_model(model)
-        except ValueError:
-            pass  # Ignore statically unknown model, will check live listing
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check if a specific model is available from the vLLM server.
+
+        This method respects the allow_listing_models configuration flag.
+        If allow_listing_models is False, it returns True to allow model registration
+        without making HTTP requests (trusting that the model exists).
+
+        :param model: The model identifier to check.
+        :return: True if the model is available or if allow_listing_models is False, False otherwise.
+        """
+        # Check if provider allows listing models before making HTTP request
+        if not self.config.allow_listing_models:
+            log.debug(
+                "VLLM check_model_availability returning True due to allow_listing_models=False (trusting model exists)"
+            )
+            return True
+
         try:
             res = self.client.models.list()
         except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
+            log.warning(f"Failed to connect to vLLM at {self.config.url}: {e}")
+            return False
+
         available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
-        return model
+        is_available = model in available_models
+        log.debug(f"VLLM model {model} availability: {is_available}")
+        return is_available
 
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
         options = get_sampling_options(request.sampling_params)
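To make the new contract concrete, here is a minimal sketch of the three outcomes of `check_model_availability` against a stubbed client. The `_Fake*` classes are hypothetical, and the built-in `ConnectionError` stands in for the SDK's `APIConnectionError`; the real adapter iterates the OpenAI-compatible client's `models.list()`. Disabled listing trusts the caller and returns True, a connection failure returns False instead of raising, and otherwise availability is a membership test against the served model IDs.

```python
import asyncio


class _FakeModel:
    def __init__(self, id: str):
        self.id = id


class _FakeModels:
    # Like the OpenAI async client, list() is called without await and the
    # result is consumed with `async for`.
    async def list(self):
        yield _FakeModel("served-model")


class _FakeClient:
    models = _FakeModels()


async def check_model_availability(allow_listing_models: bool, client, model: str) -> bool:
    if not allow_listing_models:
        return True  # trust that the model exists; no HTTP request is made
    try:
        res = client.models.list()
    except ConnectionError:  # stand-in for the SDK's APIConnectionError
        return False  # an unreachable server now means "unavailable", not an exception
    return model in [m.id async for m in res]


async def demo() -> None:
    client = _FakeClient()
    assert await check_model_availability(False, client, "anything")
    assert await check_model_availability(True, client, "served-model")
    assert not await check_model_availability(True, client, "missing-model")


asyncio.run(demo())
```

Note the behavioral shift relative to the removed `register_model`: registration errors (unknown or unserved models) were previously surfaced as `ValueError`; under the new method they become a boolean result, with the connection failure demoted to a warning log.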