Commit f6c3e91

feat: implement graceful model discovery for vLLM provider
- Attempt model discovery first for backward compatibility
- If discovery fails and refresh_models=false, continue without error
- If discovery fails and refresh_models=true, fail hard with ValueError
- Supports dynamic token authentication scenarios

Fixes OAuth authentication issues when vLLM service requires dynamic tokens
1 parent 0a41c4e commit f6c3e91
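
To make the three outcomes above concrete, here is a minimal sketch of the discovery policy in isolation. The names check_model, cfg, and fetch_model_ids are hypothetical stand-ins for the adapter's method, its provider config, and the live /v1/models listing; this illustrates the policy, not the adapter's exact code.

import logging

log = logging.getLogger(__name__)


async def check_model(model_id: str, cfg, fetch_model_ids) -> None:
    # Sketch only: `cfg.refresh_models` mirrors the provider config flag,
    # `fetch_model_ids` stands in for the live /v1/models listing.
    try:
        available = await fetch_model_ids()
        if model_id not in available:
            raise ValueError(f"Model {model_id} is not being served by vLLM.")
    except Exception as e:
        if cfg.refresh_models:
            # refresh_models=true: discovery is mandatory, so fail hard.
            raise ValueError(f"Model verification failed: {e}") from e
        # refresh_models=false: tolerate a failed live check, e.g. when the
        # endpoint only issues dynamic OAuth tokens at request time.
        log.warning(f"Model verification failed: {e}. Continuing without live check.")

One consequence worth noting: with refresh_models=false, even a model that is absent from a successful live listing only produces a warning, because the broad except Exception in the diff below also catches the ValueError raised by the membership check.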

File tree

1 file changed: +21 −8 lines changed

llama_stack/providers/remote/inference/vllm/vllm.py

Lines changed: 21 additions & 8 deletions

@@ -430,16 +430,29 @@ async def register_model(self, model: Model) -> Model:
             pass  # Ignore statically unknown model, will check live listing
         try:
             res = self.client.models.list()
+            available_models = [m.id async for m in res]
+            if model.provider_resource_id not in available_models:
+                raise ValueError(
+                    f"Model {model.provider_resource_id} is not being served by vLLM. "
+                    f"Available models: {', '.join(available_models)}"
+                )
         except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
-        available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
+            if self.config.refresh_models:
+                raise ValueError(
+                    f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
+                ) from e
+            # Otherwise, gracefully continue without verification
+            log.warning(
+                f"Failed to connect to vLLM at {self.config.url} for model verification. Continuing without live check (refresh_models=false)."
             )
+        except Exception as e:
+            if self.config.refresh_models:
+                raise ValueError(f"Model verification failed: {e}") from e
+            # Otherwise, gracefully continue without verification
+            log.warning(
+                f"Model verification failed for {model.provider_resource_id}: {e}. Continuing without live check (refresh_models=false)."
+            )
+
         return model
 
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
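
Both configurations can be exercised against the policy sketch above; the following is a minimal, self-contained demonstration, where failing_listing simulates a vLLM endpoint that rejects unauthenticated discovery:

import asyncio
from types import SimpleNamespace


async def failing_listing() -> list[str]:
    # Simulates discovery failing, e.g. because the endpoint requires a
    # dynamically issued OAuth token that is not available at startup.
    raise ConnectionError("401 Unauthorized: dynamic token required")


async def main() -> None:
    # refresh_models=false: the failure is logged and registration proceeds.
    await check_model("my-model", SimpleNamespace(refresh_models=False), failing_listing)

    # refresh_models=true: the same failure becomes a hard ValueError.
    try:
        await check_model("my-model", SimpleNamespace(refresh_models=True), failing_listing)
    except ValueError as e:
        print(f"Hard failure, as expected: {e}")


asyncio.run(main())

A test against the real adapter would follow the same shape: stub client.models.list to raise, then assert that register_model returns the model when refresh_models=false and raises ValueError when refresh_models=true.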
