
Commit e9214f9

feat: Add allow_listing_models
• Add allow_listing_models configuration flag to VLLM provider to control model listing behavior
• Implement allow_listing_models() method across all providers with default implementations in base classes
• Prevent HTTP requests to /v1/models endpoint when allow_listing_models=false for improved security and performance
• Fix unit tests to include allow_listing_models method in test classes and mock objects
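
The commit message mentions default implementations of `allow_listing_models()` in the provider base classes; those base-class files are not part of the diff below. For orientation only, a minimal sketch of what such a default could look like (the class name and method layout are assumptions, not the actual base class):

```python
# Hypothetical sketch of a base-class default for allow_listing_models().
# The class name and surrounding methods are illustrative assumptions;
# the real base classes are not shown in this commit.
class InferenceProviderBase:
    async def should_refresh_models(self) -> bool:
        # Default: no periodic model refresh.
        return False

    async def allow_listing_models(self) -> bool:
        # Default: listing models is permitted unless a provider opts out.
        return True
```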
1 parent 188a56a commit e9214f9

15 files changed (+143, -25 lines)

docs/docs/providers/inference/remote_vllm.mdx

Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,7 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | `api_token` | `str \| None` | No | fake | The API token |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
 | `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |
+| `allow_listing_models` | `<class 'bool'>` | No | True | Whether to allow listing models from the vLLM server |

 ## Sample Configuration

@@ -28,4 +29,5 @@ url: ${env.VLLM_URL:=}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+allow_listing_models: ${env.VLLM_ALLOW_LISTING_MODELS:=true}
 ```

llama_stack/core/routing_tables/models.py

Lines changed: 6 additions & 0 deletions
@@ -43,6 +43,12 @@ async def refresh(self) -> None:
         await self.update_registered_models(provider_id, models)

     async def list_models(self) -> ListModelsResponse:
+        # Check if providers allow listing models before returning models
+        for provider_id, provider in self.impls_by_provider_id.items():
+            allow_listing_models = await provider.allow_listing_models()
+            logger.debug(f"Provider {provider_id}: allow_listing_models={allow_listing_models}")
+            if not allow_listing_models:
+                logger.debug(f"Provider {provider_id} has allow_listing_models disabled")
         return ListModelsResponse(data=await self.get_all_with_type("model"))

     async def openai_list_models(self) -> OpenAIListModelsResponse:
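
The hunk above only logs each provider's `allow_listing_models()` value; the returned model list itself is unchanged. To illustrate the flag's intended semantics separately, here is a self-contained sketch of how a caller could filter a model listing by that flag (the provider and model shapes are simplified assumptions, not the routing-table code):

```python
# Standalone sketch: filter a model listing by each provider's
# allow_listing_models() answer. FakeProvider and the plain dicts are
# simplified stand-ins for illustration, not llama-stack types.
import asyncio


class FakeProvider:
    def __init__(self, allow: bool) -> None:
        self._allow = allow

    async def allow_listing_models(self) -> bool:
        return self._allow


async def visible_models(
    models_by_provider: dict[str, list[str]], providers: dict[str, FakeProvider]
) -> list[str]:
    visible: list[str] = []
    for provider_id, provider in providers.items():
        if await provider.allow_listing_models():
            visible.extend(models_by_provider.get(provider_id, []))
    return visible


providers = {"vllm": FakeProvider(allow=False), "tgi": FakeProvider(allow=True)}
models_by_provider = {"vllm": ["vllm/llama-3"], "tgi": ["tgi/mistral"]}
print(asyncio.run(visible_models(models_by_provider, providers)))  # ['tgi/mistral']
```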

llama_stack/distributions/ci-tests/run.yaml

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ providers:
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      allow_listing_models: ${env.VLLM_ALLOW_LISTING_MODELS:=true}
  - provider_id: ${env.TGI_URL:+tgi}
    provider_type: remote::tgi
    config:

llama_stack/distributions/postgres-demo/run.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ providers:
1616
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
1717
api_token: ${env.VLLM_API_TOKEN:=fake}
1818
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
19+
allow_listing_models: ${env.VLLM_ALLOW_LISTING_MODELS:=true}
1920
- provider_id: sentence-transformers
2021
provider_type: inline::sentence-transformers
2122
vector_io:

llama_stack/distributions/starter-gpu/run.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ providers:
3131
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
3232
api_token: ${env.VLLM_API_TOKEN:=fake}
3333
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
34+
allow_listing_models: ${env.VLLM_ALLOW_LISTING_MODELS:=true}
3435
- provider_id: ${env.TGI_URL:+tgi}
3536
provider_type: remote::tgi
3637
config:

llama_stack/distributions/starter/run.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ providers:
3131
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
3232
api_token: ${env.VLLM_API_TOKEN:=fake}
3333
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
34+
allow_listing_models: ${env.VLLM_ALLOW_LISTING_MODELS:=true}
3435
- provider_id: ${env.TGI_URL:+tgi}
3536
provider_type: remote::tgi
3637
config:

llama_stack/providers/inline/inference/meta_reference/inference.py

Lines changed: 3 additions & 0 deletions
@@ -71,6 +71,9 @@ async def openai_completion(self, *args, **kwargs):
     async def should_refresh_models(self) -> bool:
         return False

+    async def allow_listing_models(self) -> bool:
+        return True
+
     async def list_models(self) -> list[Model] | None:
         return None

llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py

Lines changed: 3 additions & 0 deletions
@@ -52,6 +52,9 @@ async def shutdown(self) -> None:
     async def should_refresh_models(self) -> bool:
         return False

+    async def allow_listing_models(self) -> bool:
+        return True
+
     async def list_models(self) -> list[Model] | None:
         return [
             Model(

llama_stack/providers/remote/inference/vllm/config.py

Lines changed: 5 additions & 0 deletions
@@ -34,6 +34,10 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
         default=False,
         description="Whether to refresh models periodically",
     )
+    allow_listing_models: bool = Field(
+        default=True,
+        description="Whether to allow listing models from the vLLM server",
+    )

     @field_validator("tls_verify")
     @classmethod
@@ -59,4 +63,5 @@ def sample_run_config(
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
             "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
+            "allow_listing_models": "${env.VLLM_ALLOW_LISTING_MODELS:=true}",
         }
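
For a quick check of the new field, the config class can be instantiated directly with the flag disabled. A small sketch, assuming the config accepts these keyword arguments and with a placeholder URL (neither is taken from this commit):

```python
# Sketch: build the vLLM adapter config with model listing disabled.
# The url value is a placeholder assumption.
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig

config = VLLMInferenceAdapterConfig(
    url="http://localhost:8000/v1",  # placeholder vLLM endpoint
    allow_listing_models=False,      # opt out of /v1/models requests
)
print(config.allow_listing_models)  # False
print(config.refresh_models)        # False (existing default)
```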

llama_stack/providers/remote/inference/vllm/vllm.py

Lines changed: 35 additions & 14 deletions
@@ -282,7 +282,18 @@ async def should_refresh_models(self) -> bool:
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models

+    async def allow_listing_models(self) -> bool:
+        # Respecting the allow_listing_models directive
+        result = self.config.allow_listing_models
+        log.debug(f"VLLM allow_listing_models: {result}")
+        return result
+
     async def list_models(self) -> list[Model] | None:
+        log.debug(f"VLLM list_models called, allow_listing_models={self.config.allow_listing_models}")
+        if not self.config.allow_listing_models:
+            log.debug("VLLM list_models returning None due to allow_listing_models=False")
+            return None
+
         models = []
         async for m in self.client.models.list():
             model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
@@ -332,24 +343,34 @@ async def _get_model(self, model_id: str) -> Model:
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}

-    async def register_model(self, model: Model) -> Model:
-        try:
-            model = await self.register_helper.register_model(model)
-        except ValueError:
-            pass  # Ignore statically unknown model, will check live listing
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check if a specific model is available from the vLLM server.
+
+        This method respects the allow_listing_models configuration flag.
+        If allow_listing_models is False, it returns True to allow model registration
+        without making HTTP requests (trusting that the model exists).
+
+        :param model: The model identifier to check.
+        :return: True if the model is available or if allow_listing_models is False, False otherwise.
+        """
+        # Check if provider allows listing models before making HTTP request
+        if not self.config.allow_listing_models:
+            log.debug(
+                "VLLM check_model_availability returning True due to allow_listing_models=False (trusting model exists)"
+            )
+            return True

         try:
             res = self.client.models.list()
         except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
+            log.warning(f"Failed to connect to vLLM at {self.config.url}: {e}")
+            return False
+
         available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
-        return model
+        is_available = model in available_models
+        log.debug(f"VLLM model {model} availability: {is_available}")
+        return is_available

     async def _get_params(self, request: ChatCompletionRequest) -> dict:
         options = get_sampling_options(request.sampling_params)
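
To see the trust-without-listing behavior in isolation, the following self-contained sketch mirrors the `check_model_availability` logic above against a stubbed client; the stub config and client are invented for illustration, whereas the real adapter queries a live vLLM server:

```python
# Self-contained sketch mirroring check_model_availability above.
# VLLMStubConfig and StubClient are invented stand-ins, not llama-stack types.
import asyncio
from dataclasses import dataclass


@dataclass
class VLLMStubConfig:
    allow_listing_models: bool = True


class StubClient:
    """Pretends to be the vLLM /v1/models listing."""

    async def list_model_ids(self) -> list[str]:
        return ["meta-llama/Llama-3.1-8B-Instruct"]


async def check_model_availability(config: VLLMStubConfig, client: StubClient, model: str) -> bool:
    if not config.allow_listing_models:
        # Trust that the model exists; skip the HTTP round trip entirely.
        return True
    return model in await client.list_model_ids()


async def main() -> None:
    client = StubClient()
    # With listing disabled, any model is trusted to exist.
    print(await check_model_availability(VLLMStubConfig(allow_listing_models=False), client, "unknown-model"))  # True
    # With listing enabled, only served models are reported as available.
    print(await check_model_availability(VLLMStubConfig(allow_listing_models=True), client, "unknown-model"))   # False


asyncio.run(main())
```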
