4 files changed: +55 −0 lines changed

@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
 | [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
 | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
 | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
+| [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |
 
 ## LLM Tasks
 
@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
 </Tabs>
 
 You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run the vLLM server:
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+A spec for vLLM looks like this:
+
+<Tabs>
+<TabItem value="python" label="Python" default>
+
+```python
+cocoindex.LlmSpec(
+    api_type=cocoindex.LlmApiType.VLLM,
+    model="deepseek-ai/deepseek-coder-1.3b-instruct",
+    address="http://127.0.0.1:8000/v1",
+)
+```
+
+</TabItem>
+</Tabs>
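The docs change above only shows the spec itself. For context, here is a minimal sketch of how such a spec is typically consumed, assuming the existing `cocoindex.functions.ExtractByLlm` transform and a purely illustrative `Contact` dataclass; only the `LlmSpec` arguments come from this change:

```python
import dataclasses

import cocoindex


@dataclasses.dataclass
class Contact:
    # Hypothetical output schema, used only for illustration.
    name: str
    email: str


# Sketch: wire the vLLM spec into an extraction function, assuming
# ExtractByLlm keeps its current llm_spec/output_type/instruction parameters.
extract_contact = cocoindex.functions.ExtractByLlm(
    llm_spec=cocoindex.LlmSpec(
        api_type=cocoindex.LlmApiType.VLLM,
        model="deepseek-ai/deepseek-coder-1.3b-instruct",
        address="http://127.0.0.1:8000/v1",
    ),
    output_type=Contact,
    instruction="Extract the contact's name and email address.",
)
```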
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
     LITE_LLM = "LiteLlm"
     OPEN_ROUTER = "OpenRouter"
     VOYAGE = "Voyage"
+    VLLM = "Vllm"
 
 
 @dataclass
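The string value `"Vllm"` is what the Python layer serializes and the Rust enum below deserializes, so the two have to stay in sync. A quick sanity check, assuming `LlmApiType` is re-exported at the package root as in the docs examples:

```python
import cocoindex

# "Vllm" must match the Rust variant LlmApiType::Vllm added below.
assert cocoindex.LlmApiType.VLLM.value == "Vllm"
```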
@@ -13,6 +13,7 @@ pub enum LlmApiType {
     LiteLlm,
     OpenRouter,
     Voyage,
+    Vllm,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
 mod ollama;
 mod openai;
 mod openrouter;
+mod vllm;
 mod voyage;
 
 pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
         LlmApiType::Voyage => {
             api_bail!("Voyage is not supported for generation")
         }
+        LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+        }
     };
     Ok(client)
 }
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
         LlmApiType::Ollama
         | LlmApiType::OpenRouter
         | LlmApiType::LiteLlm
+        | LlmApiType::Vllm
         | LlmApiType::Anthropic => {
             api_bail!("Embedding is not supported for API type {:?}", api_type)
         }
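As the two match arms show, vLLM is wired up for generation only; routing it to the embedding client bails out with an error. In Python terms, a sketch like the following would be rejected when the flow is built, assuming `cocoindex.functions.EmbedText` keeps its current `api_type`/`model` parameters:

```python
import cocoindex

# Expected to fail at flow build time: the Rust side reports
# "Embedding is not supported for API type Vllm".
embed = cocoindex.functions.EmbedText(
    api_type=cocoindex.LlmApiType.VLLM,
    model="deepseek-ai/deepseek-coder-1.3b-instruct",
)
```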
@@ -0,0 +1,16 @@
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+        let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+        let api_key = std::env::var("VLLM_API_KEY").ok();
+        let mut config = OpenAIConfig::new().with_api_base(address);
+        if let Some(api_key) = api_key {
+            config = config.with_api_key(api_key);
+        }
+        Ok(Client::from_parts(OpenAIClient::with_config(config)))
+    }
+}