Commit c1ce446

feat(llm): Add vLLM Support (#683)
* add vllm
* fix
* fix formatting
* add to table
1 parent 5e5e37d commit c1ce446

File tree

- docs/docs/ai/llm.mdx
- python/cocoindex/llm.py
- src/llm/mod.rs
- src/llm/vllm.rs

4 files changed: +55 −0 lines changed


docs/docs/ai/llm.mdx

Lines changed: 32 additions & 0 deletions
@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
 | [Voyage](#voyage) | `LlmApiType.VOYAGE` |||
 | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` |||
 | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` |||
+| [vLLM](#vllm) | `LlmApiType.VLLM` |||

 ## LLM Tasks

@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
 </Tabs>

 You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run the vLLM server:
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+A spec for vLLM looks like this:
+
+<Tabs>
+<TabItem value="python" label="Python" default>
+
+```python
+cocoindex.LlmSpec(
+    api_type=cocoindex.LlmApiType.VLLM,
+    model="deepseek-ai/deepseek-coder-1.3b-instruct",
+    address="http://127.0.0.1:8000/v1",
+)
+```
+
+</TabItem>
+</Tabs>
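`vllm serve` exposes an OpenAI-compatible API, which is why the spec above only needs `address` pointed at `http://127.0.0.1:8000/v1`. As a quick sanity check before wiring the spec into cocoindex, you can hit the server directly with the official `openai` Python client. This is a minimal sketch, assuming the `vllm serve` command from the docs above is running locally and was started without `--api-key` (vLLM then accepts any placeholder key):

```python
# Sketch: verify the local vLLM server speaks the OpenAI-compatible API.
# Assumes `vllm serve deepseek-ai/deepseek-coder-1.3b-instruct` is running on
# 127.0.0.1:8000 without --api-key, so a placeholder key is accepted.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:8000/v1",  # same address as in the LlmSpec above
    api_key="EMPTY",                      # placeholder; not checked without --api-key
)

# List served models; should include deepseek-ai/deepseek-coder-1.3b-instruct.
print([m.id for m in client.models.list().data])

# One-shot chat completion against the same model the spec references.
resp = client.chat.completions.create(
    model="deepseek-ai/deepseek-coder-1.3b-instruct",
    messages=[{"role": "user", "content": "Write a Python one-liner to reverse a list."}],
)
print(resp.choices[0].message.content)
```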

python/cocoindex/llm.py

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
     LITE_LLM = "LiteLlm"
     OPEN_ROUTER = "OpenRouter"
     VOYAGE = "Voyage"
+    VLLM = "Vllm"


 @dataclass
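The new member's string value, "Vllm", mirrors the `Vllm` variant added to the Rust `LlmApiType` enum below, which is presumably what lets a Python spec deserialize into the Rust layer. A small illustrative sketch of that correspondence (not part of the commit), reusing the spec from the docs diff above:

```python
import cocoindex

# The Python enum carries the string form of the Rust variant name.
assert cocoindex.LlmApiType.VLLM.value == "Vllm"

# Same spec as in the docs diff above, built programmatically.
spec = cocoindex.LlmSpec(
    api_type=cocoindex.LlmApiType.VLLM,
    model="deepseek-ai/deepseek-coder-1.3b-instruct",
    address="http://127.0.0.1:8000/v1",
)
```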

src/llm/mod.rs

Lines changed: 6 additions & 0 deletions
@@ -13,6 +13,7 @@ pub enum LlmApiType {
     LiteLlm,
     OpenRouter,
     Voyage,
+    Vllm,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
 mod ollama;
 mod openai;
 mod openrouter;
+mod vllm;
 mod voyage;

 pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
         LlmApiType::Voyage => {
             api_bail!("Voyage is not supported for generation")
         }
+        LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+        }
     };
     Ok(client)
 }
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
         LlmApiType::Ollama
         | LlmApiType::OpenRouter
         | LlmApiType::LiteLlm
+        | LlmApiType::Vllm
         | LlmApiType::Anthropic => {
             api_bail!("Embedding is not supported for API type {:?}", api_type)
         }

src/llm/vllm.rs

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+        let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+        let api_key = std::env::var("VLLM_API_KEY").ok();
+        let mut config = OpenAIConfig::new().with_api_base(address);
+        if let Some(api_key) = api_key {
+            config = config.with_api_key(api_key);
+        }
+        Ok(Client::from_parts(OpenAIClient::with_config(config)))
+    }
+}
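The new module is a thin wrapper: it reuses the existing OpenAI client and only swaps in a default local base URL plus an optional API key read from `VLLM_API_KEY`. A rough Python equivalent of the same construction logic, using the `openai` package purely to illustrate the behavior (not part of the commit; the "EMPTY" fallback is an assumption because the Python client requires some key):

```python
import os
from openai import OpenAI

def new_vllm_client(address: str | None = None) -> OpenAI:
    """Rough mirror of Client::new_vllm: default local address, optional VLLM_API_KEY."""
    base_url = address or "http://127.0.0.1:8000/v1"
    # Like the Rust code, only honor a key when VLLM_API_KEY is set; otherwise
    # fall back to a placeholder, since the openai package insists on a key.
    api_key = os.environ.get("VLLM_API_KEY", "EMPTY")
    return OpenAI(base_url=base_url, api_key=api_key)

client = new_vllm_client()
```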

0 commit comments
