4 files changed: +55 −0 lines changed

@@ -26,6 +26,7 @@ We support the following types of LLM APIs:
 | [Voyage](#voyage) | `LlmApiType.VOYAGE` | ❌ | ✅ |
 | [LiteLLM](#litellm) | `LlmApiType.LITE_LLM` | ✅ | ❌ |
 | [OpenRouter](#openrouter) | `LlmApiType.OPEN_ROUTER` | ✅ | ❌ |
+| [vLLM](#vllm) | `LlmApiType.VLLM` | ✅ | ❌ |
 
 ## LLM Tasks
 
@@ -307,3 +308,34 @@ cocoindex.LlmSpec(
 </Tabs>
 
 You can find the full list of models supported by OpenRouter [here](https://openrouter.ai/models).
+
+### vLLM
+
+Install vLLM:
+
+```bash
+pip install vllm
+```
+
+Run the vLLM server:
+
+```bash
+vllm serve deepseek-ai/deepseek-coder-1.3b-instruct
+```
+
+A spec for vLLM looks like this:
+
+<Tabs>
+<TabItem value="python" label="Python" default>
+
+```python
+cocoindex.LlmSpec(
+    api_type=cocoindex.LlmApiType.VLLM,
+    model="deepseek-ai/deepseek-coder-1.3b-instruct",
+    address="http://127.0.0.1:8000/v1",
+)
+```
+
+</TabItem>
+</Tabs>
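The docs change above only shows the spec itself. For context, here is a minimal sketch of how such a spec is typically consumed, assuming the existing `cocoindex.functions.ExtractByLlm` transform and a purely illustrative `Contact` dataclass; only the `LlmSpec` arguments come from this change:

```python
import dataclasses

import cocoindex


@dataclasses.dataclass
class Contact:
    # Hypothetical output schema, used only for illustration.
    name: str
    email: str


# Sketch: wire the vLLM spec into an extraction function, assuming
# ExtractByLlm keeps its current llm_spec/output_type/instruction parameters.
extract_contact = cocoindex.functions.ExtractByLlm(
    llm_spec=cocoindex.LlmSpec(
        api_type=cocoindex.LlmApiType.VLLM,
        model="deepseek-ai/deepseek-coder-1.3b-instruct",
        address="http://127.0.0.1:8000/v1",
    ),
    output_type=Contact,
    instruction="Extract the contact's name and email address.",
)
```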
@@ -12,6 +12,7 @@ class LlmApiType(Enum):
     LITE_LLM = "LiteLlm"
     OPEN_ROUTER = "OpenRouter"
     VOYAGE = "Voyage"
+    VLLM = "Vllm"
 
 
 @dataclass
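The string value `"Vllm"` is what the Python layer serializes and the Rust enum below deserializes, so the two have to stay in sync. A quick sanity check, assuming `LlmApiType` is re-exported at the package root as in the docs examples:

```python
import cocoindex

# "Vllm" must match the Rust variant LlmApiType::Vllm added below.
assert cocoindex.LlmApiType.VLLM.value == "Vllm"
```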
@@ -13,6 +13,7 @@ pub enum LlmApiType {
     LiteLlm,
     OpenRouter,
     Voyage,
+    Vllm,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -81,6 +82,7 @@ mod litellm;
 mod ollama;
 mod openai;
 mod openrouter;
+mod vllm;
 mod voyage;
 
 pub async fn new_llm_generation_client(
@@ -108,6 +110,9 @@ pub async fn new_llm_generation_client(
         LlmApiType::Voyage => {
             api_bail!("Voyage is not supported for generation")
         }
+        LlmApiType::Vllm => {
+            Box::new(vllm::Client::new_vllm(address).await?) as Box<dyn LlmGenerationClient>
+        }
     };
     Ok(client)
 }
@@ -129,6 +134,7 @@ pub fn new_llm_embedding_client(
         LlmApiType::Ollama
         | LlmApiType::OpenRouter
         | LlmApiType::LiteLlm
+        | LlmApiType::Vllm
         | LlmApiType::Anthropic => {
             api_bail!("Embedding is not supported for API type {:?}", api_type)
         }
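As the two match arms show, vLLM is wired up for generation only; routing it to the embedding client bails out with an error. In Python terms, a sketch like the following would be rejected when the flow is built, assuming `cocoindex.functions.EmbedText` keeps its current `api_type`/`model` parameters:

```python
import cocoindex

# Expected to fail at flow build time: the Rust side reports
# "Embedding is not supported for API type Vllm".
embed = cocoindex.functions.EmbedText(
    api_type=cocoindex.LlmApiType.VLLM,
    model="deepseek-ai/deepseek-coder-1.3b-instruct",
)
```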
@@ -0,0 +1,16 @@
+use async_openai::Client as OpenAIClient;
+use async_openai::config::OpenAIConfig;
+
+pub use super::openai::Client;
+
+impl Client {
+    pub async fn new_vllm(address: Option<String>) -> anyhow::Result<Self> {
+        let address = address.unwrap_or_else(|| "http://127.0.0.1:8000/v1".to_string());
+        let api_key = std::env::var("VLLM_API_KEY").ok();
+        let mut config = OpenAIConfig::new().with_api_base(address);
+        if let Some(api_key) = api_key {
+            config = config.with_api_key(api_key);
+        }
+        Ok(Client::from_parts(OpenAIClient::with_config(config)))
+    }
+}