Commit 13e1a98

Merge pull request #87 from intelligentnode/86-add-nvidia-models
Add Nvidia models
2 parents: 670a129 + af65b74 · commit 13e1a98

9 files changed: +236 −0 lines changed

intelli/config.py

Lines changed: 5 additions & 0 deletions
@@ -73,6 +73,11 @@
             "messages": "/v1/messages",
             "version": "2023-06-01"
         },
+        "nvidia": {
+            "base": "https://integrate.api.nvidia.com",
+            "chat": "/v1/chat/completions",
+            "embeddings": "/v1/embeddings"
+        },
     },
     "models": {
         "replicate": {

intelli/controller/remote_embed_model.py

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,7 @@
 from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
 from intelli.wrappers.mistralai_wrapper import MistralAIWrapper
 from intelli.wrappers.openai_wrapper import OpenAIWrapper
+from intelli.wrappers.nvidia_wrapper import NvidiaWrapper


 class RemoteEmbedModel:
@@ -11,6 +12,7 @@ def __init__(self, api_key, provider_name):
             'openai': OpenAIWrapper,
             'mistral': MistralAIWrapper,
             'gemini': GeminiAIWrapper,
+            'nvidia': NvidiaWrapper
         }
         if self.provider_name in providers:
             self.provider = providers[self.provider_name](api_key)
@@ -30,6 +32,8 @@ def get_embeddings(self, embed_input):
             params = embed_input.get_mistral_inputs()
         elif self.provider_name == 'gemini':
             params = embed_input.get_gemini_inputs()
+        elif self.provider_name == 'nvidia':
+            params = embed_input.get_nvidia_inputs()
         else:
             raise Exception("Invalid provider name.")

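Together with the new get_nvidia_inputs() method added to EmbedInput further down, the controller can now route embedding requests to NVIDIA. A minimal usage sketch, mirroring the new test file in this commit (it assumes NVIDIA_API_KEY is set in the environment and that the nvidia/llama-3.2-nv-embedqa-1b-v2 model is available):

import os
from intelli.controller.remote_embed_model import RemoteEmbedModel
from intelli.model.input.embed_input import EmbedInput

# route an embedding request to the NVIDIA provider
embed_model = RemoteEmbedModel(os.getenv("NVIDIA_API_KEY"), "nvidia")
embed_input = EmbedInput(["What is the capital of France?"], model="nvidia/llama-3.2-nv-embedqa-1b-v2")
result = embed_model.get_embeddings(embed_input)
print(result["data"][0]["embedding"][:5])  # first few floats of the embedding vector
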
intelli/function/chatbot.py

Lines changed: 25 additions & 0 deletions
@@ -8,6 +8,7 @@
 from intelli.wrappers.openai_wrapper import OpenAIWrapper
 from intelli.wrappers.anthropic_wrapper import AnthropicWrapper
 from intelli.wrappers.keras_wrapper import KerasWrapper
+from intelli.wrappers.nvidia_wrapper import NvidiaWrapper
 from enum import Enum

 class ChatProvider(Enum):
@@ -16,6 +17,7 @@ class ChatProvider(Enum):
     MISTRAL = "mistral"
     ANTHROPIC = "anthropic"
     KERAS = "keras"
+    NVIDIA = "nvidia"

 class Chatbot:

@@ -58,6 +60,8 @@ def _initialize_provider(self):
             return AnthropicWrapper(self.api_key)
         elif self.provider == ChatProvider.KERAS.value:
             return KerasWrapper(self.options['model_name'], self.options.get('model_params', {}))
+        elif self.provider == ChatProvider.NVIDIA.value:
+            return NvidiaWrapper(self.api_key)
         else:
             raise ValueError(f"Unsupported provider: {self.provider}")

@@ -104,6 +108,13 @@ def _chat_anthropic(self, params):
         response = self.wrapper.generate_text(params)

         return [message['text'] for message in response['content']]
+
+    def _chat_nvidia(self, params):
+        result = self.wrapper.generate_text(params)
+        choices = result.get("choices", [])
+        if not choices:
+            raise Exception("No choices returned from NVIDIA API")
+        return [choices[0]["message"]["content"]]

     def stream(self, chat_input):
         """Streams responses from the selected provider for the given chat input."""
@@ -156,6 +167,20 @@ def _stream_anthropic(self, params):
         except json.JSONDecodeError as e:
             print("Error decoding JSON from stream:", e)

+    def _stream_nvidia(self, params):
+        params["stream"] = True
+        stream = self.wrapper.generate_text_stream(params)
+        for line in stream:
+            if line.strip() and line.startswith("data: ") and line != "data: [DONE]":
+                json_content = line[len("data: "):].strip()
+                try:
+                    data_chunk = json.loads(json_content)
+                    content = data_chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                    if content:
+                        yield content
+                except json.JSONDecodeError as e:
+                    print("Error decoding JSON:", e)
+
     # helpers
     def _parse_openai_responses(self, results):
         responses = []

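With NVIDIA wired into the ChatProvider enum and _initialize_provider, the chatbot is used the same way as the other providers. A minimal sketch based on the new integration test in this commit (it assumes NVIDIA_API_KEY is set and that the deepseek-ai/deepseek-r1 model is available):

import os
from intelli.function.chatbot import Chatbot, ChatProvider
from intelli.model.input.chatbot_input import ChatModelInput

chatbot = Chatbot(os.getenv("NVIDIA_API_KEY"), ChatProvider.NVIDIA.value)

chat_input = ChatModelInput("You are a helpful assistant.", model="deepseek-ai/deepseek-r1", max_tokens=1024, temperature=0.6)
chat_input.add_user_message("What is the capital city of France?")

# one-shot response
print(chatbot.chat(chat_input))

# streamed response, chunk by chunk
for chunk in chatbot.stream(chat_input):
    print(chunk, end="")
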
intelli/model/input/chatbot_input.py

Lines changed: 12 additions & 0 deletions
@@ -131,3 +131,15 @@ def get_keras_input(self):
             **self.options
         }
         return params
+
+    def get_nvidia_input(self):
+        messages = [{'role': msg.role, 'content': msg.content} for msg in self.messages]
+        params = {
+            'model': self.model,
+            'messages': messages,
+            **({'temperature': self.temperature} if self.temperature is not None else {}),
+            **({'max_tokens': self.max_tokens} if self.max_tokens is not None else {}),
+            **self.options
+        }
+        return params
+

intelli/model/input/embed_input.py

Lines changed: 10 additions & 0 deletions
@@ -21,6 +21,16 @@ def get_gemini_inputs(self):
                 "parts": [{"text": text} for text in self.texts]
             }
         }
+
+    def get_nvidia_inputs(self):
+        inputs = {
+            "input": self.texts,
+            "model": self.model,
+            "input_type": "query",
+            "encoding_format": "float",
+            "truncate": "NONE"
+        }
+        return inputs

     def set_default_values(self, provider):
         if provider == "openai":

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import unittest
import os
import asyncio
from dotenv import load_dotenv
from intelli.function.chatbot import Chatbot, ChatProvider
from intelli.model.input.chatbot_input import ChatModelInput

load_dotenv()

class TestChatbotNvidiaChatAndStream(unittest.TestCase):
    def setUp(self):
        self.nvidia_api_key = os.getenv("NVIDIA_API_KEY")
        assert self.nvidia_api_key, "NVIDIA_API_KEY is not set."
        self.chatbot = Chatbot(self.nvidia_api_key, ChatProvider.NVIDIA.value)

    def test_nvidia_chat_and_stream(self):

        # Test normal chat
        print("Testing Nvidia chat")
        normal_input = ChatModelInput("You are a helpful assistant.", model="deepseek-ai/deepseek-r1", max_tokens=1024, temperature=0.6)
        normal_input.add_user_message("What is the capital city of france?")
        response = self.chatbot.chat(normal_input)
        if isinstance(response, dict) and "result" in response:
            normal_output = response["result"]
        else:
            normal_output = response
        self.assertTrue(len(normal_output) > 0, "Nvidia normal chat response should not be empty")
        print("Nvidia normal chat output:", normal_output)

        # Test streaming chat
        print("Testing Nvidia stream")
        stream_input = ChatModelInput("You are a helpful assistant.", model="deepseek-ai/deepseek-r1", max_tokens=1024, temperature=0.6)
        stream_input.add_user_message("What is the capital city of france?")
        stream_output = asyncio.run(self.get_stream_output(stream_input))
        self.assertTrue(len(stream_output) > 0, "Nvidia stream response should not be empty")
        print("Nvidia stream output:", stream_output)

    async def get_stream_output(self, chat_input):
        output = ""
        for chunk in self.chatbot.stream(chat_input):
            output += chunk
        return output

if __name__ == "__main__":
    unittest.main()

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
import unittest
import os
from dotenv import load_dotenv
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper

load_dotenv()


class TestNvidiaWrapper(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.api_key = os.getenv("NVIDIA_API_KEY")
        assert cls.api_key, "NVIDIA_API_KEY is not set."
        cls.wrapper = NvidiaWrapper(cls.api_key)

    def test_generate_text_llama(self):
        params = {
            "model": "meta/llama-3.3-70b-instruct",
            "messages": [
                {"role": "user", "content": "Write a limerick about GPU computing."}
            ],
            "max_tokens": 1024,
            "temperature": 0.2,
            "top_p": 0.7,
            "stream": False,
        }
        response = self.wrapper.generate_text(params)
        self.assertIn("choices", response)
        self.assertGreater(len(response["choices"]), 0)
        message = response["choices"][0]["message"]["content"]
        self.assertTrue(isinstance(message, str) and len(message) > 0)

    def test_generate_text_deepseek(self):
        params = {
            "model": "deepseek-ai/deepseek-r1",
            "messages": [
                {"role": "user", "content": "Which number is larger, 9.11 or 9.8?"}
            ],
            "max_tokens": 4096,
            "temperature": 0.6,
            "top_p": 0.7,
            "stream": False,
        }
        response = self.wrapper.generate_text(params)
        self.assertIn("choices", response)
        self.assertGreater(len(response["choices"]), 0)
        message = response["choices"][0]["message"]["content"]
        self.assertTrue(isinstance(message, str) and len(message) > 0)

    def test_get_embeddings(self):
        params = {
            "input": ["What is the capital of France?"],
            "model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
            "input_type": "query",
            "encoding_format": "float",
            "truncate": "NONE",
        }
        response = self.wrapper.get_embeddings(params)
        self.assertIn("data", response)
        self.assertGreater(len(response["data"]), 0)
        self.assertIn("embedding", response["data"][0])
        embedding = response["data"][0]["embedding"]
        self.assertIsInstance(embedding, list)
        self.assertGreater(len(embedding), 0)


if __name__ == "__main__":
    unittest.main()

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
import unittest
import os
from dotenv import load_dotenv
from intelli.model.input.embed_input import EmbedInput
from intelli.controller.remote_embed_model import RemoteEmbedModel

load_dotenv()

class TestRemoteEmbedModelNvidia(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.api_key = os.getenv("NVIDIA_API_KEY")
        assert cls.api_key, "NVIDIA_API_KEY is not set."
        cls.embed_model = RemoteEmbedModel(cls.api_key, "nvidia")

    def test_get_embeddings(self):
        text = "What is the capital of France?"
        embed_input = EmbedInput([text], model="nvidia/llama-3.2-nv-embedqa-1b-v2")
        result = self.embed_model.get_embeddings(embed_input)
        self.assertIn("data", result)
        self.assertGreater(len(result["data"]), 0)
        self.assertIn("embedding", result["data"][0])
        embedding = result["data"][0]["embedding"]
        self.assertIsInstance(embedding, list)
        self.assertGreater(len(embedding), 0)
        print("Nvidia embedding sample:", embedding[:5])

if __name__ == "__main__":
    unittest.main()

intelli/wrappers/nvidia_wrapper.py

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
import requests
from intelli.config import config


class NvidiaWrapper:
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.base_url = config["url"]["nvidia"]["base"]
        self.chat_endpoint = config["url"]["nvidia"]["chat"]
        self.embeddings_endpoint = config["url"]["nvidia"]["embeddings"]
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

    def generate_text(self, params: dict) -> dict:
        if "stream" not in params:
            params["stream"] = False
        url = self.base_url + self.chat_endpoint
        response = requests.post(url, json=params, headers=self.headers)
        response.raise_for_status()
        return response.json()

    def generate_text_stream(self, params: dict):
        params["stream"] = True
        url = self.base_url + self.chat_endpoint
        response = requests.post(url, json=params, headers=self.headers, stream=True)
        response.raise_for_status()
        for line in response.iter_lines(decode_unicode=True):
            if line:
                yield line

    def get_embeddings(self, params: dict) -> dict:
        url = self.base_url + self.embeddings_endpoint
        response = requests.post(url, json=params, headers=self.headers)
        response.raise_for_status()
        return response.json()

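The wrapper can also be called directly, without going through the Chatbot or RemoteEmbedModel layers. A minimal sketch following the wrapper tests in this commit (the model names and the NVIDIA_API_KEY environment variable are taken from those tests):

import os
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper

wrapper = NvidiaWrapper(os.getenv("NVIDIA_API_KEY"))

# chat completion against the NVIDIA endpoint
chat_response = wrapper.generate_text({
    "model": "meta/llama-3.3-70b-instruct",
    "messages": [{"role": "user", "content": "Write a limerick about GPU computing."}],
    "max_tokens": 1024,
})
print(chat_response["choices"][0]["message"]["content"])

# embeddings request
embed_response = wrapper.get_embeddings({
    "input": ["What is the capital of France?"],
    "model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
    "input_type": "query",
    "encoding_format": "float",
    "truncate": "NONE",
})
print(len(embed_response["data"][0]["embedding"]))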