
Commit a1f8a66

next refactor
Signed-off-by: greg pereira <[email protected]>
1 parent eeb65e8 commit a1f8a66

File tree

11 files changed: +538 -91 lines changed


milvus/build/merlinite-qq.sh

Lines changed: 0 additions & 7 deletions
This file was deleted.

milvus/seed/README.md

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+RAG application with ILAB
+
+1. Set up a vector DB (Milvus)
+
+Development story:
+0. Starting goal:
+   - Naive RAG, not KG-aided
+   - Additions:
+     1. Identify what the model lacks knowledge in
+     2. Can I use the internally trained model, or do I have to use the HF model?
+   -
+
+- UI integration
+
+-----------------------------------------------
+
+variable definition
+class Config
+
+_identify_params,
+_llm_type, _extract_token_usage,
+
+Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made:
+- Is the model serializable? Assumed no.
+- Max tokens for merlinite and granite: both assumed 4096.
+- Does this model have attention / memory?
+- Do these models have a verbosity option for output?
+- Recommended default values:
+-
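Step 1 of the plan above is standing up Milvus as the vector DB. The seeding code for that step isn't part of this excerpt, so the following is only a rough sketch with pymilvus; the URI, collection name, and embedding dimension are assumptions, not values from the commit.

from pymilvus import MilvusClient

# Hypothetical connection details; the commit does not show its Milvus config.
client = MilvusClient(uri="http://localhost:19530")

# Collection name and embedding dimension are placeholders.
if not client.has_collection("ilab_rag"):
    client.create_collection(collection_name="ilab_rag", dimension=768)

# Seed one toy row with a placeholder vector.
client.insert(
    collection_name="ilab_rag",
    data=[{"id": 0, "vector": [0.0] * 768, "text": "example chunk"}],
)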
Binary file not shown.
Binary file not shown.
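client.py below imports IlabLLM from ilab_model, whose spec the README notes sketch (class Config, _identify_params, _llm_type, _extract_token_usage). Since ilab_model.py isn't shown in this excerpt, the following is only a guess at its shape: a minimal custom LangChain LLM wrapping an OpenAI-compatible chat endpoint, with all names and the response schema assumed rather than taken from the commit.

from typing import Any, Dict, List, Optional

import requests
from langchain_core.language_models.llms import LLM


class IlabLLM(LLM):
    """Sketch of a LangChain wrapper for an ilab-served model (assumed shape)."""

    model_endpoint: str
    model_name: str
    apikey: str
    max_tokens: int = 4096  # ceiling assumed for merlinite/granite per the README notes
    # (temperature, top_p, repetition_penalty, streaming fields elided for brevity)

    @property
    def _llm_type(self) -> str:
        return "ilab"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        # Parameters LangChain uses to identify this model instance.
        return {"model_name": self.model_name, "max_tokens": self.max_tokens}

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        headers = {
            "Authorization": f"Bearer {self.apikey}",
            "Content-Type": "application/json",
        }
        data = {
            "model": self.model_name,
            "max_tokens": self.max_tokens,
            "messages": [{"role": "user", "content": prompt}],
            "stop": stop or ["<|endoftext|>"],
            "stream": False,
        }
        resp = requests.post(self.model_endpoint, headers=headers, json=data, timeout=60)
        resp.raise_for_status()
        # Assumes an OpenAI-compatible response body.
        return resp.json()["choices"][0]["message"]["content"]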

milvus/seed/client.py

Lines changed: 53 additions & 45 deletions
@@ -1,6 +1,12 @@
-import httpx
+import requests
+import json
 import os
-import ssl
+from ilab_model import IlabLLM
+from dotenv import load_dotenv
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+load_dotenv()

 # manage ENV
 model_endpoint=os.getenv('MODEL_ENDPOINT')
@@ -11,48 +17,50 @@
 if model_name == "":
     model_name = "ibm/merlinite-7b"

-model_token=os.getenv('MODEL_TOKEN')
+model_token=os.getenv('ILAB_API_TOKEN')

 # HTTPS client
-client_key_path = "/home/fedora/client-tls-key.pem2"
-client_crt_path = "/home/fedora/client-tls-crt.pem2"
-server_ca_crt = "/home/fedora/server-ca-crt.pem2"
-
-ssl_context = ssl.create_default_context(cafile=server_ca_crt)
-ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)
-
-client = httpx.Client(verify=ssl_context)
-
-
-def get_openai_response(prompt, **kwargs):
-    url = model_endpoint
-    headers = {
-        "Authorization": f"Bearer {model_token}",
-        "Content-Type": "application/json"
-    }
-    data = {
-        "model": model_name,
-        "max_tokens": 4096,
-        "messages": [
-            {
-                "role": "system",
-                "content": "You are an AI language model developed by IBM Research. You are a cautious assistant that carefully follows instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
-            },
-            {
-                "role": "user",
-                "content": prompt
-            }
-        ],
-        "logprobs": False,
-        "stream": False
-    }
-
-    response = client.post(url, json=data, headers=headers)
-    response.raise_for_status()
-    return response.json()
-
-question = """ Question: I am training for an upcoming marathon but I am completely out of shape! Can you help me to implement a plan to prepare me for running a marathon in 12 weeks?
-
-Answer: Let's think step by step. """
-
-# get_openai_response(question)
+# client_key_path = "/home/fedora/client-tls-key.pem2"
+# client_crt_path = "/home/fedora/client-tls-crt.pem2"
+# server_ca_crt = "/home/fedora/server-ca-crt.pem2"
+
+# ssl_context = ssl.create_default_context(cafile=server_ca_crt)
+# ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)
+
+# client = httpx.Client(verify=ssl_context)
+
+# data = {
+#     "model": "instructlab/granite-7b-lab",
+#     "messages": [
+#         {"role": "system", "content": "your name is carl"},
+#         {"role": "user", "content": "what is your name?"}
+#     ],
+#     "temperature": 1,
+#     "max_tokens": 1792,
+#     "top_p": 1,
+#     "repetition_penalty": 1.05,
+#     "stop": ["<|endoftext|>"],
+#     "logprobs": False,
+#     "stream": False
+# }
+
+# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False)
+# print(response.json())
+print(f'model_name={model_name}')
+llm = IlabLLM(
+    model_endpoint=model_endpoint,
+    model_name=model_name,
+    apikey=model_token,
+    temperature=1,
+    max_tokens=500,
+    top_p=1,
+    repetition_penalty=1.05,
+    stop=["<|endoftext|>"],
+    streaming=False
+)
+
+prompt="I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?"
+prompts=[prompt]
+# prompt_template = PromptTemplate.from_template(prompt)
+llm.generate(prompts)
+# llm.invoke("dog")
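Both clients pull their settings from the environment via python-dotenv. A hypothetical .env, with placeholder values not taken from the commit:

# .env — read by load_dotenv(); every value here is a placeholder
MODEL_ENDPOINT=https://ilab.example.com/v1/chat/completions
MODEL_NAME=ibm/merlinite-7b
ILAB_API_TOKEN=changeme   # read by client.py
MODEL_TOKEN=changeme      # still read by dumb_client.py below

One caveat: os.getenv returns None when a variable is unset, so the == "" fallbacks in both clients only fire when the variable is present but empty; os.getenv('MODEL_ENDPOINT', '') would cover both cases.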

milvus/seed/dumb_client.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+import requests
+import json
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# manage ENV
+model_endpoint=os.getenv('MODEL_ENDPOINT')
+if model_endpoint == "":
+    model_endpoint = "http://localhost:8001"
+
+model_name=os.getenv('MODEL_NAME')
+if model_name == "":
+    model_name = "ibm/merlinite-7b"
+
+model_token=os.getenv('MODEL_TOKEN')
+
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {model_token}"
+}
+
+data = {
+    "model": model_name,
+    "messages": [
+        {"role": "system", "content": "your name is carl"},
+        {"role": "user", "content": "what is your name?"}
+    ],
+    "temperature": 1,
+    "max_tokens": 1792,
+    "top_p": 1,
+    "repetition_penalty": 1.05,
+    "stop": ["<|endoftext|>"],
+    "logprobs": False,
+    "stream": False
+}
+
+response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
+print(response.json())
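dumb_client.py prints the raw JSON body. If the server is OpenAI-compatible, as the chat-completions request shape suggests, the reply text normally sits at choices[0].message.content; a small follow-on sketch under that assumption:

# Hypothetical follow-on: pull the assistant reply out of an
# OpenAI-compatible response body (schema assumed, not shown in the commit).
body = response.json()
try:
    answer = body["choices"][0]["message"]["content"]
except (KeyError, IndexError, TypeError):
    answer = f"unexpected response shape: {body}"
print(answer)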
