Skip to content

Commit a245d5c

Browse files
committed
try fix CI error
Signed-off-by: JaredforReal <[email protected]>
1 parent a7191cf commit a245d5c

File tree

6 files changed

+371
-5
lines changed

6 files changed

+371
-5
lines changed

deploy/kubernetes/base/config.yaml

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
bert_model:
2+
model_id: models/all-MiniLM-L12-v2
3+
threshold: 0.6
4+
use_cpu: true
5+
6+
semantic_cache:
7+
enabled: true
8+
backend_type: "memory" # Options: "memory" or "milvus"
9+
similarity_threshold: 0.8
10+
max_entries: 1000 # Only applies to memory backend
11+
ttl_seconds: 3600
12+
eviction_policy: "fifo"
13+
14+
tools:
15+
enabled: true
16+
top_k: 3
17+
similarity_threshold: 0.2
18+
tools_db_path: "config/tools_db.json"
19+
fallback_to_empty: true
20+
21+
prompt_guard:
22+
enabled: true
23+
use_modernbert: true
24+
model_id: "models/jailbreak_classifier_modernbert-base_model"
25+
threshold: 0.7
26+
use_cpu: true
27+
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
28+
29+
# vLLM Endpoints Configuration
30+
# IMPORTANT: 'address' field must be a valid IP address (IPv4 or IPv6)
31+
# Supported formats: 127.0.0.1, 192.168.1.1, ::1, 2001:db8::1
32+
# NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field)
33+
vllm_endpoints:
34+
- name: "endpoint1"
35+
address: "127.0.0.1" # llm-katan sidecar or local backend
36+
port: 8002
37+
weight: 1
38+
39+
model_config:
40+
"qwen3":
41+
reasoning_family: "qwen3" # Match docker-compose default model name
42+
preferred_endpoints: ["endpoint1"]
43+
pii_policy:
44+
allow_by_default: true
45+
46+
# Classifier configuration
47+
classifier:
48+
category_model:
49+
model_id: "models/category_classifier_modernbert-base_model"
50+
use_modernbert: true
51+
threshold: 0.6
52+
use_cpu: true
53+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
54+
pii_model:
55+
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
56+
use_modernbert: true
57+
threshold: 0.7
58+
use_cpu: true
59+
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
60+
61+
# Categories: each entry names a category and gives a model, its score, and a use_reasoning flag
62+
categories:
63+
- name: business
64+
model_scores:
65+
- model: qwen3
66+
score: 0.7
67+
use_reasoning: false # Business performs better without reasoning
68+
- name: law
69+
model_scores:
70+
- model: qwen3
71+
score: 0.4
72+
use_reasoning: false
73+
- name: psychology
74+
model_scores:
75+
- model: qwen3
76+
score: 0.6
77+
use_reasoning: false
78+
- name: biology
79+
model_scores:
80+
- model: qwen3
81+
score: 0.9
82+
use_reasoning: false
83+
- name: chemistry
84+
model_scores:
85+
- model: qwen3
86+
score: 0.6
87+
use_reasoning: true # Enable reasoning for complex chemistry
88+
- name: history
89+
model_scores:
90+
- model: qwen3
91+
score: 0.7
92+
use_reasoning: false
93+
- name: other
94+
model_scores:
95+
- model: qwen3
96+
score: 0.7
97+
use_reasoning: false
98+
- name: health
99+
model_scores:
100+
- model: qwen3
101+
score: 0.5
102+
use_reasoning: false
103+
- name: economics
104+
model_scores:
105+
- model: qwen3
106+
score: 1.0
107+
use_reasoning: false
108+
- name: math
109+
model_scores:
110+
- model: qwen3
111+
score: 1.0
112+
use_reasoning: true # Enable reasoning for complex math
113+
- name: physics
114+
model_scores:
115+
- model: qwen3
116+
score: 0.7
117+
use_reasoning: true # Enable reasoning for physics
118+
- name: computer science
119+
model_scores:
120+
- model: qwen3
121+
score: 0.6
122+
use_reasoning: false
123+
- name: philosophy
124+
model_scores:
125+
- model: qwen3
126+
score: 0.5
127+
use_reasoning: false
128+
- name: engineering
129+
model_scores:
130+
- model: qwen3
131+
score: 0.7
132+
use_reasoning: false
133+
134+
default_model: qwen3
135+
136+
# Reasoning family configurations
137+
reasoning_families:
138+
deepseek:
139+
type: "chat_template_kwargs"
140+
parameter: "thinking"
141+
142+
qwen3:
143+
type: "chat_template_kwargs"
144+
parameter: "enable_thinking"
145+
146+
gpt-oss:
147+
type: "reasoning_effort"
148+
parameter: "reasoning_effort"
149+
gpt:
150+
type: "reasoning_effort"
151+
parameter: "reasoning_effort"
152+
153+
# Global default reasoning effort level
154+
default_reasoning_effort: high
155+
156+
# API Configuration
157+
api:
158+
batch_classification:
159+
max_batch_size: 100
160+
concurrency_threshold: 5
161+
max_concurrency: 8
162+
metrics:
163+
enabled: true
164+
detailed_goroutine_tracking: true
165+
high_resolution_timing: false
166+
sample_rate: 1.0
167+
duration_buckets:
168+
[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
169+
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

deploy/kubernetes/base/kustomization.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ apiVersion: kustomize.config.k8s.io/v1beta1
22
kind: Kustomization
33

44
resources:
5-
- ../namespace.yaml
6-
- ../pvc.yaml
7-
- ../service.yaml
5+
- ./namespace.yaml
6+
- ./pvc.yaml
7+
- ./service.yaml
88

99
configMapGenerator:
1010
- name: semantic-router-config
1111
files:
12-
- ../config.yaml
13-
- ../tools_db.json
12+
- ./config.yaml
13+
- ./tools_db.json
1414

1515
namespace: vllm-semantic-router-system
1616

deploy/kubernetes/base/namespace.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
apiVersion: v1
2+
kind: Namespace
3+
metadata:
4+
name: vllm-semantic-router-system

deploy/kubernetes/base/pvc.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
apiVersion: v1
2+
kind: PersistentVolumeClaim
3+
metadata:
4+
name: semantic-router-models
5+
labels:
6+
app: semantic-router
7+
spec:
8+
accessModes:
9+
- ReadWriteOnce
10+
resources:
11+
requests:
12+
storage: 30Gi
13+
storageClassName: standard
deploy/kubernetes/base/service.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
name: semantic-router
5+
namespace: vllm-semantic-router-system
6+
labels:
7+
app: semantic-router
8+
spec:
9+
type: ClusterIP
10+
ports:
11+
- port: 50051
12+
targetPort: grpc
13+
protocol: TCP
14+
name: grpc
15+
- port: 8080
16+
targetPort: 8080
17+
protocol: TCP
18+
name: classify-api
19+
selector:
20+
app: semantic-router
21+
---
22+
apiVersion: v1
23+
kind: Service
24+
metadata:
25+
name: semantic-router-metrics
26+
namespace: vllm-semantic-router-system
27+
labels:
28+
app: semantic-router
29+
service: metrics
30+
spec:
31+
type: ClusterIP
32+
ports:
33+
- port: 9190
34+
targetPort: metrics
35+
protocol: TCP
36+
name: metrics
37+
selector:
38+
app: semantic-router
deploy/kubernetes/base/tools_db.json

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
[
2+
{
3+
"tool": {
4+
"type": "function",
5+
"function": {
6+
"name": "get_weather",
7+
"description": "Get current weather information for a location",
8+
"parameters": {
9+
"type": "object",
10+
"properties": {
11+
"location": {
12+
"type": "string",
13+
"description": "The city and state, e.g. San Francisco, CA"
14+
},
15+
"unit": {
16+
"type": "string",
17+
"enum": ["celsius", "fahrenheit"],
18+
"description": "Temperature unit"
19+
}
20+
},
21+
"required": ["location"]
22+
}
23+
}
24+
},
25+
"description": "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow",
26+
"category": "weather",
27+
"tags": ["weather", "temperature", "forecast", "climate"]
28+
},
29+
{
30+
"tool": {
31+
"type": "function",
32+
"function": {
33+
"name": "search_web",
34+
"description": "Search the web for information",
35+
"parameters": {
36+
"type": "object",
37+
"properties": {
38+
"query": {
39+
"type": "string",
40+
"description": "The search query"
41+
},
42+
"num_results": {
43+
"type": "integer",
44+
"description": "Number of results to return",
45+
"default": 5
46+
}
47+
},
48+
"required": ["query"]
49+
}
50+
}
51+
},
52+
"description": "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate",
53+
"category": "search",
54+
"tags": ["search", "web", "internet", "information", "browse"]
55+
},
56+
{
57+
"tool": {
58+
"type": "function",
59+
"function": {
60+
"name": "calculate",
61+
"description": "Perform mathematical calculations",
62+
"parameters": {
63+
"type": "object",
64+
"properties": {
65+
"expression": {
66+
"type": "string",
67+
"description": "Mathematical expression to evaluate"
68+
}
69+
},
70+
"required": ["expression"]
71+
}
72+
}
73+
},
74+
"description": "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula",
75+
"category": "math",
76+
"tags": ["math", "calculation", "arithmetic", "compute", "numbers"]
77+
},
78+
{
79+
"tool": {
80+
"type": "function",
81+
"function": {
82+
"name": "send_email",
83+
"description": "Send an email message",
84+
"parameters": {
85+
"type": "object",
86+
"properties": {
87+
"to": {
88+
"type": "string",
89+
"description": "Recipient email address"
90+
},
91+
"subject": {
92+
"type": "string",
93+
"description": "Email subject"
94+
},
95+
"body": {
96+
"type": "string",
97+
"description": "Email body content"
98+
}
99+
},
100+
"required": ["to", "subject", "body"]
101+
}
102+
}
103+
},
104+
"description": "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform",
105+
"category": "communication",
106+
"tags": ["email", "send", "communication", "message", "contact"]
107+
},
108+
{
109+
"tool": {
110+
"type": "function",
111+
"function": {
112+
"name": "create_calendar_event",
113+
"description": "Create a new calendar event or appointment",
114+
"parameters": {
115+
"type": "object",
116+
"properties": {
117+
"title": {
118+
"type": "string",
119+
"description": "Event title"
120+
},
121+
"date": {
122+
"type": "string",
123+
"description": "Event date in YYYY-MM-DD format"
124+
},
125+
"time": {
126+
"type": "string",
127+
"description": "Event time in HH:MM format"
128+
},
129+
"duration": {
130+
"type": "integer",
131+
"description": "Duration in minutes"
132+
}
133+
},
134+
"required": ["title", "date", "time"]
135+
}
136+
}
137+
},
138+
"description": "Schedule meetings, create calendar events, set appointments, manage calendar, book time, plan meeting, organize schedule, reminder, agenda",
139+
"category": "productivity",
140+
"tags": ["calendar", "event", "meeting", "appointment", "schedule"]
141+
}
142+
]

0 commit comments

Comments
 (0)