-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathtasks.json
More file actions
101 lines (101 loc) · 2.96 KB
/
tasks.json
File metadata and controls
101 lines (101 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
"version": "2.0.0",
"tasks": [
{
"label": "Start API (8080 + 58080)",
"type": "shell",
"command": "uv",
"args": [
"run",
"dnet-api",
"--http-port",
"8080",
"--grpc-port",
"58080"
],
"group": "build",
"presentation": {
"echo": true,
"reveal": "always",
"focus": false,
"panel": "new",
"showReuseMessage": true,
"clear": false
}
},
{
"label": "Start Shard (8081 + 58081)",
"type": "shell",
"command": "uv",
"args": [
"run",
"dnet-shard",
"--http-port",
"8081",
"--grpc-port",
"58081"
],
"group": "build",
"presentation": {
"echo": true,
"reveal": "always",
"focus": false,
"panel": "new",
"showReuseMessage": true,
"clear": false
}
},
{
"label": "Health (Shard 8081)",
"type": "shell",
"command": "curl -s http://localhost:8081/health -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
},
{
"label": "Start Shard (8082 + 58082)",
"type": "shell",
"command": "uv",
"args": [
"run",
"dnet-shard",
"--http-port",
"8082",
"--grpc-port",
"58082"
],
"group": "build",
"presentation": {
"echo": true,
"reveal": "always",
"focus": false,
"panel": "new",
"showReuseMessage": true,
"clear": false
}
},
{
"label": "Prepare Topology (Qwen/Qwen3-4B-MLX-4bit)",
"type": "shell",
"command": "curl -X POST http://localhost:8080/v1/prepare_topology -H \"Content-Type: application/json\" -d '{ \"model\": \"Qwen/Qwen3-4B-MLX-4bit\" }' | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
},
{
"label": "Prepare & Load (Qwen/Qwen3-4B-MLX-4bit)",
"type": "shell",
"command": "uv run ./scripts/prepare_model.py Qwen/Qwen3-4B-MLX-4bit"
},
{
"label": "Get API Devices",
"type": "shell",
"command": "curl -s http://localhost:8080/v1/devices -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
},
{
"label": "Get API Topology",
"type": "shell",
"command": "curl -s http://localhost:8080/v1/topology -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
},
{
"label": "Chat Completions (Qwen/Qwen3-4B-MLX-4bit)",
"type": "shell",
"command": "curl -X POST http://localhost:8080/v1/chat/completions -H \"Content-Type: application/json\" -d '{\"model\":\"Qwen/Qwen3-4B-MLX-4bit\", \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}], \"max_tokens\": 100}' | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
}
]
}