1
#!/bin/bash

# DESCRIPTION:
# This script stress-tests the vLLM router's round-robin routing logic under
# high concurrent load. It also validates that requests are evenly
# distributed across multiple backend servers.

# USAGE:
# pip install -e .
# bash tests/e2e/stress-test.sh
11
+
12
+ # OUTPUT EXAMPLE:
13
+ # bash tests/e2e/stress-test.sh
14
+ # [INFO] Checking prerequisites...
15
+ # [INFO] Router stress test configuration:
16
+ # [INFO] Concurrent requests: 2000
17
+ # [INFO] Total requests: 10000
18
+ # [INFO] Router port: 30080
19
+ # [INFO] Backend ports: 8001, 8002
20
+ # [INFO] Model: facebook/opt-125m
21
+ # [INFO] Starting router with round-robin routing (stress test mode)
22
+ # [INFO] Router started with PID: 1307668
23
+ # [INFO] Waiting for router to be ready...
24
+ # [INFO] Router is ready
25
+ # [INFO] Running stress test with Apache Bench
26
+ # [INFO] Concurrent: 2000, Total: 10000
27
+ # This is ApacheBench, Version 2.3 <$Revision: 1879490 $>
28
+ # Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
29
+ # Licensed to The Apache Software Foundation, http://www.apache.org/
30
+
31
+ # Benchmarking localhost (be patient)
32
+ # Completed 1000 requests
33
+ # Completed 2000 requests
34
+ # Completed 3000 requests
35
+ # Completed 4000 requests
36
+ # Completed 5000 requests
37
+ # Completed 6000 requests
38
+ # Completed 7000 requests
39
+ # Completed 8000 requests
40
+ # Completed 9000 requests
41
+ # Completed 10000 requests
42
+ # Finished 10000 requests
43
+
44
+
45
+ # Server Software: uvicorn
46
+ # Server Hostname: localhost
47
+ # Server Port: 30080
48
+
49
+ # Document Path: /v1/chat/completions
50
+ # Document Length: 21 bytes
51
+
52
+ # Concurrency Level: 2000
53
+ # Time taken for tests: 54.648 seconds
54
+ # Complete requests: 10000
55
+ # Failed requests: 0
56
+ # Non-2xx responses: 10000
57
+ # Total transferred: 1930000 bytes
58
+ # Total body sent: 3920000
59
+ # HTML transferred: 210000 bytes
60
+ # Requests per second: 182.99 [#/sec] (mean)
61
+ # Time per request: 10929.546 [ms] (mean)
62
+ # Time per request: 5.465 [ms] (mean, across all concurrent requests)
63
+ # Transfer rate: 34.49 [Kbytes/sec] received
64
+ # 70.05 kb/s sent
65
+ # 104.54 kb/s total
66
+
67
+ # Connection Times (ms)
68
+ # min mean[+/-sd] median max
69
+ # Connect: 0 14 18.0 4 63
70
+ # Processing: 118 9322 3654.3 8204 18354
71
+ # Waiting: 25 8933 3648.5 7785 17623
72
+ # Total: 118 9336 3646.5 8239 18357
73
+
74
+ # Percentage of the requests served within a certain time (ms)
75
+ # 50% 8239
76
+ # 66% 9501
77
+ # 75% 10511
78
+ # 80% 11791
79
+ # 90% 16048
80
+ # 95% 16759
81
+ # 98% 17191
82
+ # 99% 17494
83
+ # 100% 18357 (longest request)
84
+ # [INFO] Stress test completed
85
+ # [INFO] Checking round-robin routing correctness...
86
+ # [INFO] Round-robin routing results:
87
+ # [INFO] Backend localhost:8001: 5000 requests
88
+ # [INFO] Backend localhost:8002: 5000 requests
89
+ # [INFO] Total routed: 10000 requests
90
+ # [INFO] Backend localhost:8001: 50%
91
+ # [INFO] Backend localhost:8002: 50%
92
+ # [INFO] ✅ Round-robin routing is working correctly (0% difference)
93
+ # [INFO] Test completed successfully!
94
+ # [INFO] Cleaning up router processes...
95
+
96
+
97
# Abort on unhandled errors, unset variables and failures inside pipelines.
set -euo pipefail

# Default values. The command-line options parsed further down may override
# every one of these except BACKENDS_URL, which is rebuilt from the final
# backend ports once parsing is done.
ROUTER_PORT=30080
CONCURRENT=2000
REQUESTS=10000
LOG_DIR="/tmp/router-stress-logs"
MODEL="facebook/opt-125m"
BACKEND1_PORT=8001
BACKEND2_PORT=8002
BACKENDS_URL="http://localhost:$BACKEND1_PORT,http://localhost:$BACKEND2_PORT"

# Colors for output. Stored as literal backslash sequences; they only turn
# into real escape codes when printed by an escape-interpreting command.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'
114
+
115
# Print an informational message to stdout with a green "[INFO]" tag.
# Globals:   GREEN, NC (read) - colour escape sequences
# Arguments: $1 - message text (printed verbatim; only the colour codes are
#                 escape-interpreted)
print_status() {
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
118
+
119
# Print an error message with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - colour escape sequences
# Arguments: $1 - message text (printed verbatim)
# Outputs:   writes to stderr so diagnostics stay out of captured stdout
print_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2
}
122
+
123
# Print a warning message with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - colour escape sequences
# Arguments: $1 - message text (printed verbatim)
# Outputs:   writes to stderr so diagnostics stay out of captured stdout
print_warning() {
  printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "${1-}" >&2
}
126
+
127
# Print the command-line help text to stdout.
# The defaults shown are written out literally (not read from the variables)
# so the help stays correct even when options preceding -h already changed
# them. They must be kept in sync with the defaults at the top of the script.
show_usage() {
  cat <<EOF
Router Stress Test - Tests round-robin routing logic

Usage: $0 [options]

Options:
  -c, --concurrent N      Concurrent requests (default: 2000)
  -n, --requests N        Total requests (default: 10000)
  -p, --port PORT         Router port (default: 30080)
  -l, --log-dir DIR       Log directory (default: /tmp/router-stress-logs)
  -m, --model MODEL       Model to use (default: facebook/opt-125m)
  --backend1-port PORT    First backend port (default: 8001)
  --backend2-port PORT    Second backend port (default: 8002)
  -h, --help              Show this help

Examples:
  $0                                            # Basic test (2000 concurrent, 10000 requests)
  $0 -c 500 -n 20000                            # High load test
  $0 -p 8080 -c 100                             # Different port, lower load
  $0 --backend1-port 9000 --backend2-port 9001  # Custom backend ports

Prerequisites:
  - Router must be started with VLLM_ROUTER_STRESS_TEST_MODE=true
EOF
}
153
+
154
# Verify that Apache Bench (ab) can be found on PATH.
# Returns 0 when it is available; otherwise reports how to install it via
# print_error and terminates the script with exit status 1.
check_ab() {
  if command -v ab >/dev/null 2>&1; then
    return 0
  fi

  print_error "Apache Bench (ab) not found!"
  print_error "Install with: sudo apt-get install apache2-utils"
  exit 1
}
162
+
163
# Stop the router started by this script (installed as the EXIT trap).
# Globals:   ROUTER_PID (read) - PID recorded by start_router; may be unset
#            when the script exits before the router was launched.
# Every step is best-effort: a failure here must not mask the script's real
# exit status.
cleanup() {
  print_status "Cleaning up router processes..."

  # Prefer the exact process we launched over a pattern match.
  if [[ -n "${ROUTER_PID:-}" ]] && kill -0 "$ROUTER_PID" 2>/dev/null; then
    kill "$ROUTER_PID" 2>/dev/null || true

    # Give it up to ~2 seconds to shut down gracefully, then force it.
    local waited=0
    while kill -0 "$ROUTER_PID" 2>/dev/null && ((waited < 20)); do
      sleep 0.1
      waited=$((waited + 1))
    done
    if kill -0 "$ROUTER_PID" 2>/dev/null; then
      kill -9 "$ROUTER_PID" 2>/dev/null || true
    fi

    # Reap the child; its exit status is irrelevant during cleanup.
    wait "$ROUTER_PID" 2>/dev/null || true
  fi

  # Backstop for leftover router processes (e.g. from an earlier aborted
  # run). NOTE: this matches by command line, so it also signals routers
  # that were not started by this script. Only pause when something was
  # actually signalled.
  if pkill -f "python3 -m src.vllm_router.app" 2>/dev/null; then
    sleep 2
  fi
}
169
+
170
# Launch the router in the background and block until it answers HTTP.
# Globals:   LOG_DIR, ROUTER_PORT, BACKENDS_URL, MODEL (read)
#            ROUTER_PID (written) - PID of the background router process
#            VLLM_ROUTER_STRESS_TEST_MODE (exported as "true")
# Outputs:   router stdout/stderr are redirected to $LOG_DIR/router.log
# Exits the script with status 1 when the router dies during start-up or is
# not reachable within 30 seconds; the tail of its log is printed first.
start_router() {
  local log_file="$LOG_DIR/router.log"

  print_status "Starting router with round-robin routing (stress test mode)"

  # Create log directory
  mkdir -p -- "$LOG_DIR"

  # Set stress test mode
  export VLLM_ROUTER_STRESS_TEST_MODE=true

  # Start router with detailed logging. The same model is registered once
  # per backend, hence "$MODEL,$MODEL" and "chat,chat".
  python3 -m src.vllm_router.app --port "$ROUTER_PORT" \
    --service-discovery static \
    --static-backends "$BACKENDS_URL" \
    --static-models "$MODEL,$MODEL" \
    --static-model-types "chat,chat" \
    --routing-logic roundrobin \
    --log-stats \
    --log-stats-interval 5 >"$log_file" 2>&1 &

  ROUTER_PID=$!
  print_status "Router started with PID: $ROUTER_PID"

  # Wait for router to be ready: poll once per second for up to 30 seconds.
  print_status "Waiting for router to be ready..."
  local deadline=$((SECONDS + 30))
  until curl -s --max-time 2 "http://localhost:$ROUTER_PORT/v1/models" >/dev/null 2>&1; do
    # Fail fast when the router process is already gone instead of polling
    # a dead port until the deadline.
    if ! kill -0 "$ROUTER_PID" 2>/dev/null; then
      print_error "Router process exited before becoming ready"
      print_error "Router log:"
      tail -20 "$log_file" || true
      exit 1
    fi
    if ((SECONDS >= deadline)); then
      print_error "Router failed to start within 30 seconds"
      print_error "Router log:"
      tail -20 "$log_file" || true
      exit 1
    fi
    sleep 1
  done
  print_status "Router is ready"
}
205
+
206
# Fire the configured load at the router's chat-completions endpoint using
# Apache Bench.
# Globals:   CONCURRENT, REQUESTS, ROUTER_PORT, MODEL (read)
# Outputs:   ab's report on stdout
# Returns:   0 on success; 1 when the payload file cannot be created; ab's
#            own exit status when ab fails. The temporary payload file is
#            removed in every case.
run_stress_test() {
  print_status "Running stress test with Apache Bench"
  print_status "Concurrent: $CONCURRENT, Total: $REQUESTS"

  # Create payload file (unique name, so parallel runs cannot clobber it).
  local payload_file
  payload_file=$(mktemp) || {
    print_error "Failed to create temporary payload file"
    return 1
  }

  # NOTE: $MODEL is inserted into the JSON as-is; a model name containing
  # '"' or '\' would yield an invalid payload.
  cat >"$payload_file" <<EOF
{
  "model": "$MODEL",
  "messages": [
    {"role": "user", "content": "Test message for stress testing"}
  ],
  "max_tokens": 10,
  "temperature": 0.7
}
EOF

  # Run Apache Bench. Capture its status instead of letting `set -e` abort
  # right here, so the payload file is always cleaned up.
  local ab_status=0
  ab -c "$CONCURRENT" \
    -n "$REQUESTS" \
    -p "$payload_file" \
    -T "application/json" \
    -H "Authorization: Bearer test" \
    -H "x-user-id: stress-test-user" \
    "http://localhost:$ROUTER_PORT/v1/chat/completions" || ab_status=$?

  # Clean up payload file
  rm -f -- "$payload_file"

  if ((ab_status != 0)); then
    print_error "Apache Bench exited with status $ab_status"
    return "$ab_status"
  fi

  print_status "Stress test completed"

  # Small delay to ensure all logs are written
  sleep 2
}
241
+
242
# Count the log lines recording a routing decision to one backend port.
# Arguments: $1 - backend port, $2 - router log file
# Outputs:   the number of matching lines on stdout (0 when none match)
_count_routed_to() {
  local port=$1
  local log_file=$2
  local hits

  # `grep -c` already prints "0" when nothing matches but exits 1 in that
  # case; neutralise only the status so the count is not emitted twice.
  # The trailing ([^0-9]|$) stops port 800 from also matching 8001.
  hits=$(grep -c -E -- "to http://localhost:${port}([^0-9]|\$)" "$log_file") || true
  printf '%s\n' "${hits:-0}"
}

# Verify from the router log that requests were spread evenly over the two
# backends.
# Globals:   LOG_DIR, BACKEND1_PORT, BACKEND2_PORT (read)
# Outputs:   per-backend request counts and percentages via print_status
# Returns:   0 when the two shares differ by at most 20 percentage points;
#            1 when the log is missing, contains no routing decisions, or
#            the split is more uneven than that.
check_roundrobin_correctness() {
  local log_file="$LOG_DIR/router.log"

  print_status "Checking round-robin routing correctness..."

  if [[ ! -f "$log_file" ]]; then
    print_error "Router log file not found: $log_file"
    return 1
  fi

  # Extract backend routing decisions from logs
  # Look for "Routing request ... to http://localhost:XXXX"
  local backend1_count backend2_count total_routed
  backend1_count=$(_count_routed_to "$BACKEND1_PORT" "$log_file")
  backend2_count=$(_count_routed_to "$BACKEND2_PORT" "$log_file")
  total_routed=$((backend1_count + backend2_count))

  print_status "Round-robin routing results:"
  print_status "  Backend localhost:$BACKEND1_PORT: $backend1_count requests"
  print_status "  Backend localhost:$BACKEND2_PORT: $backend2_count requests"
  print_status "  Total routed: $total_routed requests"

  if [[ "$total_routed" -eq 0 ]]; then
    print_error "No routing decisions found in logs"
    return 1
  fi

  # Calculate percentages (integer division, so they may sum to 99).
  local backend1_pct=$((backend1_count * 100 / total_routed))
  local backend2_pct=$((backend2_count * 100 / total_routed))

  print_status "  Backend localhost:$BACKEND1_PORT: ${backend1_pct}%"
  print_status "  Backend localhost:$BACKEND2_PORT: ${backend2_pct}%"

  # Check if distribution is roughly even (within 20% tolerance)
  local diff=$((backend1_pct - backend2_pct))
  if ((diff < 0)); then
    diff=$((-diff))
  fi

  if [[ "$diff" -le 20 ]]; then
    print_status "✅ Round-robin routing is working correctly (${diff}% difference)"
    return 0
  fi

  print_error "❌ Round-robin routing appears uneven (${diff}% difference)"
  print_status "Last 10 routing decisions from logs:"
  # Purely informational; a grep miss must not abort under pipefail.
  grep "Routing request.*to http://localhost:" "$log_file" | tail -10 | sed 's/^/  /' || true
  return 1
}
289
+
290
# Print the last 20 lines of the router log, indented, for diagnosis.
# Globals:   LOG_DIR (read)
# Does nothing (and returns 0) when the log file does not exist.
show_log_summary() {
  local log_file="$LOG_DIR/router.log"

  [[ -f "$log_file" ]] || return 0

  print_status "Log summary (last 20 lines):"
  tail -20 "$log_file" | sed 's/^/  /'
}
299
+
300
# Parse command line arguments and overwrite the matching defaults.
# Globals:   CONCURRENT, REQUESTS, ROUTER_PORT, LOG_DIR, MODEL,
#            BACKEND1_PORT, BACKEND2_PORT (written)
# Arguments: the script's command-line arguments
# Exits 0 after printing the help for -h/--help; exits 1 (after an error
# message and the help) on an unknown option or an option given without its
# value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # First pass: validate the option before touching "$2", which would
    # otherwise abort with a bare "unbound variable" error under `set -u`.
    case "$1" in
      -h|--help)
        show_usage
        exit 0
        ;;
      -c|--concurrent|-n|--requests|-p|--port|-l|--log-dir|-m|--model|--backend1-port|--backend2-port)
        if [[ $# -lt 2 ]]; then
          print_error "Option $1 requires a value"
          show_usage
          exit 1
        fi
        ;;
      *)
        print_error "Unknown option: $1"
        show_usage
        exit 1
        ;;
    esac

    # Second pass: every option that reaches this point takes one value.
    case "$1" in
      -c|--concurrent) CONCURRENT="$2" ;;
      -n|--requests) REQUESTS="$2" ;;
      -p|--port) ROUTER_PORT="$2" ;;
      -l|--log-dir) LOG_DIR="$2" ;;
      -m|--model) MODEL="$2" ;;
      --backend1-port) BACKEND1_PORT="$2" ;;
      --backend2-port) BACKEND2_PORT="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"
342
+
343
# Set trap for cleanup. Installed only after argument parsing, so --help and
# option errors exit without running the cleanup.
trap cleanup EXIT

# Update backends URL with final port values
BACKENDS_URL="http://localhost:$BACKEND1_PORT,http://localhost:$BACKEND2_PORT"

# Check prerequisites
print_status "Checking prerequisites..."
check_ab

print_status "Router stress test configuration:"
print_status "  Concurrent requests: $CONCURRENT"
print_status "  Total requests: $REQUESTS"
print_status "  Router port: $ROUTER_PORT"
print_status "  Backend ports: $BACKEND1_PORT, $BACKEND2_PORT"
print_status "  Model: $MODEL"

# Run test
start_router
run_stress_test

# Check correctness and show results. Called inside `if` so that a failed
# check reaches the diagnostics below instead of tripping `set -e`.
if check_roundrobin_correctness; then
  print_status "Test completed successfully!"
else
  print_error "Test completed but round-robin routing correctness check failed!"
  show_log_summary
  exit 1
fi
0 commit comments