Skip to content

Commit ad9513f

Browse files
sjmonson and markurtz authored
Unify RPS and Concurrent Scheduler Paths (#233)
Unify the scheduling method used for async and synchronous modes in preparation for multi-turn conversation support. This change will also significantly reduce the number of PIDs used by GuideLLM at high concurrency. Relates to #196 --------- Signed-off-by: Samuel Monson <[email protected]> Co-authored-by: Mark Kurtz <[email protected]>
1 parent d0aca38 commit ad9513f

File tree

12 files changed

+213
-195
lines changed

12 files changed

+213
-195
lines changed

src/guidellm/benchmark/aggregator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@
3232
GenerationRequest,
3333
GenerativeRequestLoaderDescription,
3434
RequestLoaderDescription,
35+
RequestT,
36+
ResponseT,
3537
)
3638
from guidellm.scheduler import (
3739
GenerativeRequestsWorkerDescription,
38-
RequestT,
39-
ResponseT,
4040
SchedulerRequestResult,
4141
WorkerDescription,
4242
)

src/guidellm/benchmark/benchmarker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@
2727
GenerationRequest,
2828
GenerativeRequestLoaderDescription,
2929
RequestLoaderDescription,
30+
RequestT,
31+
ResponseT,
3032
)
3133
from guidellm.scheduler import (
3234
GenerativeRequestsWorker,
3335
RequestsWorker,
34-
RequestT,
35-
ResponseT,
3636
Scheduler,
3737
SchedulerRequestResult,
3838
SchedulingStrategy,

src/guidellm/config.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import os
23
from collections.abc import Sequence
34
from enum import Enum
45
from typing import Literal, Optional
@@ -131,8 +132,12 @@ class Settings(BaseSettings):
131132

132133
# Scheduler settings
133134
max_concurrency: int = 512
134-
max_worker_processes: int = 10
135-
max_add_requests_per_loop: int = 20
135+
max_worker_processes: int = Field(
136+
# use number of CPUs - 1, but at least 10
137+
default_factory=lambda: max((os.cpu_count() or 1) - 1, 10)
138+
)
139+
min_queued_requests: int = 20
140+
scheduler_start_delay: float = 5
136141

137142
# Data settings
138143
dataset: DatasetSettings = DatasetSettings()

src/guidellm/request/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
RequestLoaderDescription,
66
)
77
from .request import GenerationRequest
8+
from .types import RequestT, ResponseT
89

910
__all__ = [
1011
"GenerationRequest",
1112
"GenerativeRequestLoader",
1213
"GenerativeRequestLoaderDescription",
1314
"RequestLoader",
1415
"RequestLoaderDescription",
16+
"RequestT",
17+
"ResponseT",
1518
]

src/guidellm/request/loader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
3030

3131
class RequestLoader(Iterable):
3232
@abstractmethod
33-
def __iter__(self): ...
33+
def __iter__(self) -> Iterator: ...
3434

3535
@abstractmethod
36-
def __len__(self): ...
36+
def __len__(self) -> int: ...
3737

3838
@property
3939
@abstractmethod

src/guidellm/scheduler/types.py renamed to src/guidellm/request/types.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from typing import TypeVar
22

3-
__all__ = ["RequestT", "ResponseT"]
3+
__all__ = [
4+
"RequestT",
5+
"ResponseT",
6+
]
47

58

69
RequestT = TypeVar("RequestT")

src/guidellm/scheduler/__init__.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,12 @@
1515
ThroughputStrategy,
1616
strategy_display_str,
1717
)
18-
from .types import RequestT, ResponseT
1918
from .worker import (
2019
GenerativeRequestsWorker,
2120
GenerativeRequestsWorkerDescription,
2221
RequestsWorker,
2322
ResolveStatus,
2423
WorkerDescription,
25-
WorkerProcessRequest,
2624
WorkerProcessResult,
2725
)
2826

@@ -32,10 +30,8 @@
3230
"ConcurrentStrategy",
3331
"GenerativeRequestsWorker",
3432
"GenerativeRequestsWorkerDescription",
35-
"RequestT",
3633
"RequestsWorker",
3734
"ResolveStatus",
38-
"ResponseT",
3935
"Scheduler",
4036
"SchedulerRequestInfo",
4137
"SchedulerRequestResult",
@@ -46,7 +42,6 @@
4642
"SynchronousStrategy",
4743
"ThroughputStrategy",
4844
"WorkerDescription",
49-
"WorkerProcessRequest",
5045
"WorkerProcessResult",
5146
"strategy_display_str",
5247
]

src/guidellm/scheduler/queues.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Helper module for importing the correct queue types.
3+
"""
4+
5+
from dataclasses import dataclass
6+
from queue import Empty as QueueEmpty
7+
from queue import Full as QueueFull
8+
from queue import Queue
9+
from typing import Generic
10+
11+
from guidellm.request.types import RequestT, ResponseT
12+
from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
13+
14+
__all__ = [
15+
"MPQueues",
16+
"Queue",
17+
"QueueEmpty",
18+
"QueueFull",
19+
]
20+
21+
22+
@dataclass
23+
class MPQueues(Generic[RequestT, ResponseT]):
24+
requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
25+
responses: Queue[WorkerProcessResult[RequestT, ResponseT]]

src/guidellm/scheduler/result.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
1+
from dataclasses import dataclass
12
from typing import (
23
Generic,
34
Literal,
45
Optional,
56
)
67

78
from guidellm.objects import StandardBaseModel
9+
from guidellm.request.types import RequestT, ResponseT
810
from guidellm.scheduler.strategy import SchedulingStrategy
9-
from guidellm.scheduler.types import RequestT, ResponseT
1011

1112
__all__ = [
1213
"SchedulerRequestInfo",
1314
"SchedulerRequestResult",
1415
"SchedulerResult",
1516
"SchedulerRunInfo",
17+
"WorkerProcessRequest",
18+
"WorkerProcessResult",
1619
]
1720

1821

@@ -135,3 +138,18 @@ class SchedulerRequestResult(
135138
request: RequestT
136139
request_info: SchedulerRequestInfo
137140
response: Optional[ResponseT] = None
141+
142+
143+
@dataclass
144+
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
145+
request: RequestT
146+
timeout_time: float
147+
queued_time: float
148+
149+
150+
@dataclass
151+
class WorkerProcessResult(Generic[RequestT, ResponseT]):
152+
type_: Literal["request_scheduled", "request_start", "request_complete"]
153+
request: RequestT
154+
response: Optional[ResponseT]
155+
info: SchedulerRequestInfo

0 commit comments

Comments
 (0)