Skip to content

Commit 2e74a1b

Browse files
committed
Unify rps and concurrent scheduler paths
Signed-off-by: Samuel Monson <[email protected]>
1 parent f1f8ca8 commit 2e74a1b

File tree

8 files changed

+244
-173
lines changed

8 files changed

+244
-173
lines changed

src/guidellm/request/loader.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from guidellm.dataset import ColumnInputTypes, load_dataset
1616
from guidellm.objects import StandardBaseModel
1717
from guidellm.request.request import GenerationRequest
18+
from guidellm.request.session import GenerativeRequestSession
1819

1920
__all__ = [
2021
"GenerativeRequestLoader",
@@ -30,10 +31,10 @@ class RequestLoaderDescription(StandardBaseModel):
3031

3132
class RequestLoader(Iterable):
3233
@abstractmethod
33-
def __iter__(self): ...
34+
def __iter__(self) -> Iterator: ...
3435

3536
@abstractmethod
36-
def __len__(self): ...
37+
def __len__(self) -> int: ...
3738

3839
@property
3940
@abstractmethod
@@ -105,14 +106,14 @@ def __init__(
105106
self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests
106107
self._preserved_iter = None
107108

108-
def __iter__(self) -> Iterator[GenerationRequest]:
109+
def __iter__(self) -> Iterator[GenerativeRequestSession]:
109110
scope_create_count = 0
110111

111112
while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
112113
scope_create_count += 1
113114

114115
for item in dataset_iter:
115-
yield self._create_request(item)
116+
yield GenerativeRequestSession(self._create_request(item))
116117

117118
self._preserved_iter = None
118119

src/guidellm/request/session.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Generic, TypeVar
3+
4+
from guidellm.backend.response import ResponseSummary
5+
from guidellm.request.request import GenerationRequest
6+
7+
__all__ = ["GenerativeRequestSession", "RequestSession"]
8+
9+
# TODO: Replace with specific types that implement needed features
10+
RequestT = TypeVar("RequestT")
11+
ResponseT = TypeVar("ResponseT")
12+
13+
14+
class RequestSession(ABC, Generic[RequestT, ResponseT]):
    """Abstract interface for a session of one or more requests.

    Sessions are placed on the scheduler's request queue as a unit; the
    scheduler enqueues ``len(session)`` start-time entries for each session
    it adds (see ``Scheduler._add_requests``). The per-request methods below
    are presumably driven by the worker process loop, which is not visible
    in this view -- TODO confirm against ``guidellm.scheduler.worker``.
    """

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of requests this session will issue."""
        ...

    @abstractmethod
    def get_next_request(self) -> RequestT:
        """Return the next request to dispatch for this session."""
        ...

    @abstractmethod
    def get_next_delay(self) -> float:
        """Return the delay in seconds before dispatching the next request.

        The single-turn implementation returns ``0.0``.
        """
        ...

    @abstractmethod
    def push_response(self, response: ResponseT) -> None:
        """Feed the response for the most recent request back into the session."""
        ...

    @property
    @abstractmethod
    def complete(self) -> bool:
        """Whether the session has no further requests to issue."""
        ...
30+
31+
32+
# TODO: Implement multiturn support
class GenerativeRequestSession(RequestSession[GenerationRequest, ResponseSummary]):
    """Single-turn session wrapping exactly one generation request.

    Reports a length of one, hands out the wrapped request with no
    scheduling delay, and marks itself complete as soon as any response
    is pushed back.
    """

    def __init__(self, request: GenerationRequest) -> None:
        self.request = request
        self._finished = False

    def __len__(self) -> int:
        # A single-turn session always carries exactly one request.
        return 1

    def get_next_request(self) -> GenerationRequest:
        return self.request

    def get_next_delay(self) -> float:
        # One-shot sessions never wait between turns.
        return 0.0

    def push_response(self, response: ResponseSummary) -> None:  # noqa: ARG002
        # Any response ends the session; the payload itself is unused here.
        self._finished = True

    @property
    def complete(self) -> bool:
        return self._finished

src/guidellm/scheduler/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
RequestsWorker,
2323
ResolveStatus,
2424
WorkerDescription,
25-
WorkerProcessRequest,
2625
WorkerProcessResult,
2726
)
2827

@@ -46,7 +45,6 @@
4645
"SynchronousStrategy",
4746
"ThroughputStrategy",
4847
"WorkerDescription",
49-
"WorkerProcessRequest",
5048
"WorkerProcessResult",
5149
"strategy_display_str",
5250
]

src/guidellm/scheduler/result.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
1+
from dataclasses import dataclass
2+
from queue import Queue
13
from typing import (
24
Generic,
35
Literal,
46
Optional,
57
)
68

79
from guidellm.objects import StandardBaseModel
10+
from guidellm.request.session import RequestSession
811
from guidellm.scheduler.strategy import SchedulingStrategy
912
from guidellm.scheduler.types import RequestT, ResponseT
1013

1114
__all__ = [
15+
"MPQueues",
1216
"SchedulerRequestInfo",
1317
"SchedulerRequestResult",
1418
"SchedulerResult",
1519
"SchedulerRunInfo",
20+
"WorkerProcessRequestTime",
21+
"WorkerProcessResult",
1622
]
1723

1824

@@ -135,3 +141,28 @@ class SchedulerRequestResult(
135141
request: RequestT
136142
request_info: SchedulerRequestInfo
137143
response: Optional[ResponseT] = None
144+
145+
146+
# TODO: Move dataclasses somewhere else
147+
148+
149+
@dataclass
class WorkerProcessRequestTime:
    """Timing envelope for one scheduled request (see Scheduler._add_requests)."""

    # Scheduled wall-clock time at which the worker should start the request
    # (taken from the scheduling strategy's times iterator).
    start_time: float
    # Wall-clock deadline for the run; set to the run's end_time.
    timeout_time: float
    # Wall-clock time when this entry was placed on the times queue.
    queued_time: float
154+
155+
156+
@dataclass
class WorkerProcessResult(Generic[RequestT, ResponseT]):
    """Status/result update sent from a worker process back to the scheduler."""

    # Lifecycle stage this update reports for the request.
    type_: Literal["request_scheduled", "request_start", "request_complete"]
    # The request the update refers to.
    request: RequestT
    # Response payload; presumably None for scheduled/start updates and set
    # on completion -- TODO confirm against the worker loop (not visible here).
    response: Optional[ResponseT]
    # Scheduler-side bookkeeping for this request.
    info: SchedulerRequestInfo
162+
163+
164+
@dataclass
class MPQueues(Generic[RequestT, ResponseT]):
    """Bundle of the manager-backed queues shared with worker processes."""

    # Request sessions waiting to be picked up by worker processes.
    requests: Queue[RequestSession[RequestT, ResponseT]]
    # Per-request timing envelopes; the scheduler enqueues one entry per
    # request in each queued session (see Scheduler._add_requests).
    times: Queue[WorkerProcessRequestTime]
    # Status/result updates flowing back from workers to the scheduler
    # (drained by Scheduler._check_result_ready).
    responses: Queue[WorkerProcessResult[RequestT, ResponseT]]

src/guidellm/scheduler/scheduler.py

Lines changed: 63 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import asyncio
22
import math
3-
import multiprocessing
4-
import multiprocessing.queues
53
import time
64
from collections.abc import AsyncGenerator, Iterable, Iterator
75
from concurrent.futures import ProcessPoolExecutor
6+
from multiprocessing import Manager
7+
from queue import Empty as QueueEmpty
8+
from queue import Queue
9+
from threading import Event
810
from typing import (
911
Any,
1012
Generic,
@@ -15,17 +17,22 @@
1517
from loguru import logger
1618

1719
from guidellm.config import settings
20+
from guidellm.request.session import RequestSession
1821
from guidellm.scheduler.result import (
22+
MPQueues,
1923
SchedulerRequestResult,
2024
SchedulerResult,
2125
SchedulerRunInfo,
26+
WorkerProcessRequestTime,
27+
WorkerProcessResult,
2228
)
2329
from guidellm.scheduler.strategy import SchedulingStrategy
24-
from guidellm.scheduler.types import RequestT, ResponseT
30+
from guidellm.scheduler.types import (
31+
RequestT,
32+
ResponseT,
33+
)
2534
from guidellm.scheduler.worker import (
2635
RequestsWorker,
27-
WorkerProcessRequest,
28-
WorkerProcessResult,
2936
)
3037

3138
__all__ = ["Scheduler"]
@@ -114,13 +121,13 @@ async def run(
114121
raise ValueError(f"Invalid max_duration: {max_duration}")
115122

116123
with (
117-
multiprocessing.Manager() as manager,
124+
Manager() as manager,
118125
ProcessPoolExecutor(
119126
max_workers=scheduling_strategy.processes_limit
120127
) as executor,
121128
):
122129
requests_iter: Optional[Iterator[Any]] = None
123-
futures, requests_queue, responses_queue = await self._start_processes(
130+
futures, queues, stop_event = await self._start_processes(
124131
manager, executor, scheduling_strategy
125132
)
126133
run_info, requests_iter, times_iter = self._run_setup(
@@ -149,13 +156,14 @@ async def run(
149156
requests_iter = self._add_requests(
150157
requests_iter,
151158
times_iter,
152-
requests_queue,
159+
queues.requests,
160+
queues.times,
153161
run_info,
154162
)
155163
await asyncio.sleep(0) # enable requests to start
156164

157165
iter_result = self._check_result_ready(
158-
responses_queue,
166+
queues.responses,
159167
run_info,
160168
)
161169
if iter_result is not None:
@@ -171,7 +179,7 @@ async def run(
171179
run_info=run_info,
172180
)
173181

174-
await self._stop_processes(futures, requests_queue)
182+
await self._stop_processes(futures, stop_event)
175183

176184
async def _start_processes(
177185
self,
@@ -180,14 +188,18 @@ async def _start_processes(
180188
scheduling_strategy: SchedulingStrategy,
181189
) -> tuple[
182190
list[asyncio.Future],
183-
multiprocessing.Queue,
184-
multiprocessing.Queue,
191+
MPQueues[RequestT, ResponseT],
192+
Event,
185193
]:
186194
await self.worker.prepare_multiprocessing()
187-
requests_queue = manager.Queue(
188-
maxsize=scheduling_strategy.queued_requests_limit
195+
queues: MPQueues[RequestT, ResponseT] = MPQueues(
196+
requests=manager.Queue(
197+
maxsize=scheduling_strategy.processing_requests_limit
198+
),
199+
times=manager.Queue(maxsize=scheduling_strategy.processing_requests_limit),
200+
responses=manager.Queue(),
189201
)
190-
responses_queue = manager.Queue()
202+
stop_event = manager.Event()
191203

192204
num_processes = min(
193205
scheduling_strategy.processes_limit,
@@ -212,36 +224,21 @@ async def _start_processes(
212224
futures = []
213225
loop = asyncio.get_event_loop()
214226
for id_, requests_limit in zip(process_ids, process_requests_limits):
215-
if scheduling_strategy.processing_mode == "sync":
216-
futures.append(
217-
loop.run_in_executor(
218-
executor,
219-
self.worker.process_loop_synchronous,
220-
requests_queue,
221-
responses_queue,
222-
id_,
223-
)
224-
)
225-
elif scheduling_strategy.processing_mode == "async":
226-
futures.append(
227-
loop.run_in_executor(
228-
executor,
229-
self.worker.process_loop_asynchronous,
230-
requests_queue,
231-
responses_queue,
232-
requests_limit,
233-
id_,
234-
)
235-
)
236-
else:
237-
raise ValueError(
238-
f"Invalid processing mode: {scheduling_strategy.processing_mode} "
239-
f"for strategy: {scheduling_strategy}"
227+
futures.append(
228+
loop.run_in_executor(
229+
executor,
230+
self.worker.process_loop_asynchronous,
231+
queues,
232+
stop_event,
233+
False, # TODO: Make configurable
234+
requests_limit,
235+
id_,
240236
)
237+
)
241238

242239
await asyncio.sleep(0.1) # give time for processes to start
243240

244-
return futures, requests_queue, responses_queue
241+
return futures, queues, stop_event
245242

246243
def _run_setup(
247244
self,
@@ -284,7 +281,8 @@ def _add_requests(
284281
self,
285282
requests_iter: Optional[Iterator[Any]],
286283
times_iter: Iterator[float],
287-
requests_queue: multiprocessing.Queue,
284+
requests_queue: Queue[RequestSession[RequestT, ResponseT]],
285+
times_queue: Queue[WorkerProcessRequestTime],
288286
run_info: SchedulerRunInfo,
289287
) -> Optional[Iterator[Any]]:
290288
if requests_iter is not None:
@@ -298,23 +296,24 @@ def _add_requests(
298296
if run_info.created_requests >= run_info.end_number:
299297
raise StopIteration
300298

301-
if (
302-
request_time := next(times_iter)
303-
) >= run_info.end_time or time.time() >= run_info.end_time:
304-
raise StopIteration
305-
306-
request = next(requests_iter)
307-
work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest(
308-
request=request,
309-
start_time=request_time,
310-
timeout_time=run_info.end_time,
311-
queued_time=time.time(),
312-
)
313-
requests_queue.put(work_req)
314-
315-
run_info.created_requests += 1
316-
run_info.queued_requests += 1
317-
added_count += 1
299+
session = next(requests_iter)
300+
requests_queue.put(session)
301+
for _ in range(len(session)):
302+
if (
303+
request_time := next(times_iter)
304+
) >= run_info.end_time or time.time() >= run_info.end_time:
305+
raise StopIteration
306+
307+
work_req = WorkerProcessRequestTime(
308+
start_time=request_time,
309+
timeout_time=run_info.end_time,
310+
queued_time=time.time(),
311+
)
312+
times_queue.put(work_req)
313+
314+
run_info.created_requests += 1
315+
run_info.queued_requests += 1
316+
added_count += 1
318317
except StopIteration:
319318
# we've reached the limit number, limit time, or exhausted the requests
320319
# set to None to stop adding more and tell the loop no more requests
@@ -324,14 +323,14 @@ def _add_requests(
324323

325324
def _check_result_ready(
326325
self,
327-
responses_queue: multiprocessing.Queue,
326+
responses_queue: Queue[WorkerProcessResult[RequestT, ResponseT]],
328327
run_info: SchedulerRunInfo,
329328
) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
330329
try:
331330
process_response: WorkerProcessResult[RequestT, ResponseT] = (
332331
responses_queue.get_nowait()
333332
)
334-
except multiprocessing.queues.Empty: # type: ignore[attr-defined]
333+
except QueueEmpty:
335334
return None
336335

337336
if process_response.type_ == "request_scheduled":
@@ -374,9 +373,9 @@ def _check_result_ready(
374373
async def _stop_processes(
375374
self,
376375
futures: list[asyncio.Future],
377-
requests_queue: multiprocessing.Queue,
376+
stop_event: Event,
378377
):
379-
for _ in futures:
380-
requests_queue.put(None)
378+
# stop all processes
379+
stop_event.set()
381380

382381
await asyncio.gather(*futures)

0 commit comments

Comments
 (0)