File tree Expand file tree Collapse file tree 12 files changed +213
-195
lines changed Expand file tree Collapse file tree 12 files changed +213
-195
lines changed Original file line number Diff line number Diff line change 32
32
GenerationRequest ,
33
33
GenerativeRequestLoaderDescription ,
34
34
RequestLoaderDescription ,
35
+ RequestT ,
36
+ ResponseT ,
35
37
)
36
38
from guidellm .scheduler import (
37
39
GenerativeRequestsWorkerDescription ,
38
- RequestT ,
39
- ResponseT ,
40
40
SchedulerRequestResult ,
41
41
WorkerDescription ,
42
42
)
Original file line number Diff line number Diff line change 27
27
GenerationRequest ,
28
28
GenerativeRequestLoaderDescription ,
29
29
RequestLoaderDescription ,
30
+ RequestT ,
31
+ ResponseT ,
30
32
)
31
33
from guidellm .scheduler import (
32
34
GenerativeRequestsWorker ,
33
35
RequestsWorker ,
34
- RequestT ,
35
- ResponseT ,
36
36
Scheduler ,
37
37
SchedulerRequestResult ,
38
38
SchedulingStrategy ,
Original file line number Diff line number Diff line change 1
1
import json
2
+ import os
2
3
from collections .abc import Sequence
3
4
from enum import Enum
4
5
from typing import Literal , Optional
@@ -131,8 +132,12 @@ class Settings(BaseSettings):
131
132
132
133
# Scheduler settings
133
134
max_concurrency : int = 512
134
- max_worker_processes : int = 10
135
- max_add_requests_per_loop : int = 20
135
+ max_worker_processes : int = Field (
136
+ # use number of CPUs - 1, but at least 10
137
+ default_factory = lambda : max ((os .cpu_count () or 1 ) - 1 , 10 )
138
+ )
139
+ min_queued_requests : int = 20
140
+ scheduler_start_delay : float = 5
136
141
137
142
# Data settings
138
143
dataset : DatasetSettings = DatasetSettings ()
Original file line number Diff line number Diff line change 5
5
RequestLoaderDescription ,
6
6
)
7
7
from .request import GenerationRequest
8
+ from .types import RequestT , ResponseT
8
9
9
10
__all__ = [
10
11
"GenerationRequest" ,
11
12
"GenerativeRequestLoader" ,
12
13
"GenerativeRequestLoaderDescription" ,
13
14
"RequestLoader" ,
14
15
"RequestLoaderDescription" ,
16
+ "RequestT" ,
17
+ "ResponseT" ,
15
18
]
Original file line number Diff line number Diff line change @@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
30
30
31
31
class RequestLoader (Iterable ):
32
32
@abstractmethod
33
- def __iter__ (self ): ...
33
+ def __iter__ (self ) -> Iterator : ...
34
34
35
35
@abstractmethod
36
- def __len__ (self ): ...
36
+ def __len__ (self ) -> int : ...
37
37
38
38
@property
39
39
@abstractmethod
Original file line number Diff line number Diff line change 1
1
from typing import TypeVar
2
2
3
- __all__ = ["RequestT" , "ResponseT" ]
3
+ __all__ = [
4
+ "RequestT" ,
5
+ "ResponseT" ,
6
+ ]
4
7
5
8
6
9
RequestT = TypeVar ("RequestT" )
Original file line number Diff line number Diff line change 15
15
ThroughputStrategy ,
16
16
strategy_display_str ,
17
17
)
18
- from .types import RequestT , ResponseT
19
18
from .worker import (
20
19
GenerativeRequestsWorker ,
21
20
GenerativeRequestsWorkerDescription ,
22
21
RequestsWorker ,
23
22
ResolveStatus ,
24
23
WorkerDescription ,
25
- WorkerProcessRequest ,
26
24
WorkerProcessResult ,
27
25
)
28
26
32
30
"ConcurrentStrategy" ,
33
31
"GenerativeRequestsWorker" ,
34
32
"GenerativeRequestsWorkerDescription" ,
35
- "RequestT" ,
36
33
"RequestsWorker" ,
37
34
"ResolveStatus" ,
38
- "ResponseT" ,
39
35
"Scheduler" ,
40
36
"SchedulerRequestInfo" ,
41
37
"SchedulerRequestResult" ,
46
42
"SynchronousStrategy" ,
47
43
"ThroughputStrategy" ,
48
44
"WorkerDescription" ,
49
- "WorkerProcessRequest" ,
50
45
"WorkerProcessResult" ,
51
46
"strategy_display_str" ,
52
47
]
Original file line number Diff line number Diff line change
1
+ """
2
+ Helper module for importing the correct queue types.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from queue import Empty as QueueEmpty
7
+ from queue import Full as QueueFull
8
+ from queue import Queue
9
+ from typing import Generic
10
+
11
+ from guidellm .request .types import RequestT , ResponseT
12
+ from guidellm .scheduler .result import WorkerProcessRequest , WorkerProcessResult
13
+
14
+ __all__ = [
15
+ "MPQueues" ,
16
+ "Queue" ,
17
+ "QueueEmpty" ,
18
+ "QueueFull" ,
19
+ ]
20
+
21
+
22
+ @dataclass
23
+ class MPQueues (Generic [RequestT , ResponseT ]):
24
+ requests : Queue [WorkerProcessRequest [RequestT , ResponseT ]]
25
+ responses : Queue [WorkerProcessResult [RequestT , ResponseT ]]
Original file line number Diff line number Diff line change
1
+ from dataclasses import dataclass
1
2
from typing import (
2
3
Generic ,
3
4
Literal ,
4
5
Optional ,
5
6
)
6
7
7
8
from guidellm .objects import StandardBaseModel
9
+ from guidellm .request .types import RequestT , ResponseT
8
10
from guidellm .scheduler .strategy import SchedulingStrategy
9
- from guidellm .scheduler .types import RequestT , ResponseT
10
11
11
12
__all__ = [
12
13
"SchedulerRequestInfo" ,
13
14
"SchedulerRequestResult" ,
14
15
"SchedulerResult" ,
15
16
"SchedulerRunInfo" ,
17
+ "WorkerProcessRequest" ,
18
+ "WorkerProcessResult" ,
16
19
]
17
20
18
21
@@ -135,3 +138,18 @@ class SchedulerRequestResult(
135
138
request : RequestT
136
139
request_info : SchedulerRequestInfo
137
140
response : Optional [ResponseT ] = None
141
+
142
+
143
+ @dataclass
144
+ class WorkerProcessRequest (Generic [RequestT , ResponseT ]):
145
+ request : RequestT
146
+ timeout_time : float
147
+ queued_time : float
148
+
149
+
150
+ @dataclass
151
+ class WorkerProcessResult (Generic [RequestT , ResponseT ]):
152
+ type_ : Literal ["request_scheduled" , "request_start" , "request_complete" ]
153
+ request : RequestT
154
+ response : Optional [ResponseT ]
155
+ info : SchedulerRequestInfo
You can’t perform that action at this time.
0 commit comments