Skip to content
Merged
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
GenerationRequest,
GenerativeRequestLoaderDescription,
RequestLoaderDescription,
RequestT,
ResponseT,
)
from guidellm.scheduler import (
GenerativeRequestsWorkerDescription,
RequestT,
ResponseT,
SchedulerRequestResult,
WorkerDescription,
)
Expand Down
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/benchmarker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
GenerationRequest,
GenerativeRequestLoaderDescription,
RequestLoaderDescription,
RequestT,
ResponseT,
)
from guidellm.scheduler import (
GenerativeRequestsWorker,
RequestsWorker,
RequestT,
ResponseT,
Scheduler,
SchedulerRequestResult,
SchedulingStrategy,
Expand Down
7 changes: 6 additions & 1 deletion src/guidellm/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
from collections.abc import Sequence
from enum import Enum
from typing import Literal, Optional
Expand Down Expand Up @@ -131,8 +132,12 @@ class Settings(BaseSettings):

# Scheduler settings
max_concurrency: int = 512
max_worker_processes: int = 10
max_worker_processes: int = Field(
# use number of CPUs - 1, but at least 10
default_factory=lambda: max((os.cpu_count() or 1) - 1, 10)
)
max_add_requests_per_loop: int = 20
scheduler_start_delay: float = 5

# Data settings
dataset: DatasetSettings = DatasetSettings()
Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/request/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
RequestLoaderDescription,
)
from .request import GenerationRequest
from .types import RequestT, ResponseT

__all__ = [
"GenerationRequest",
"GenerativeRequestLoader",
"GenerativeRequestLoaderDescription",
"RequestLoader",
"RequestLoaderDescription",
"RequestT",
"ResponseT",
]
4 changes: 2 additions & 2 deletions src/guidellm/request/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):

class RequestLoader(Iterable):
@abstractmethod
def __iter__(self): ...
def __iter__(self) -> Iterator: ...

@abstractmethod
def __len__(self): ...
def __len__(self) -> int: ...

@property
@abstractmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import TypeVar

__all__ = ["RequestT", "ResponseT"]
__all__ = [
"RequestT",
"ResponseT",
]


RequestT = TypeVar("RequestT")
Expand Down
5 changes: 0 additions & 5 deletions src/guidellm/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@
ThroughputStrategy,
strategy_display_str,
)
from .types import RequestT, ResponseT
from .worker import (
GenerativeRequestsWorker,
GenerativeRequestsWorkerDescription,
RequestsWorker,
ResolveStatus,
WorkerDescription,
WorkerProcessRequest,
WorkerProcessResult,
)

Expand All @@ -32,10 +30,8 @@
"ConcurrentStrategy",
"GenerativeRequestsWorker",
"GenerativeRequestsWorkerDescription",
"RequestT",
"RequestsWorker",
"ResolveStatus",
"ResponseT",
"Scheduler",
"SchedulerRequestInfo",
"SchedulerRequestResult",
Expand All @@ -46,7 +42,6 @@
"SynchronousStrategy",
"ThroughputStrategy",
"WorkerDescription",
"WorkerProcessRequest",
"WorkerProcessResult",
"strategy_display_str",
]
25 changes: 25 additions & 0 deletions src/guidellm/scheduler/queues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Helper module for importing the correct queue types.
"""

from dataclasses import dataclass
from queue import Empty as QueueEmpty
from queue import Full as QueueFull
from queue import Queue
from typing import Generic

from guidellm.request.types import RequestT, ResponseT
from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult

__all__ = [
"MPQueues",
"Queue",
"QueueEmpty",
"QueueFull",
]


@dataclass
class MPQueues(Generic[RequestT, ResponseT]):
    """
    Bundle of the two queues used to exchange work items and their results.

    Generic over the request and response types carried by the queued items.
    NOTE(review): the "MP" prefix presumably stands for multi-process; the
    annotations here only name ``queue.Queue`` -- confirm against the code
    that constructs these queues.
    """

    # Queue whose items are WorkerProcessRequest objects.
    requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
    # Queue whose items are WorkerProcessResult objects.
    responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
20 changes: 19 additions & 1 deletion src/guidellm/scheduler/result.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from dataclasses import dataclass
from typing import (
Generic,
Literal,
Optional,
)

from guidellm.objects import StandardBaseModel
from guidellm.request.types import RequestT, ResponseT
from guidellm.scheduler.strategy import SchedulingStrategy
from guidellm.scheduler.types import RequestT, ResponseT

__all__ = [
"SchedulerRequestInfo",
"SchedulerRequestResult",
"SchedulerResult",
"SchedulerRunInfo",
"WorkerProcessRequest",
"WorkerProcessResult",
]


Expand Down Expand Up @@ -135,3 +138,18 @@ class SchedulerRequestResult(
request: RequestT
request_info: SchedulerRequestInfo
response: Optional[ResponseT] = None


@dataclass
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
    """
    A single request together with two float timing values.

    Generic over the request and response types; only the request type is
    used by a field here, the response type parameter is carried for typing.
    """

    # The request payload itself.
    request: RequestT
    # NOTE(review): presumably the time after which the request should be
    # treated as timed out; units/clock are not visible here -- confirm.
    timeout_time: float
    # NOTE(review): presumably the time the request was placed on the queue;
    # units/clock are not visible here -- confirm.
    queued_time: float


@dataclass
class WorkerProcessResult(Generic[RequestT, ResponseT]):
    """
    A status update for one request, tagged with the kind of update.

    Generic over the request and response types of the carried payloads.
    """

    # Kind of update; restricted to exactly these three string values.
    type_: Literal["request_scheduled", "request_start", "request_complete"]
    # The request this update refers to.
    request: RequestT
    # The response, or None when there is no response to report.
    # NOTE(review): presumably None for the non-"request_complete" updates;
    # confirm against the worker code.
    response: Optional[ResponseT]
    # Scheduler bookkeeping record associated with the request.
    info: SchedulerRequestInfo
Loading