Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import asyncio
import codecs
import json
from pathlib import Path

import click
Expand Down Expand Up @@ -382,10 +383,43 @@ def benchmark():
default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
help="Maximum global error rate across all benchmarks.",
)
def run(**kwargs):
@click.option(
"--over-saturation",
"--detect-saturation", # alias
default=None,
help=(
"Enable over-saturation detection. "
"Use --over-saturation=True for boolean flag, "
"or a JSON dict with configuration "
'(e.g., \'{"enabled": true, "min_seconds": 30}\'). '
"Defaults to None (disabled)."
),
type=click.UNPROCESSED,
)
def run(**kwargs): # noqa: C901
# Only set CLI args that differ from click defaults
kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)

# Handle over_saturation parsing (can be bool flag or JSON dict string)
if "over_saturation" in kwargs and kwargs["over_saturation"] is not None:
over_sat = kwargs["over_saturation"]
if isinstance(over_sat, str):
try:
# Try parsing as JSON dict
kwargs["over_saturation"] = json.loads(over_sat)
except (json.JSONDecodeError, ValueError):
# If not valid JSON, treat as bool flag
kwargs["over_saturation"] = over_sat.lower() in (
"true",
"1",
"yes",
"on",
)
elif isinstance(over_sat, bool):
# Already a bool, keep as is
pass
# If it's already a dict, keep as is

# Handle remapping for request params
request_type = kwargs.pop("request_type", None)
request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ async def resolve_profile(
max_errors: int | None,
max_error_rate: float | None,
max_global_error_rate: float | None,
over_saturation: bool | dict[str, Any] | None = None,
console: Console | None = None,
) -> Profile:
"""
Expand All @@ -343,6 +344,7 @@ async def resolve_profile(
:param max_errors: Maximum number of errors before stopping
:param max_error_rate: Maximum error rate threshold before stopping
:param max_global_error_rate: Maximum global error rate threshold before stopping
:param over_saturation: Over-saturation detection configuration (bool or dict)
:param console: Console instance for progress reporting, or None
:return: Configured Profile instance ready for benchmarking
:raises ValueError: If constraints are provided with a pre-configured Profile
Expand All @@ -359,6 +361,7 @@ async def resolve_profile(
"max_errors": max_errors,
"max_error_rate": max_error_rate,
"max_global_error_rate": max_global_error_rate,
"over_saturation": over_saturation,
}.items():
if val is not None:
constraints[key] = val
Expand Down Expand Up @@ -500,6 +503,7 @@ async def benchmark_generative_text(
max_errors=args.max_errors,
max_error_rate=args.max_error_rate,
max_global_error_rate=args.max_global_error_rate,
over_saturation=args.over_saturation,
console=console,
)
output_formats = await resolve_output_formats(
Expand Down
5 changes: 2 additions & 3 deletions src/guidellm/benchmark/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Generic, Literal

from rich.console import Group
Expand All @@ -37,7 +36,7 @@
GenerativeBenchmarkAccumulator,
)
from guidellm.scheduler import SchedulerState, SchedulingStrategy
from guidellm.utils import Colors, format_value_display
from guidellm.utils import Colors, format_value_display, safe_format_timestamp

__all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]

Expand Down Expand Up @@ -390,7 +389,7 @@ def formatted_start_time(self) -> str:
if self.start_time < 0.0:
return "--:--:--"

return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
return safe_format_timestamp(self.start_time, format_="%H:%M:%S")

@property
def formatted_progress_status(self) -> str:
Expand Down
8 changes: 8 additions & 0 deletions src/guidellm/benchmark/schemas/generative/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,14 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
max_global_error_rate: float | None = Field(
default=None, description="Maximum global error rate (0-1) before stopping"
)
over_saturation: bool | dict[str, Any] | None = Field(
default=None,
description=(
"Over-saturation detection configuration. Can be a bool to enable/disable "
"with defaults, or a dict with configuration parameters (enabled, "
"min_seconds, max_window_seconds, moe_threshold, etc.)."
),
)

@field_validator("data", "data_args", "rate", mode="wrap")
@classmethod
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
MaxErrorsConstraint,
MaxGlobalErrorRateConstraint,
MaxNumberConstraint,
OverSaturationConstraint,
OverSaturationConstraintInitializer,
PydanticConstraintInitializer,
SerializableConstraintInitializer,
UnserializableConstraintInitializer,
Expand Down Expand Up @@ -66,6 +68,8 @@
"MaxNumberConstraint",
"MultiTurnRequestT",
"NonDistributedEnvironment",
"OverSaturationConstraint",
"OverSaturationConstraintInitializer",
"PydanticConstraintInitializer",
"RequestT",
"ResponseT",
Expand Down
Loading
Loading