vllm-project · AlonKellner-RedHat · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -25,6 +25,7 @@
 
 import asyncio
 import codecs
+import json
 from pathlib import Path
 
 import click
@@ -382,10 +383,43 @@ def benchmark():
     default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
     help="Maximum global error rate across all benchmarks.",
 )
-def run(**kwargs):
+@click.option(
+    "--over-saturation",
+    "--detect-saturation",  # alias
+    default=None,
+    help=(
+        "Enable over-saturation detection. "
+        "Use --over-saturation=True for boolean flag, "
+        "or a JSON dict with configuration "
+        '(e.g., \'{"enabled": true, "min_seconds": 30}\'). '
+        "Defaults to None (disabled)."
+    ),
+    type=click.UNPROCESSED,
+)
+def run(**kwargs):  # noqa: C901
     # Only set CLI args that differ from click defaults
     kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
 
+    # Normalize --over-saturation: a raw string is decoded as JSON or as a bool word
+    if "over_saturation" in kwargs and kwargs["over_saturation"] is not None:
+        over_sat = kwargs["over_saturation"]
+        if isinstance(over_sat, str):
+            try:
+                # Any valid JSON is kept as decoded (dict, true/false, number, ...)
+                kwargs["over_saturation"] = json.loads(over_sat)
+            except (json.JSONDecodeError, ValueError):
+                # Not JSON (e.g. "True"): True only for the words below, else False
+                kwargs["over_saturation"] = over_sat.lower() in (
+                    "true",
+                    "1",  # NOTE(review): "1" parses as JSON above, so never reached
+                    "yes",
+                    "on",
+                )
+        elif isinstance(over_sat, bool):
+            # Already a bool, nothing to convert
+            pass
+        # Any other non-string value (e.g. a dict) is passed through unchanged
+
     # Handle remapping for request params
     request_type = kwargs.pop("request_type", None)
     request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)

diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
@@ -323,6 +323,7 @@ async def resolve_profile(
     max_errors: int | None,
     max_error_rate: float | None,
     max_global_error_rate: float | None,
+    over_saturation: bool | dict[str, Any] | None = None,
     console: Console | None = None,
 ) -> Profile:
     """
@@ -343,6 +344,7 @@ async def resolve_profile(
     :param max_errors: Maximum number of errors before stopping
     :param max_error_rate: Maximum error rate threshold before stopping
     :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param over_saturation: Over-saturation detection configuration (bool or dict)
     :param console: Console instance for progress reporting, or None
     :return: Configured Profile instance ready for benchmarking
     :raises ValueError: If constraints are provided with a pre-configured Profile
@@ -359,6 +361,7 @@ async def resolve_profile(
         "max_errors": max_errors,
         "max_error_rate": max_error_rate,
         "max_global_error_rate": max_global_error_rate,
+        "over_saturation": over_saturation,
     }.items():
         if val is not None:
             constraints[key] = val
@@ -500,6 +503,7 @@ async def benchmark_generative_text(
         max_errors=args.max_errors,
         max_error_rate=args.max_error_rate,
         max_global_error_rate=args.max_global_error_rate,
+        over_saturation=args.over_saturation,
         console=console,
     )
     output_formats = await resolve_output_formats(

diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py
@@ -12,7 +12,6 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from datetime import datetime
 from typing import Any, Generic, Literal
 
 from rich.console import Group
@@ -37,7 +36,7 @@
     GenerativeBenchmarkAccumulator,
 )
 from guidellm.scheduler import SchedulerState, SchedulingStrategy
-from guidellm.utils import Colors, format_value_display
+from guidellm.utils import Colors, format_value_display, safe_format_timestamp
 
 __all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]
 
@@ -390,7 +389,7 @@ def formatted_start_time(self) -> str:
         if self.start_time < 0.0:
             return "--:--:--"
 
-        return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
+        return safe_format_timestamp(self.start_time, format_="%H:%M:%S")
 
     @property
     def formatted_progress_status(self) -> str:

diff --git a/src/guidellm/benchmark/schemas/generative/entrypoints.py b/src/guidellm/benchmark/schemas/generative/entrypoints.py
@@ -283,6 +283,14 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
     max_global_error_rate: float | None = Field(
         default=None, description="Maximum global error rate (0-1) before stopping"
     )
+    over_saturation: bool | dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Over-saturation detection configuration. Can be a bool to enable/disable "
+            "with defaults, or a dict with configuration parameters (enabled, "
+            "min_seconds, max_window_seconds, moe_threshold, etc.)."
+        ),
+    )
 
     @field_validator("data", "data_args", "rate", mode="wrap")
     @classmethod

diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py
@@ -19,6 +19,8 @@
     MaxErrorsConstraint,
     MaxGlobalErrorRateConstraint,
     MaxNumberConstraint,
+    OverSaturationConstraint,
+    OverSaturationConstraintInitializer,
     PydanticConstraintInitializer,
     SerializableConstraintInitializer,
     UnserializableConstraintInitializer,
@@ -66,6 +68,8 @@
     "MaxNumberConstraint",
     "MultiTurnRequestT",
     "NonDistributedEnvironment",
+    "OverSaturationConstraint",
+    "OverSaturationConstraintInitializer",
     "PydanticConstraintInitializer",
     "RequestT",
     "ResponseT",