Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 52 additions & 4 deletions src/llama_stack_api/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# the root directory of this source tree.

from collections.abc import AsyncIterator
from typing import Annotated, Protocol, runtime_checkable
from typing import Annotated, Protocol, runtime_checkable, Any, Literal

from pydantic import BaseModel

Expand All @@ -30,12 +30,60 @@
class ResponseGuardrailSpec(BaseModel):
"""Specification for a guardrail to apply during response generation.
:param type: The type/identifier of the guardrail.
Production-focused configuration allowing safety, moderation, and policy controls.
Fields
------
type: Identifier for the guardrail implementation (e.g. 'llama-guard', 'content-filter').
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these look great, but can I ask where these are coming from?

Copy link
Author

@natedemoss natedemoss Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, the “type” values come from the backend guardrail config, not from agents.py. They’re IDs the server maps to concrete handlers (e.g., llama-guard, content-filter). Resolution happens during create_openai_response on the backend. If helpful, I can add a doc note in ResponseGuardrailSpec pointing to the registry module or config path.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I can make a new PR with a comment with less specificality.

description: Human readable explanation / purpose.
enabled: Whether enforcement is active (default True).
severity: Severity classification for violations (info | warn | block).
action: Action when violation occurs (flag | block | redact | annotate). If omitted, provider default applies.
policy_id: Optional external policy/reference identifier to map violations to organizational policy.
version: Optional version of this guardrail configuration (for audit/rollback).
categories: List of safety/moderation categories this guardrail targets (e.g. ['violence','self-harm']).
thresholds: Per-category numeric thresholds (e.g. {'violence':0.8}). Semantics depend on provider.
max_violations: If set, cap number of violations before early termination.
config: Provider/model specific free-form settings (nested allowed).
tags: Arbitrary labels to assist analytics/telemetry and routing.
metadata: Arbitrary supplemental structured metadata for downstream logging.
"""

type: str
# TODO: more fields to be added for guardrail configuration

description: str | None = None
enabled: bool = True
severity: Literal["info", "warn", "block"] | None = None
action: Literal["flag", "block", "redact", "annotate"] | None = None
policy_id: str | None = None
version: str | None = None
categories: list[str] | None = None
thresholds: dict[str, float] | None = None
max_violations: int | None = None
config: dict[str, Any] | None = None
tags: list[str] | None = None
metadata: dict[str, Any] | None = None

model_config = {
"extra": "forbid",
"title": "ResponseGuardrailSpec",
}

@classmethod
def _non_empty(cls, value: str, field_name: str) -> str: # internal helper
if not value or not value.strip():
raise ValueError(f"{field_name} cannot be empty")
return value

@classmethod
def validate(cls, value: Any): # pydantic v2 uses model validators; minimal safeguard here if invoked directly
return value

def normalized(self) -> "ResponseGuardrailSpec":
"""Return a normalized copy (e.g., lower-casing categories, stripping whitespace)."""
if self.categories:
object.__setattr__(self, "categories", [c.strip().lower() for c in self.categories])
return self


ResponseGuardrail = str | ResponseGuardrailSpec

Expand Down