From 01ecf5eac7c2480df9b0fd78f06c50a970b6aceb Mon Sep 17 00:00:00 2001
From: David Weese
Date: Tue, 23 Sep 2025 22:48:31 +0200
Subject: [PATCH 1/4] add max-input-length and max-output-token filter

---
 .../filters/message_length_filter_pipeline.py | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 examples/filters/message_length_filter_pipeline.py

diff --git a/examples/filters/message_length_filter_pipeline.py b/examples/filters/message_length_filter_pipeline.py
new file mode 100644
index 00000000..d122278f
--- /dev/null
+++ b/examples/filters/message_length_filter_pipeline.py
@@ -0,0 +1,97 @@
+"""
+title: Message Length Filter
+author: David Weese
+date: 2025-09-23
+version: 1.0
+license: MIT
+description: Filter that enforces maximum input message length and caps/truncates assistant output length.
+"""
+
+from typing import List, Optional, Any
+from pydantic import BaseModel
+from schemas import OpenAIChatMessage
+import os
+import math
+
+
+def _get_last_message_by_roles(messages: List[dict], roles: List[str]) -> Optional[dict]:
+    for message in reversed(messages):
+        if message.get("role") in roles:
+            return message
+    return None
+
+
+def _compute_text_length(content: Any) -> int:
+    # content can be a string or a list (multi-part). We count only textual segments.
+    if isinstance(content, str):
+        return len(content)
+    if isinstance(content, list):
+        total = 0
+        for part in content:
+            # Common OpenAI-style structured parts may include {type: "text", text: "..."}
+            if isinstance(part, str):
+                total += len(part)
+            elif isinstance(part, dict):
+                text = part.get("text") or part.get("content")
+                if isinstance(text, str):
+                    total += len(text)
+        return total
+    return 0
+
+
+class Pipeline:
+    class Valves(BaseModel):
+        # Connect to these pipelines (models). Use ["*"] for all.
+        pipelines: List[str] = ["*"]
+
+        # Filter execution order among filters. Lower runs first.
+        priority: int = 0
+
+        # Input validation
+        target_user_roles: List[str] = ["user"]
+        max_user_message_chars: Optional[int] = int(os.getenv("MAX_USER_MESSAGE_CHARS", "4000"))
+
+        # Output limits
+        # If set, we will constrain generation length via tokens only
+        max_assistant_response_tokens: Optional[int] = None
+
+    def __init__(self):
+        # Pipeline filters are only compatible with Open WebUI
+        self.type = "filter"
+        self.name = "Message Length Filter"
+
+        # Initialize valves with environment overrides where applicable
+        self.valves = self.Valves(
+            **{
+                "pipelines": os.getenv("MSG_LEN_FILTER_PIPELINES", "*").split(","),
+                # Other valves fall back to their defaults
+            }
+        )
+
+    def _apply_output_token_cap(self, body: dict):
+        # Enforce explicit token cap only
+        desired_tokens = self.valves.max_assistant_response_tokens
+        if desired_tokens is None:
+            return
+
+        options = body.get("options")
+        if not isinstance(options, dict):
+            options = {}
+        options["max_tokens"] = min(options.get("max_tokens", desired_tokens), desired_tokens)
+        body["options"] = options
+
+    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        # Validate input message length
+        max_chars = self.valves.max_user_message_chars
+        if max_chars and max_chars > 0:
+            last_target_msg = _get_last_message_by_roles(body.get("messages", []), self.valves.target_user_roles)
+            if last_target_msg:
+                length = _compute_text_length(last_target_msg.get("content"))
+                if length > max_chars:
+                    raise Exception(
+                        f"Input message exceeds limit: {length} > {max_chars} characters."
+                    )
+
+        # Enforce output cap via tokens (top-level and options)
+        self._apply_output_token_cap(body)
+        return body

From 9bbb39a14b64e4140b363d6f0140e15261630f3f Mon Sep 17 00:00:00 2001
From: David Weese
Date: Wed, 24 Sep 2025 09:32:39 +0200
Subject: [PATCH 2/4] increase default message length

---
 examples/filters/message_length_filter_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/filters/message_length_filter_pipeline.py b/examples/filters/message_length_filter_pipeline.py
index d122278f..613147ba 100644
--- a/examples/filters/message_length_filter_pipeline.py
+++ b/examples/filters/message_length_filter_pipeline.py
@@ -49,7 +49,7 @@ class Valves(BaseModel):
 
         # Input validation
         target_user_roles: List[str] = ["user"]
-        max_user_message_chars: Optional[int] = int(os.getenv("MAX_USER_MESSAGE_CHARS", "4000"))
+        max_user_message_chars: Optional[int] = int(os.getenv("MAX_USER_MESSAGE_CHARS", "10000"))
 
         # Output limits
         # If set, we will constrain generation length via tokens only

From 853054d7548dd1d95a3bc71c7bfe641d0553dba7 Mon Sep 17 00:00:00 2001
From: David Weese
Date: Mon, 29 Sep 2025 12:30:30 +0200
Subject: [PATCH 3/4] given response token length also a configurable default

---
 examples/filters/message_length_filter_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/filters/message_length_filter_pipeline.py b/examples/filters/message_length_filter_pipeline.py
index 613147ba..f31bbf4a 100644
--- a/examples/filters/message_length_filter_pipeline.py
+++ b/examples/filters/message_length_filter_pipeline.py
@@ -49,11 +49,11 @@ class Valves(BaseModel):
 
         # Input validation
         target_user_roles: List[str] = ["user"]
-        max_user_message_chars: Optional[int] = int(os.getenv("MAX_USER_MESSAGE_CHARS", "10000"))
+        max_user_message_chars: Optional[int] = int(os.getenv("MAX_USER_MESSAGE_CHARS", "20000"))
 
         # Output limits
         # If set, we will constrain generation length via tokens only
-        max_assistant_response_tokens: Optional[int] = None
+        max_assistant_response_tokens: Optional[int] = int(os.getenv("MAX_RESPONSE_TOKENS", "8192"))
 
     def __init__(self):
         # Pipeline filters are only compatible with Open WebUI

From 649d5006d5ab0e9fb45f4447d9d7047f064fadb1 Mon Sep 17 00:00:00 2001
From: David Weese
Date: Tue, 30 Sep 2025 14:15:39 +0200
Subject: [PATCH 4/4] silence please

---
 examples/filters/rate_limit_filter_pipeline.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/examples/filters/rate_limit_filter_pipeline.py b/examples/filters/rate_limit_filter_pipeline.py
index d1e88236..e203f65c 100644
--- a/examples/filters/rate_limit_filter_pipeline.py
+++ b/examples/filters/rate_limit_filter_pipeline.py
@@ -114,10 +114,6 @@ def rate_limited(self, user_id: str) -> bool:
         return False
 
     async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
-        print(f"pipe:{__name__}")
-        print(body)
-        print(user)
-
         if user.get("role", "admin") == "user":
             user_id = user["id"] if user and "id" in user else "default_user"
             if self.rate_limited(user_id):