refactor: adapt logic to check projected tool calls against limits before execution

tradeqvest · tradeqvest · commit 9a459ab5957f · 2025-10-02T09:53:00.000+02:00
diff --git a/docs/agents.md b/docs/agents.md
@@ -630,12 +630,14 @@ try:
     agent.run_sync('Please call the tool twice', usage_limits=UsageLimits(tool_calls_limit=1))
 except UsageLimitExceeded as e:
     print(e)
-    #> The next tool call would exceed the tool_calls_limit of 1 (tool_calls=1)
+    """
+    With the next tool call(s), the projected amount of tool calls (2) would exceed the limit of 1.
+    """
 ```
 
 !!! note
     - Usage limits are especially relevant if you've registered many tools. Use `request_limit` to bound the number of model turns, and `tool_calls_limit` to cap the number of successful tool executions within a run.
-    - These limits are enforced at the final stage before the LLM is called. If your limits are stricter than your retry settings, the usage limit will be reached before all retries are attempted.
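+    - The `tool_calls_limit` is checked before executing tool calls. If the projected total (tool calls already counted in the run plus those in the current batch) would exceed the limit, `UsageLimitExceeded` is raised and no tools from that batch are executed.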
 
 #### Model (Run) Settings
 
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -863,27 +863,22 @@ async def process_tool_calls(  # noqa: C901
         output_final_result.append(final_result)
 
 
-def _enforce_tool_call_limits(
+def _check_tool_call_limits(
     tool_manager: ToolManager[DepsT],
     tool_calls: list[_messages.ToolCallPart],
     usage_limits: _usage.UsageLimits | None,
-) -> tuple[list[_messages.ToolCallPart], int]:
-    """Enforce tool call limits and return limited calls and extra count."""
+) -> None:
+    """Check if executing the tool calls would exceed the limit."""
     if usage_limits is None or usage_limits.tool_calls_limit is None:
-        return tool_calls, 0
+        return
 
     current_tool_calls = tool_manager.ctx.usage.tool_calls if tool_manager.ctx is not None else 0
-    remaining_allowed = usage_limits.tool_calls_limit - current_tool_calls
+    projected_tool_calls = current_tool_calls + len(tool_calls)
 
-    if remaining_allowed <= 0:
-        usage_limits.check_before_tool_call(tool_manager.ctx.usage if tool_manager.ctx else _usage.RunUsage())
-
-    if remaining_allowed < len(tool_calls):
-        limited_tool_calls = tool_calls[: max(0, remaining_allowed)]
-        extra_calls_count = len(tool_calls) - len(limited_tool_calls)
-        return limited_tool_calls, extra_calls_count
-
-    return tool_calls, 0
+    if projected_tool_calls > usage_limits.tool_calls_limit:
+        projected_usage = deepcopy(tool_manager.ctx.usage) if tool_manager.ctx else _usage.RunUsage()
+        projected_usage.tool_calls = projected_tool_calls
+        usage_limits.check_before_tool_call(projected_usage)
 
 
 async def _call_tools(
@@ -899,6 +894,8 @@ async def _call_tools(
     user_parts_by_index: dict[int, _messages.UserPromptPart] = {}
     deferred_calls_by_index: dict[int, Literal['external', 'unapproved']] = {}
 
+    _check_tool_call_limits(tool_manager, tool_calls, usage_limits)
+
     for call in tool_calls:
         yield _messages.FunctionToolCallEvent(call)
 
@@ -943,8 +940,6 @@ async def handle_call_or_result(
                     yield event
 
         else:
-            executed_calls, extra_calls_count = _enforce_tool_call_limits(tool_manager, tool_calls, usage_limits)
-
             tasks = [
                 asyncio.create_task(
                     _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id), usage_limits),
@@ -961,10 +956,6 @@ async def handle_call_or_result(
                     if event := await handle_call_or_result(coro_or_task=task, index=index):
                         yield event
 
-            # If there were extra calls beyond the allowed limit, raise now
-            if extra_calls_count and usage_limits is not None:
-                usage_limits.check_before_tool_call(tool_manager.ctx.usage if tool_manager.ctx else _usage.RunUsage())
-
     # We append the results at the end, rather than as they are received, to retain a consistent ordering
     # This is mostly just to simplify testing
     output_parts.extend([tool_parts_by_index[k] for k in sorted(tool_parts_by_index)])
diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py
@@ -340,12 +340,12 @@ def check_tokens(self, usage: RunUsage) -> None:
         if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit:
             raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})')
 
-    def check_before_tool_call(self, usage: RunUsage) -> None:
-        """Raises a `UsageLimitExceeded` exception if the next tool call would exceed the tool call limit."""
+    def check_before_tool_call(self, projected_usage: RunUsage) -> None:
+        """Raises a `UsageLimitExceeded` exception if the next tool call(s) would exceed the tool call limit."""
         tool_calls_limit = self.tool_calls_limit
-        if tool_calls_limit is not None and usage.tool_calls >= tool_calls_limit:
+        if tool_calls_limit is not None and projected_usage.tool_calls > tool_calls_limit:
             raise UsageLimitExceeded(
-                f'The next tool call would exceed the tool_calls_limit of {tool_calls_limit} (tool_calls={usage.tool_calls})'
+                f'With the next tool call(s), the projected amount of tool calls ({projected_usage.tool_calls}) would exceed the limit of {tool_calls_limit}.'
             )
 
     __repr__ = _utils.dataclasses_no_defaults_repr
diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -387,7 +387,10 @@ async def call_tool(
         'The capital of Italy is Rome (Roma, in Italian), which has been a cultural and political center for centuries.'
         'Rome is known for its rich history, stunning architecture, and delicious cuisine.'
     ),
-    'Please call the tool twice': ToolCallPart(tool_name='do_work', args={}, tool_call_id='pyd_ai_tool_call_id'),
+    'Please call the tool twice': [
+        ToolCallPart(tool_name='do_work', args={}, tool_call_id='pyd_ai_tool_call_id_1'),
+        ToolCallPart(tool_name='do_work', args={}, tool_call_id='pyd_ai_tool_call_id_2'),
+    ],
     'Begin infinite retry loop!': ToolCallPart(
         tool_name='infinite_retry_tool', args={}, tool_call_id='pyd_ai_tool_call_id'
     ),
diff --git a/tests/test_usage_limits.py b/tests/test_usage_limits.py
@@ -13,6 +13,7 @@
 
 from pydantic_ai import (
     Agent,
+    ModelMessage,
     ModelRequest,
     ModelResponse,
     RunContext,
@@ -22,6 +23,7 @@
     UserPromptPart,
 )
 from pydantic_ai.exceptions import ModelRetry
+from pydantic_ai.models.function import AgentInfo, FunctionModel
 from pydantic_ai.models.test import TestModel
 from pydantic_ai.output import ToolOutput
 from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
@@ -254,7 +256,10 @@ async def ret_a(x: str) -> str:
         return f'{x}-apple'
 
     with pytest.raises(
-        UsageLimitExceeded, match=re.escape('The next tool call would exceed the tool_calls_limit of 0 (tool_calls=0)')
+        UsageLimitExceeded,
+        match=re.escape(
+            'With the next tool call(s), the projected amount of tool calls (1) would exceed the limit of 0.'
+        ),
     ):
         await test_agent.run('Hello', usage_limits=UsageLimits(tool_calls_limit=0))
 
@@ -330,7 +335,7 @@ def test_model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelR
         model_call_count += 1
 
         if model_call_count == 1:
-            # First response: 5 parallel tool calls
+            # First response: 5 parallel tool calls (within limit)
             return ModelResponse(
                 parts=[
                     ToolCallPart('tool_a', {}, 'call_1'),
@@ -342,7 +347,7 @@ def test_model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelR
             )
         else:
             assert model_call_count == 2
-            # Second response: 3 parallel tool calls (should exceed limit)
+            # Second response: 3 parallel tool calls (would exceed limit of 6)
             return ModelResponse(
                 parts=[
                     ToolCallPart('tool_c', {}, 'call_6'),
@@ -372,12 +377,14 @@ async def tool_c() -> str:
         executed_tools.append('c')
         return 'result c'
 
-    # Run with tool call limit of 6; expecting an error once the limit is reached
+    # Run with tool call limit of 6; expecting an error when trying to execute 3 more tools
     with pytest.raises(
         UsageLimitExceeded,
-        match=r'The next tool call would exceed the tool_calls_limit of 6 \(tool_calls=(6)\)',
+        match=re.escape(
+            'With the next tool call(s), the projected amount of tool calls (8) would exceed the limit of 6.'
+        ),
     ):
         await agent.run('Use tools', usage_limits=UsageLimits(tool_calls_limit=6))
 
-    # Only 6 tool calls should have actually executed
-    assert len(executed_tools) == 6
+    # Only the first batch of 5 tools should have executed
+    assert len(executed_tools) == 5