From 332d38f1f8553cbd1b35425595970880cda4d90f Mon Sep 17 00:00:00 2001
From: Benjamin Michaelis <git@relay.benjamin.michaelis.net>
Date: Fri, 8 May 2026 21:51:36 -0700
Subject: [PATCH 1/4] Fix AI chat Responses API tool-chaining and context limit
 handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix streaming tool continuations: track currentLegResponseId from
  StreamingResponseCreatedUpdate and clone options with that ID before
  calling ExecuteFunctionCallAsync, so the continuation correctly
  references the response that produced the function call (not the
  outer conversation root).

- Fix non-streaming loop token waste: after each tool-call response,
  advance PreviousResponseId to the new responseId and reset
  responseItems to [] so subsequent iterations only send new tool
  outputs — not the growing full history. This prevents quadratic
  token re-processing.

- Remove redundant functionCallItem from continuation inputs: when
  using previous_response_id chaining, the function call is already
  stored server-side in the referenced response; only the
  FunctionCallOutputResponseItem is new content.

- Add ConversationContextLimitExceededException domain exception and
  IsContextLengthError helper to detect context window overflow from
  ClientResultException (HTTP 400 with context_length_exceeded text).

- Handle context limit in ChatController: both /api/chat/message and
  /api/chat/stream return 400 with errorCode 'context_limit_exceeded'.

- Add CloneOptionsWithPreviousResponseId helper to avoid mutating
  shared options across concurrent streaming legs.

- Add ParseToolArguments helper to deduplicate JSON argument parsing
  in both streaming and non-streaming paths.

- Fix OPENAI001 pragma: extend the disable region to cover the full
  loop body including the ClientResult<OpenAIResponse> declaration.
---
 .../Services/AIChatService.cs                 | 163 +++++++++++-------
 ...nversationContextLimitExceededException.cs |  30 ++++
 .../Controllers/ChatController.cs             |  47 +++--
 3 files changed, 165 insertions(+), 75 deletions(-)
 create mode 100644 EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs
diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
index 5655781c..273b4ded 100644
--- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
+++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
@@ -4,6 +4,7 @@
 using ModelContextProtocol.Client;
 using ModelContextProtocol.Protocol;
 using OpenAI.Responses;
+using System.ClientModel;
 using System.Collections.Frozen;
 
 namespace EssentialCSharp.Chat.Common.Services;
@@ -183,6 +184,10 @@ private static string SanitizeForXmlContext(string? input) =>
         string? endUserId = null,
         [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
+        // Track this leg's response ID so tool-call continuations chain from it,
+        // ensuring the model's context includes the user's message + reasoning.
+        string? currentLegResponseId = null;
+
         await foreach (var update in streamingUpdates.WithCancellation(cancellationToken))
         {
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
@@ -190,7 +195,8 @@ private static string SanitizeForXmlContext(string? input) =>
             {
                 // Emit the response ID early so the controller can record ownership
                 // before the stream completes — handles client disconnects mid-stream.
-                yield return (string.Empty, responseId: created.Response.Id);
+                currentLegResponseId = created.Response.Id;
+                yield return (string.Empty, responseId: currentLegResponseId);
             }
             else if (update is StreamingResponseOutputItemDoneUpdate itemDone)
             {
@@ -200,10 +206,13 @@ private static string SanitizeForXmlContext(string? input) =>
                     if (toolCallDepth >= 10)
                         throw new InvalidOperationException("Maximum tool call depth exceeded.");
 
-                    // Execute the function call and stream its response.
-                    // Each nested ProcessStreamingUpdatesAsync emits its own StreamingResponseCreatedUpdate
-                    // (which already yields its responseId early), so no extra tracking is needed here.
-                    await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, responseOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken))
+                    // Chain the continuation from THIS leg's response ID so the model sees:
+                    //   context(outer) → userMessage + AI reasoning + funcCall (stored in currentLegResponseId)
+                    //   → [toolOutput only] → next response
+                    // Using the outer previousResponseId here would omit the user's message from context.
+                    var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId);
+
+                    await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken))
                     {
                         yield return functionResult;
                     }
@@ -239,12 +248,11 @@ private static string SanitizeForXmlContext(string? input) =>
         if (!IsMcpToolAllowed(functionCallItem.FunctionName))
         {
             LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId);
-            // Feed a benign error back to the model so it can recover gracefully,
-            // mirroring what GetChatCompletionCore does on the non-streaming path.
+            // Feed a benign error back to the model so it can recover gracefully.
+            // The functionCallItem is in the stored response at PreviousResponseId — only send the output.
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
             var errorItems = new List<ResponseItem>
             {
-                functionCallItem,
                 new FunctionCallOutputResponseItem(
                     functionCallItem.CallId,
                     $"Tool '{functionCallItem.FunctionName}' is not available.")
@@ -262,32 +270,7 @@ private static string SanitizeForXmlContext(string? input) =>
 
         LogMcpToolCallInvokedStream(_Logger, functionCallItem.FunctionName, toolCallDepth, endUserId);
         // A dictionary of arguments to pass to the tool. Each key represents a parameter name, and its associated value represents the argument value.
-        Dictionary<string, object?> arguments = [];
-        // example JsonResponse:
-        // "{\"question\":\"Azure OpenAI Responses API (Preview)\"}"
-        var jsonResponse = functionCallItem.FunctionArguments.ToString();
-        var jsonArguments = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, object?>>(jsonResponse) ?? new Dictionary<string, object?>();
-
-        // Convert JsonElement values to their actual types
-        foreach (var kvp in jsonArguments)
-        {
-            if (kvp.Value is System.Text.Json.JsonElement jsonElement)
-            {
-                arguments[kvp.Key] = jsonElement.ValueKind switch
-                {
-                    System.Text.Json.JsonValueKind.String => jsonElement.GetString(),
-                    System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(),
-                    System.Text.Json.JsonValueKind.True => true,
-                    System.Text.Json.JsonValueKind.False => false,
-                    System.Text.Json.JsonValueKind.Null => null,
-                    _ => jsonElement.ToString()
-                };
-            }
-            else
-            {
-                arguments[kvp.Key] = kvp.Value;
-            }
-        }
+        var arguments = ParseToolArguments(functionCallItem.FunctionArguments);
 
         // Execute the function call using the MCP client
         var toolResult = await mcpClient.CallToolAsync(
@@ -295,12 +278,11 @@ private static string SanitizeForXmlContext(string? input) =>
             arguments: arguments,
             cancellationToken: cancellationToken);
 
-        // Create input items with both the function call and the result
-        // This matches the Python pattern: append both tool_call and result
+        // The functionCallItem is already stored in the server's context (referenced by
+        // responseOptions.PreviousResponseId). Only the tool output is new content.
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
         var inputItems = new List<ResponseItem>
         {
-            functionCallItem, // The original function call
             new FunctionCallOutputResponseItem(functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult))
         };
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
@@ -412,15 +394,22 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
         // Create the response using the Responses API
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
         List<ResponseItem> responseItems = [ResponseItem.CreateUserMessageItem(prompt)];
-#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
 
         const int MaxToolCallIterations = 10;
         for (int iteration = 0; iteration < MaxToolCallIterations; iteration++)
         {
-            var response = await _ResponseClient.CreateResponseAsync(
-                responseItems,
-                options: responseOptions,
-                cancellationToken: cancellationToken);
+            ClientResult<OpenAIResponse> response;
+            try
+            {
+                response = await _ResponseClient.CreateResponseAsync(
+                    responseItems,
+                    options: responseOptions,
+                    cancellationToken: cancellationToken);
+            }
+            catch (ClientResultException ex) when (IsContextLengthError(ex))
+            {
+                throw new ConversationContextLimitExceededException(responseOptions.PreviousResponseId, ex);
+            }
 
             string responseId = response.Value.Id;
 
@@ -429,6 +418,12 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
 
             if (functionCalls.Count > 0 && mcpClient != null)
             {
+                // Advance the chain: the server now has everything up to responseId stored
+                // (user message + all prior function calls/results + this response's funcCalls).
+                // The next request only needs to supply the tool outputs — not the growing history.
+                responseOptions.PreviousResponseId = responseId;
+                responseItems = [];
+
                 foreach (var functionCallItem in functionCalls)
                 {
                     // Defense-in-depth: validate tool name against static allowlist before executing.
@@ -436,8 +431,7 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
                     if (!IsMcpToolAllowed(functionCallItem.FunctionName))
                     {
                         LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId);
-                        // Return a benign error to the model so it can respond gracefully
-                        responseItems.Add(functionCallItem);
+                        // The functionCallItem is in the stored response; send only the error output.
                         responseItems.Add(new FunctionCallOutputResponseItem(
                             functionCallItem.CallId,
                             $"Tool '{functionCallItem.FunctionName}' is not available."));
@@ -445,31 +439,15 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
                     }
 
                     LogMcpToolCallInvoked(_Logger, functionCallItem.FunctionName, iteration, endUserId);
-                    var jsonResponse = functionCallItem.FunctionArguments.ToString();
-                    var jsonArguments = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, object?>>(jsonResponse) ?? new Dictionary<string, object?>();
-
-                    Dictionary<string, object?> arguments = [];
-                    foreach (var kvp in jsonArguments)
-                    {
-                        arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement
-                            ? jsonElement.ValueKind switch
-                            {
-                                System.Text.Json.JsonValueKind.String => jsonElement.GetString(),
-                                System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(),
-                                System.Text.Json.JsonValueKind.True => true,
-                                System.Text.Json.JsonValueKind.False => false,
-                                System.Text.Json.JsonValueKind.Null => null,
-                                _ => (object?)jsonElement.ToString()
-                            }
-                            : kvp.Value;
-                    }
+                    var arguments = ParseToolArguments(functionCallItem.FunctionArguments);
 
                     var toolResult = await mcpClient.CallToolAsync(
                         functionCallItem.FunctionName,
                         arguments: arguments,
                         cancellationToken: cancellationToken);
 
-                    responseItems.Add(functionCallItem);
+                    // The functionCallItem is stored server-side in the response at PreviousResponseId.
+                    // Only the tool output is new content that needs to be included.
                     responseItems.Add(new FunctionCallOutputResponseItem(
                         functionCallItem.CallId,
                         McpToolResultFormatter.GetModelInput(toolResult)));
@@ -501,6 +479,56 @@ private bool IsMcpToolAllowed(string toolName)
         return _AllowedMcpTools.Contains(toolName);
     }
 
+    /// <summary>
+    /// Returns a shallow clone of <paramref name="source"/> with
+    /// <see cref="ResponseCreationOptions.PreviousResponseId"/> replaced.
+    /// Used when chaining tool-call continuations from a specific response leg.
+    /// </summary>
+#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+    private static ResponseCreationOptions CloneOptionsWithPreviousResponseId(
+        ResponseCreationOptions source,
+        string? previousResponseId)
+    {
+        var clone = new ResponseCreationOptions
+        {
+            Instructions = source.Instructions,
+            PreviousResponseId = previousResponseId,
+            ReasoningOptions = source.ReasoningOptions,
+        };
+        foreach (var tool in source.Tools)
+            clone.Tools.Add(tool);
+        return clone;
+    }
+#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+
+    /// <summary>
+    /// Parses function call arguments from a <see cref="BinaryData"/> JSON payload into a
+    /// strongly-typed dictionary, converting <see cref="System.Text.Json.JsonElement"/> values
+    /// to their native CLR equivalents.
+    /// </summary>
+    private static Dictionary<string, object?> ParseToolArguments(BinaryData functionArguments)
+    {
+        var jsonArguments = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, object?>>(
+            functionArguments.ToString()) ?? [];
+
+        var arguments = new Dictionary<string, object?>(jsonArguments.Count);
+        foreach (var kvp in jsonArguments)
+        {
+            arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement
+                ? jsonElement.ValueKind switch
+                {
+                    System.Text.Json.JsonValueKind.String => jsonElement.GetString(),
+                    System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(),
+                    System.Text.Json.JsonValueKind.True => true,
+                    System.Text.Json.JsonValueKind.False => false,
+                    System.Text.Json.JsonValueKind.Null => null,
+                    _ => (object?)jsonElement.ToString()
+                }
+                : kvp.Value;
+        }
+        return arguments;
+    }
+
     [LoggerMessage(Level = LogLevel.Information, Message = "AI tool call invoked: tool={ToolName} iteration={Iteration} user={EndUserId}")]
     private static partial void LogMcpToolCallInvoked(ILogger logger, string toolName, int iteration, string? endUserId);
 
@@ -515,4 +543,13 @@ private bool IsMcpToolAllowed(string toolName)
 
     [LoggerMessage(Level = LogLevel.Information, Message = "AI contextual search performed for prompt enrichment")]
     private static partial void LogContextualSearchPerformed(ILogger logger);
+
+    /// <summary>
+    /// Returns <c>true</c> when the API error indicates the conversation context window was exceeded.
+    /// </summary>
+    private static bool IsContextLengthError(ClientResultException ex) =>
+        ex.Status == 400 &&
+        (ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) ||
+         ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) ||
+         ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase));
 }
diff --git a/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs
new file mode 100644
index 00000000..adc802bd
--- /dev/null
+++ b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs
@@ -0,0 +1,30 @@
+namespace EssentialCSharp.Chat.Common.Services;
+
+/// <summary>
+/// Thrown when a conversation's accumulated context exceeds the model's context window limit.
+/// </summary>
+/// <remarks>
+/// This occurs when using <c>previous_response_id</c> chaining over many turns — the server
+/// reconstructs the full history on each request, and that history eventually exceeds the model's
+/// maximum input tokens. Callers should prompt the user to start a new conversation rather than
+/// retrying with the same <c>previousResponseId</c>.
+/// </remarks>
+public sealed class ConversationContextLimitExceededException : Exception
+{
+    /// <summary>
+    /// The <c>previous_response_id</c> that caused the overflow, if known.
+    /// </summary>
+    public string? PreviousResponseId { get; }
+
+    public ConversationContextLimitExceededException(string? previousResponseId)
+        : base("This conversation has exceeded the model's context window limit.")
+    {
+        PreviousResponseId = previousResponseId;
+    }
+
+    public ConversationContextLimitExceededException(string? previousResponseId, Exception innerException)
+        : base("This conversation has exceeded the model's context window limit.", innerException)
+    {
+        PreviousResponseId = previousResponseId;
+    }
+}
diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs
index f38138e6..24d79a14 100644
--- a/EssentialCSharp.Web/Controllers/ChatController.cs
+++ b/EssentialCSharp.Web/Controllers/ChatController.cs
@@ -44,21 +44,28 @@ public async Task<IActionResult> SendMessage([FromBody] ChatMessageRequest reque
         if (!_ResponseIdValidationService.ValidateResponseId(userId, previousResponseId))
             return BadRequest(new { error = "Invalid conversation context." });
 
-        var (response, responseId) = await _AiChatService.GetChatCompletion(
-            prompt: request.Message,
-            previousResponseId: previousResponseId,
-            enableContextualSearch: request.EnableContextualSearch,
-            endUserId: userId,
-            cancellationToken: cancellationToken);
+        try
+        {
+            var (response, responseId) = await _AiChatService.GetChatCompletion(
+                prompt: request.Message,
+                previousResponseId: previousResponseId,
+                enableContextualSearch: request.EnableContextualSearch,
+                endUserId: userId,
+                cancellationToken: cancellationToken);
 
-        _ResponseIdValidationService.RecordResponseId(userId, responseId);
+            _ResponseIdValidationService.RecordResponseId(userId, responseId);
 
-        return Ok(new ChatMessageResponse
+            return Ok(new ChatMessageResponse
+            {
+                Response = response,
+                ResponseId = responseId,
+                Timestamp = DateTime.UtcNow
+            });
+        }
+        catch (ConversationContextLimitExceededException)
         {
-            Response = response,
-            ResponseId = responseId,
-            Timestamp = DateTime.UtcNow
-        });
+            return BadRequest(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" });
+        }
     }
 
     [HttpPost("stream")]
@@ -130,6 +137,22 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat
         {
             LogChatStreamCancelled(_Logger, User.Identity?.Name);
         }
+        catch (ConversationContextLimitExceededException) when (!Response.HasStarted)
+        {
+            Response.StatusCode = 400;
+            Response.ContentType = "application/json";
+            await Response.WriteAsJsonAsync(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }, CancellationToken.None);
+        }
+        catch (ConversationContextLimitExceededException)
+        {
+            LogChatStreamErrorMidStream(_Logger, new InvalidOperationException("Context limit exceeded mid-stream"), User.Identity?.Name);
+            try
+            {
+                await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None);
+                await Response.Body.FlushAsync(CancellationToken.None);
+            }
+            catch { /* client already disconnected */ }
+        }
         catch (Exception ex) when (!Response.HasStarted)
         {
             LogChatStreamErrorBeforeResponseStarted(_Logger, ex, User.Identity?.Name);

From 97db852fff11342c1c696992852667b88edcc8d3 Mon Sep 17 00:00:00 2001
From: Benjamin Michaelis <git@relay.benjamin.michaelis.net>
Date: Fri, 8 May 2026 22:23:19 -0700
Subject: [PATCH 2/4] Fix Critical and High AI chat review findings

C1 - Two-phase streaming tool call pattern: buffer all function calls
during the stream, execute sequentially after stream completes, then
make ONE continuation with all outputs. Previously, N parallel tool
calls created N forked conversation branches.

C2 - Context length errors in streaming path: added RethrowContextLengthErrors
helper wrapper that puts try/catch only around MoveNextAsync, keeping
yield return outside. C# CS1626 prohibits yield inside try/catch, so
the wrapper is the only valid pattern for exception remapping in async
iterators.

H1 - Restore stream-write body in second catch in StreamMessage:
the SSE error event was lost in a prior edit.

H2 - Clone all relevant ResponseCreationOptions fields in
CloneOptionsWithPreviousResponseId: EndUserId, MaxOutputTokenCount,
TextOptions, TruncationMode, ParallelToolCallsEnabled,
StoredOutputEnabled, ToolChoice, Temperature, TopP, ServiceTier,
Metadata.

H3 - IsContextLengthError now parses structured JSON error code via
TryExtractErrorCode: handles status 413, code token_limit_exceeded,
and context_length_exceeded. Falls back to message substring match.

Bonus: wire EndUserId into CreateResponseOptionsAsync (SDK supports it).
Add LogMcpToolArgumentParseError logger message.
---
 .../Services/AIChatService.cs                 | 227 ++++++++++++------
 1 file changed, 151 insertions(+), 76 deletions(-)

diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
index 273b4ded..0da38d4d 100644
--- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
+++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
@@ -172,7 +172,11 @@ private static string SanitizeForXmlContext(string? input) =>
         input?.Replace("<", "\u2039").Replace(">", "\u203A") ?? string.Empty;
 
     /// <summary>
-    /// Processes streaming updates from the OpenAI Responses API, handling both regular responses and function calls
+    /// Processes streaming updates from the OpenAI Responses API.
+    /// Buffers all function call items before executing them — this is critical for correctness:
+    /// if the model emits multiple parallel tool calls, firing a separate continuation per call
+    /// creates forked conversation branches. Collecting all calls and submitting all outputs
+    /// in a single continuation matches the non-streaming behavior.
     /// </summary>
     private async IAsyncEnumerable<(string text, string? responseId)> ProcessStreamingUpdatesAsync(
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
@@ -187,10 +191,14 @@ private static string SanitizeForXmlContext(string? input) =>
         // Track this leg's response ID so tool-call continuations chain from it,
         // ensuring the model's context includes the user's message + reasoning.
         string? currentLegResponseId = null;
+#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+        List<FunctionCallResponseItem>? pendingFunctionCalls = null;
 
-        await foreach (var update in streamingUpdates.WithCancellation(cancellationToken))
+        // Wrap the raw stream to convert context-length API errors to our domain exception.
+        // C# does not allow yield inside a try/catch, so error remapping is done in a
+        // separate helper that puts try/catch only around MoveNextAsync.
+        await foreach (var update in RethrowContextLengthErrors(streamingUpdates, responseOptions.PreviousResponseId, cancellationToken))
         {
-#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
             if (update is StreamingResponseCreatedUpdate created)
             {
                 // Emit the response ID early so the controller can record ownership
@@ -200,104 +208,124 @@ private static string SanitizeForXmlContext(string? input) =>
             }
             else if (update is StreamingResponseOutputItemDoneUpdate itemDone)
             {
-                // Check if this is a function call that needs to be executed
                 if (itemDone.Item is FunctionCallResponseItem functionCallItem && mcpClient != null)
                 {
                     if (toolCallDepth >= 10)
                         throw new InvalidOperationException("Maximum tool call depth exceeded.");
 
-                    // Chain the continuation from THIS leg's response ID so the model sees:
-                    //   context(outer) → userMessage + AI reasoning + funcCall (stored in currentLegResponseId)
-                    //   → [toolOutput only] → next response
-                    // Using the outer previousResponseId here would omit the user's message from context.
-                    var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId);
-
-                    await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken))
-                    {
-                        yield return functionResult;
-                    }
+                    // Buffer all function calls — do NOT fire continuations inline.
+                    // Sending one continuation per call would create N forked conversation
+                    // branches, each missing the other N-1 tool results.
+                    pendingFunctionCalls ??= [];
+                    pendingFunctionCalls.Add(functionCallItem);
                 }
             }
             else if (update is StreamingResponseOutputTextDeltaUpdate deltaUpdate)
             {
                 yield return (deltaUpdate.Delta.ToString(), null);
             }
-            else if (update is StreamingResponseCompletedUpdate)
+            // StreamingResponseCompletedUpdate: ResponseId already emitted above — no-op.
+        }
+
+        // After the stream completes, execute all buffered tool calls and send ALL outputs
+        // in a single continuation request. This mirrors the non-streaming loop in
+        // GetChatCompletionCore and avoids conversation branching.
+        if (pendingFunctionCalls is { Count: > 0 } && mcpClient != null)
+        {
+            var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId);
+            var outputItems = new List<ResponseItem>(pendingFunctionCalls.Count);
+
+            foreach (var functionCallItem in pendingFunctionCalls)
             {
-                // ResponseId was already emitted from StreamingResponseCreatedUpdate at the start
-                // of this response leg — no need to emit again from the completion event.
+                outputItems.Add(await ExecuteSingleToolCallAsync(functionCallItem, toolCallDepth, endUserId, mcpClient, cancellationToken));
             }
+
+            var continuationStream = _ResponseClient.CreateResponseStreamingAsync(outputItems, continuationOptions, cancellationToken);
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+
+            await foreach (var result in ProcessStreamingUpdatesAsync(continuationStream, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken))
+            {
+                yield return result;
+            }
         }
     }
 
     /// <summary>
-    /// Executes a function call and streams the response
+    /// Wraps a streaming response enumerable to remap <see cref="ClientResultException"/>
+    /// context-length errors to <see cref="ConversationContextLimitExceededException"/>.
+    /// <para>
+    /// C# prohibits <c>yield return</c> inside a <c>try</c> block with a <c>catch</c> clause
+    /// (CS1626). By putting the <c>try/catch</c> only around <c>MoveNextAsync</c> and the
+    /// <c>yield return</c> outside, we satisfy the compiler while still remapping exceptions.
+    /// </para>
     /// </summary>
-    private async IAsyncEnumerable<(string text, string? responseId)> ExecuteFunctionCallAsync(
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-        FunctionCallResponseItem functionCallItem,
-        ResponseCreationOptions responseOptions,
+    private static async IAsyncEnumerable<StreamingResponseUpdate> RethrowContextLengthErrors(
+        IAsyncEnumerable<StreamingResponseUpdate> source,
+        string? previousResponseId,
+        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await using var enumerator = source.GetAsyncEnumerator(cancellationToken);
+        while (true)
+        {
+            bool hasNext;
+            try { hasNext = await enumerator.MoveNextAsync(); }
+            catch (ClientResultException ex) when (IsContextLengthError(ex))
+            { throw new ConversationContextLimitExceededException(previousResponseId, ex); }
+
+            if (!hasNext) break;
+            yield return enumerator.Current; // yield return is outside try/catch — valid
+        }
+    }
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-        McpClient mcpClient,
+
+    /// <summary>
+    /// Executes a single MCP tool call and returns the output item to include in the
+    /// continuation request. Handles allowlist validation and argument-parsing errors
+    /// so a single bad tool never aborts the entire continuation.
+    /// </summary>
+#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+    private async Task<ResponseItem> ExecuteSingleToolCallAsync(
+        FunctionCallResponseItem functionCallItem,
         int toolCallDepth,
-        string? endUserId = null,
-        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
+        string? endUserId,
+        McpClient mcpClient,
+        CancellationToken cancellationToken)
     {
         // Defense-in-depth: validate tool name against static allowlist before executing.
         if (!IsMcpToolAllowed(functionCallItem.FunctionName))
         {
             LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId);
-            // Feed a benign error back to the model so it can recover gracefully.
-            // The functionCallItem is in the stored response at PreviousResponseId — only send the output.
-#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            var errorItems = new List<ResponseItem>
-            {
-                new FunctionCallOutputResponseItem(
-                    functionCallItem.CallId,
-                    $"Tool '{functionCallItem.FunctionName}' is not available.")
-            };
-#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            var recoveryStream = _ResponseClient.CreateResponseStreamingAsync(
-                errorItems, responseOptions, cancellationToken);
-            await foreach (var result in ProcessStreamingUpdatesAsync(
-                recoveryStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken))
-            {
-                yield return result;
-            }
-            yield break;
+            return new FunctionCallOutputResponseItem(
+                functionCallItem.CallId,
+                $"Tool '{functionCallItem.FunctionName}' is not available.");
         }
 
         LogMcpToolCallInvokedStream(_Logger, functionCallItem.FunctionName, toolCallDepth, endUserId);
-        // A dictionary of arguments to pass to the tool. Each key represents a parameter name, and its associated value represents the argument value.
-        var arguments = ParseToolArguments(functionCallItem.FunctionArguments);
 
-        // Execute the function call using the MCP client
+        Dictionary<string, object?> arguments;
+        try
+        {
+            arguments = ParseToolArguments(functionCallItem.FunctionArguments);
+        }
+        catch (Exception ex)
+        {
+            LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId);
+            return new FunctionCallOutputResponseItem(
+                functionCallItem.CallId,
+                $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON.");
+        }
+
         var toolResult = await mcpClient.CallToolAsync(
             functionCallItem.FunctionName,
             arguments: arguments,
             cancellationToken: cancellationToken);
 
-        // The functionCallItem is already stored in the server's context (referenced by
-        // responseOptions.PreviousResponseId). Only the tool output is new content.
-#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-        var inputItems = new List<ResponseItem>
-        {
-            new FunctionCallOutputResponseItem(functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult))
-        };
-#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-
-        // Stream the function call response using the same processing logic
-        var functionResponseStream = _ResponseClient.CreateResponseStreamingAsync(
-            inputItems,
-            responseOptions,
-            cancellationToken);
-
-        await foreach (var result in ProcessStreamingUpdatesAsync(functionResponseStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken))
-        {
-            yield return result;
-        }
+        return new FunctionCallOutputResponseItem(
+            functionCallItem.CallId,
+            McpToolResultFormatter.GetModelInput(toolResult));
     }
+#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
 
     /// <summary>
     /// Creates response options with optional features
@@ -330,12 +358,11 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
             options.PreviousResponseId = previousResponseId;
         }
 
-        // endUserId is reserved for forwarding to Azure OpenAI for end-user attribution
-        // (Microsoft Defender prompt-shield correlation). OpenAI .NET SDK v2.7.0 does not
-        // expose ResponseCreationOptions.User; this parameter is intentionally discarded
-        // until SDK support is available.
+        // Wire up end-user ID for Azure OpenAI abuse monitoring and Microsoft Defender
+        // prompt-shield correlation. The SDK now exposes EndUserId directly.
         // See: https://learn.microsoft.com/en-us/azure/defender-for-cloud/gain-end-user-context-ai
-        _ = endUserId;
+        if (!string.IsNullOrEmpty(endUserId))
+            options.EndUserId = endUserId;
 
         // Add tools if provided
         if (tools != null)
@@ -480,9 +507,10 @@ private bool IsMcpToolAllowed(string toolName)
     }
 
     /// <summary>
-    /// Returns a shallow clone of <paramref name="source"/> with
+    /// Returns a clone of <paramref name="source"/> with
     /// <see cref="ResponseCreationOptions.PreviousResponseId"/> replaced.
-    /// Used when chaining tool-call continuations from a specific response leg.
+    /// All behavior-affecting properties are copied so that tool-call continuation legs
+    /// produce identical generation behavior to the initial leg.
     /// </summary>
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
     private static ResponseCreationOptions CloneOptionsWithPreviousResponseId(
@@ -493,10 +521,23 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId(
         {
             Instructions = source.Instructions,
             PreviousResponseId = previousResponseId,
+            EndUserId = source.EndUserId,
             ReasoningOptions = source.ReasoningOptions,
+            MaxOutputTokenCount = source.MaxOutputTokenCount,
+            TextOptions = source.TextOptions,
+            TruncationMode = source.TruncationMode,
+            ParallelToolCallsEnabled = source.ParallelToolCallsEnabled,
+            StoredOutputEnabled = source.StoredOutputEnabled,
+            ToolChoice = source.ToolChoice,
+            Temperature = source.Temperature,
+            TopP = source.TopP,
+            ServiceTier = source.ServiceTier,
         };
         foreach (var tool in source.Tools)
             clone.Tools.Add(tool);
+        if (source.Metadata is { Count: > 0 })
+            foreach (var kvp in source.Metadata)
+                clone.Metadata[kvp.Key] = kvp.Value;
         return clone;
     }
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
@@ -538,6 +579,9 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId(
     [LoggerMessage(Level = LogLevel.Warning, Message = "AI tool call rejected — not on allowlist: tool={ToolName} user={EndUserId}")]
     private static partial void LogMcpToolCallRejected(ILogger logger, string toolName, string? endUserId);
 
+    [LoggerMessage(Level = LogLevel.Warning, Message = "Failed to parse tool arguments for '{ToolName}': user={EndUserId}")]
+    private static partial void LogMcpToolArgumentParseError(ILogger logger, string toolName, Exception exception, string? endUserId);
+
     [LoggerMessage(Level = LogLevel.Warning, Message = "MCP tool skipped during option setup — not on allowlist: tool={ToolName}")]
     private static partial void LogMcpToolSkippedNotAllowed(ILogger logger, string toolName);
 
@@ -546,10 +590,41 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId(
 
     /// <summary>
     /// Returns <c>true</c> when the API error indicates the conversation context window was exceeded.
+    /// Prefers structured JSON error code from the response body; falls back to message text matching.
+    /// Also handles HTTP 413 (payload too large via API gateway) and <c>token_limit_exceeded</c>.
+    /// </summary>
+    private static bool IsContextLengthError(ClientResultException ex)
+    {
+        if (ex.Status is not (400 or 413)) return false;
+
+        // Prefer structured error code from the response body
+        var errorCode = TryExtractErrorCode(ex);
+        if (errorCode is not null)
+            return errorCode is "context_length_exceeded" or "token_limit_exceeded";
+
+        // Fallback: substring match on exception message (Azure OpenAI format may vary)
+        return ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) ||
+               ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) ||
+               ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase) ||
+               ex.Message.Contains("token_limit_exceeded", StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Attempts to extract the <c>error.code</c> field from the raw JSON response body.
+    /// Returns <c>null</c> on any parse failure — this is best-effort.
     /// </summary>
-    private static bool IsContextLengthError(ClientResultException ex) =>
-        ex.Status == 400 &&
-        (ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) ||
-         ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) ||
-         ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase));
+    private static string? TryExtractErrorCode(ClientResultException ex)
+    {
+        try
+        {
+            var content = ex.GetRawResponse()?.Content;
+            if (content is null) return null;
+            using var doc = System.Text.Json.JsonDocument.Parse(content.ToMemory());
+            if (doc.RootElement.TryGetProperty("error", out var error) &&
+                error.TryGetProperty("code", out var code))
+                return code.GetString();
+        }
+        catch { /* best-effort — don't let error parsing crash the error handler */ }
+        return null;
+    }
 }

From c69a2c3e72f0b6caaceb0da8027d1228ddac494f Mon Sep 17 00:00:00 2001
From: Benjamin Michaelis <git@relay.benjamin.michaelis.net>
Date: Fri, 8 May 2026 23:15:27 -0700
Subject: [PATCH 3/4] Fix remaining Medium issues from second subagent review
 pass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- strictModeEnabled: false for MCP tool registration — external MCP
  schemas are not guaranteed to satisfy OpenAI strict-mode constraints
  (all properties required, additionalProperties: false everywhere).
  A single non-conforming schema with strictModeEnabled: true would
  cause a 400 at registration time for ALL tools in the request.

- Guard ParseToolArguments in non-streaming path — JsonException on
  malformed model-generated arguments would previously bubble up as
  a 500. Now caught identically to the streaming path: logs warning
  and sends an error FunctionCallOutputResponseItem so the model can
  recover without aborting the loop.

- Guard currentLegResponseId null before tool-call continuation —
  if the API never emits StreamingResponseCreatedUpdate, proceeding
  with PreviousResponseId = null causes a confusing 400 from the API.
  Now throws InvalidOperationException with a diagnostic message.

- Log original ConversationContextLimitExceededException in mid-stream
  handler — previously wrapped in a new InvalidOperationException,
  losing the stack trace, PreviousResponseId, and inner exception.
---
 .../Services/AIChatService.cs                 | 28 +++++++++++++++++--
 .../Controllers/ChatController.cs             |  4 +--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
index 0da38d4d..a4515831 100644
--- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
+++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
@@ -232,6 +232,13 @@ private static string SanitizeForXmlContext(string? input) =>
         // GetChatCompletionCore and avoids conversation branching.
         if (pendingFunctionCalls is { Count: > 0 } && mcpClient != null)
         {
+            // Guard: if the API never sent StreamingResponseCreatedUpdate, currentLegResponseId
+            // is null. Continuing would send FunctionCallOutputResponseItems referencing CallIds
+            // the server has no record of (PreviousResponseId = null), causing a 400.
+            if (currentLegResponseId is null)
+                throw new InvalidOperationException(
+                    "Cannot continue tool-call chain: the streaming leg completed with tool calls but emitted no response ID.");
+
             var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId);
             var outputItems = new List<ResponseItem>(pendingFunctionCalls.Count);
 
@@ -387,7 +394,11 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
                 }
 
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-                options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: true, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText())));
+                // strictModeEnabled: false — MCP tool schemas come from external servers and are not
+                // guaranteed to satisfy OpenAI strict-mode constraints (all properties required,
+                // additionalProperties: false everywhere). A single non-conforming schema with
+                // strict mode enabled would cause a 400 at registration time for ALL tools.
+                options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: false, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText())));
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
             }
         }
@@ -466,7 +477,20 @@ private async Task<ResponseCreationOptions> CreateResponseOptionsAsync(
                     }
 
                     LogMcpToolCallInvoked(_Logger, functionCallItem.FunctionName, iteration, endUserId);
-                    var arguments = ParseToolArguments(functionCallItem.FunctionArguments);
+
+                    Dictionary<string, object?> arguments;
+                    try
+                    {
+                        arguments = ParseToolArguments(functionCallItem.FunctionArguments);
+                    }
+                    catch (Exception ex)
+                    {
+                        LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId);
+                        responseItems.Add(new FunctionCallOutputResponseItem(
+                            functionCallItem.CallId,
+                            $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON."));
+                        continue;
+                    }
 
                     var toolResult = await mcpClient.CallToolAsync(
                         functionCallItem.FunctionName,
diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs
index 24d79a14..2b835897 100644
--- a/EssentialCSharp.Web/Controllers/ChatController.cs
+++ b/EssentialCSharp.Web/Controllers/ChatController.cs
@@ -143,9 +143,9 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat
             Response.ContentType = "application/json";
             await Response.WriteAsJsonAsync(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }, CancellationToken.None);
         }
-        catch (ConversationContextLimitExceededException)
+        catch (ConversationContextLimitExceededException ex)
         {
-            LogChatStreamErrorMidStream(_Logger, new InvalidOperationException("Context limit exceeded mid-stream"), User.Identity?.Name);
+            LogChatStreamErrorMidStream(_Logger, ex, User.Identity?.Name);
             try
             {
                 await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None);

From e501fbe966f544fcdf9e2b14babca1c687a5fe04 Mon Sep 17 00:00:00 2001
From: Benjamin Michaelis <git@relay.benjamin.michaelis.net>
Date: Sat, 9 May 2026 00:15:53 -0700
Subject: [PATCH 4/4] Improve catch specificity in error-handler guards

Replace bare catch with catch (Exception) in two best-effort
error-handling sites, with clarifying comments explaining why a
broad catch is correct.

- TryExtractErrorCode: Azure SDK response internals are outside
  our control; a narrow typed-catch list risks a latent crash in
  production error handling. catch (Exception) is intentional.

- StreamMessage SSE error-write guards (both): Kestrel throws
  IOException (ConnectionResetException), OperationCanceledException,
  or ObjectDisposedException on abrupt client disconnect. The bot's
  OperationCanceledException-only suggestion would miss IOException
  and cause an unhandled exception that masks the original error.
  Both catch blocks updated (bot only flagged one, the other is
  identical).
---
 .../Services/AIChatService.cs                    |  7 ++++++-
 .../Controllers/ChatController.cs                | 16 ++++++++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
index a4515831..fe1bcb4d 100644
--- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
+++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs
@@ -648,7 +648,12 @@ private static bool IsContextLengthError(ClientResultException ex)
                 error.TryGetProperty("code", out var code))
                 return code.GetString();
         }
-        catch { /* best-effort — don't let error parsing crash the error handler */ }
+        catch (Exception)
+        {
+            // Best-effort extraction inside an error handler — catch all to guarantee we never
+            // throw from error-parsing logic. The Azure SDK response internals are outside our
+            // control and the exception surface can change across SDK versions.
+        }
         return null;
     }
 }
diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs
index 2b835897..0fcd8577 100644
--- a/EssentialCSharp.Web/Controllers/ChatController.cs
+++ b/EssentialCSharp.Web/Controllers/ChatController.cs
@@ -151,7 +151,13 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat
                 await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None);
                 await Response.Body.FlushAsync(CancellationToken.None);
             }
-            catch { /* client already disconnected */ }
+            catch (Exception)
+            {
+                // Best-effort write to an already-streaming response. Kestrel can throw
+                // IOException (connection reset), OperationCanceledException, or
+                // ObjectDisposedException on abrupt client disconnect — swallow all to
+                // avoid masking the original exception.
+            }
         }
         catch (Exception ex) when (!Response.HasStarted)
         {
@@ -168,7 +174,13 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat
                 await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"Stream interrupted\"}\n\n", CancellationToken.None);
                 await Response.Body.FlushAsync(CancellationToken.None);
             }
-            catch { /* client already disconnected */ }
+            catch (Exception)
+            {
+                // Best-effort write to an already-streaming response. Kestrel can throw
+                // IOException (connection reset), OperationCanceledException, or
+                // ObjectDisposedException on abrupt client disconnect — swallow all to
+                // avoid masking the original exception.
+            }
         }
     }