From 332d38f1f8553cbd1b35425595970880cda4d90f Mon Sep 17 00:00:00 2001 From: Benjamin Michaelis Date: Fri, 8 May 2026 21:51:36 -0700 Subject: [PATCH 1/4] Fix AI chat Responses API tool-chaining and context limit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix streaming tool continuations: track currentLegResponseId from StreamingResponseCreatedUpdate and clone options with that ID before calling ExecuteFunctionCallAsync, so the continuation correctly references the response that produced the function call (not the outer conversation root). - Fix non-streaming loop token waste: after each tool-call response, advance PreviousResponseId to the new responseId and reset responseItems to [] so subsequent iterations only send new tool outputs — not the growing full history. This prevents quadratic token re-processing. - Remove redundant functionCallItem from continuation inputs: when using previous_response_id chaining, the function call is already stored server-side in the referenced response; only the FunctionCallOutputResponseItem is new content. - Add ConversationContextLimitExceededException domain exception and IsContextLengthError helper to detect context window overflow from ClientResultException (HTTP 400 with context_length_exceeded text). - Handle context limit in ChatController: both /api/chat/message and /api/chat/stream return 400 with errorCode 'context_limit_exceeded'. - Add CloneOptionsWithPreviousResponseId helper to avoid mutating shared options across concurrent streaming legs. - Add ParseToolArguments helper to deduplicate JSON argument parsing in both streaming and non-streaming paths. - Fix OPENAI001 pragma: extend the disable region to cover the full loop body including the ClientResult declaration. --- .../Services/AIChatService.cs | 163 +++++++++++------- ...nversationContextLimitExceededException.cs | 30 ++++ .../Controllers/ChatController.cs | 47 +++-- 3 files changed, 165 insertions(+), 75 deletions(-) create mode 100644 EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs index 5655781c..273b4ded 100644 --- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs +++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs @@ -4,6 +4,7 @@ using ModelContextProtocol.Client; using ModelContextProtocol.Protocol; using OpenAI.Responses; +using System.ClientModel; using System.Collections.Frozen; namespace EssentialCSharp.Chat.Common.Services; @@ -183,6 +184,10 @@ private static string SanitizeForXmlContext(string? input) => string? endUserId = null, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) { + // Track this leg's response ID so tool-call continuations chain from it, + // ensuring the model's context includes the user's message + reasoning. + string? currentLegResponseId = null; + await foreach (var update in streamingUpdates.WithCancellation(cancellationToken)) { #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -190,7 +195,8 @@ private static string SanitizeForXmlContext(string? input) => { // Emit the response ID early so the controller can record ownership // before the stream completes — handles client disconnects mid-stream. - yield return (string.Empty, responseId: created.Response.Id); + currentLegResponseId = created.Response.Id; + yield return (string.Empty, responseId: currentLegResponseId); } else if (update is StreamingResponseOutputItemDoneUpdate itemDone) { @@ -200,10 +206,13 @@ private static string SanitizeForXmlContext(string? input) => if (toolCallDepth >= 10) throw new InvalidOperationException("Maximum tool call depth exceeded."); - // Execute the function call and stream its response. - // Each nested ProcessStreamingUpdatesAsync emits its own StreamingResponseCreatedUpdate - // (which already yields its responseId early), so no extra tracking is needed here. - await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, responseOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) + // Chain the continuation from THIS leg's response ID so the model sees: + // context(outer) → userMessage + AI reasoning + funcCall (stored in currentLegResponseId) + // → [toolOutput only] → next response + // Using the outer previousResponseId here would omit the user's message from context. + var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId); + + await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) { yield return functionResult; } @@ -239,12 +248,11 @@ private static string SanitizeForXmlContext(string? input) => if (!IsMcpToolAllowed(functionCallItem.FunctionName)) { LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId); - // Feed a benign error back to the model so it can recover gracefully, - // mirroring what GetChatCompletionCore does on the non-streaming path. + // Feed a benign error back to the model so it can recover gracefully. + // The functionCallItem is in the stored response at PreviousResponseId — only send the output. #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. var errorItems = new List { - functionCallItem, new FunctionCallOutputResponseItem( functionCallItem.CallId, $"Tool '{functionCallItem.FunctionName}' is not available.") @@ -262,32 +270,7 @@ private static string SanitizeForXmlContext(string? input) => LogMcpToolCallInvokedStream(_Logger, functionCallItem.FunctionName, toolCallDepth, endUserId); // A dictionary of arguments to pass to the tool. Each key represents a parameter name, and its associated value represents the argument value. - Dictionary arguments = []; - // example JsonResponse: - // "{\"question\":\"Azure OpenAI Responses API (Preview)\"}" - var jsonResponse = functionCallItem.FunctionArguments.ToString(); - var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>(jsonResponse) ?? new Dictionary(); - - // Convert JsonElement values to their actual types - foreach (var kvp in jsonArguments) - { - if (kvp.Value is System.Text.Json.JsonElement jsonElement) - { - arguments[kvp.Key] = jsonElement.ValueKind switch - { - System.Text.Json.JsonValueKind.String => jsonElement.GetString(), - System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), - System.Text.Json.JsonValueKind.True => true, - System.Text.Json.JsonValueKind.False => false, - System.Text.Json.JsonValueKind.Null => null, - _ => jsonElement.ToString() - }; - } - else - { - arguments[kvp.Key] = kvp.Value; - } - } + var arguments = ParseToolArguments(functionCallItem.FunctionArguments); // Execute the function call using the MCP client var toolResult = await mcpClient.CallToolAsync( @@ -295,12 +278,11 @@ private static string SanitizeForXmlContext(string? input) => arguments: arguments, cancellationToken: cancellationToken); - // Create input items with both the function call and the result - // This matches the Python pattern: append both tool_call and result + // The functionCallItem is already stored in the server's context (referenced by + // responseOptions.PreviousResponseId). Only the tool output is new content. #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. var inputItems = new List { - functionCallItem, // The original function call new FunctionCallOutputResponseItem(functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult)) }; #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -412,15 +394,22 @@ private async Task CreateResponseOptionsAsync( // Create the response using the Responses API #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. List responseItems = [ResponseItem.CreateUserMessageItem(prompt)]; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. const int MaxToolCallIterations = 10; for (int iteration = 0; iteration < MaxToolCallIterations; iteration++) { - var response = await _ResponseClient.CreateResponseAsync( - responseItems, - options: responseOptions, - cancellationToken: cancellationToken); + ClientResult response; + try + { + response = await _ResponseClient.CreateResponseAsync( + responseItems, + options: responseOptions, + cancellationToken: cancellationToken); + } + catch (ClientResultException ex) when (IsContextLengthError(ex)) + { + throw new ConversationContextLimitExceededException(responseOptions.PreviousResponseId, ex); + } string responseId = response.Value.Id; @@ -429,6 +418,12 @@ private async Task CreateResponseOptionsAsync( if (functionCalls.Count > 0 && mcpClient != null) { + // Advance the chain: the server now has everything up to responseId stored + // (user message + all prior function calls/results + this response's funcCalls). + // The next request only needs to supply the tool outputs — not the growing history. + responseOptions.PreviousResponseId = responseId; + responseItems = []; + foreach (var functionCallItem in functionCalls) { // Defense-in-depth: validate tool name against static allowlist before executing. @@ -436,8 +431,7 @@ private async Task CreateResponseOptionsAsync( if (!IsMcpToolAllowed(functionCallItem.FunctionName)) { LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId); - // Return a benign error to the model so it can respond gracefully - responseItems.Add(functionCallItem); + // The functionCallItem is in the stored response; send only the error output. responseItems.Add(new FunctionCallOutputResponseItem( functionCallItem.CallId, $"Tool '{functionCallItem.FunctionName}' is not available.")); @@ -445,31 +439,15 @@ private async Task CreateResponseOptionsAsync( } LogMcpToolCallInvoked(_Logger, functionCallItem.FunctionName, iteration, endUserId); - var jsonResponse = functionCallItem.FunctionArguments.ToString(); - var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>(jsonResponse) ?? new Dictionary(); - - Dictionary arguments = []; - foreach (var kvp in jsonArguments) - { - arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement - ? jsonElement.ValueKind switch - { - System.Text.Json.JsonValueKind.String => jsonElement.GetString(), - System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), - System.Text.Json.JsonValueKind.True => true, - System.Text.Json.JsonValueKind.False => false, - System.Text.Json.JsonValueKind.Null => null, - _ => (object?)jsonElement.ToString() - } - : kvp.Value; - } + var arguments = ParseToolArguments(functionCallItem.FunctionArguments); var toolResult = await mcpClient.CallToolAsync( functionCallItem.FunctionName, arguments: arguments, cancellationToken: cancellationToken); - responseItems.Add(functionCallItem); + // The functionCallItem is stored server-side in the response at PreviousResponseId. + // Only the tool output is new content that needs to be included. responseItems.Add(new FunctionCallOutputResponseItem( functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult))); @@ -501,6 +479,56 @@ private bool IsMcpToolAllowed(string toolName) return _AllowedMcpTools.Contains(toolName); } + /// + /// Returns a shallow clone of with + /// replaced. + /// Used when chaining tool-call continuations from a specific response leg. + /// +#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( + ResponseCreationOptions source, + string? previousResponseId) + { + var clone = new ResponseCreationOptions + { + Instructions = source.Instructions, + PreviousResponseId = previousResponseId, + ReasoningOptions = source.ReasoningOptions, + }; + foreach (var tool in source.Tools) + clone.Tools.Add(tool); + return clone; + } +#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + + /// + /// Parses function call arguments from a JSON payload into a + /// strongly-typed dictionary, converting values + /// to their native CLR equivalents. + /// + private static Dictionary ParseToolArguments(BinaryData functionArguments) + { + var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>( + functionArguments.ToString()) ?? []; + + var arguments = new Dictionary(jsonArguments.Count); + foreach (var kvp in jsonArguments) + { + arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement + ? jsonElement.ValueKind switch + { + System.Text.Json.JsonValueKind.String => jsonElement.GetString(), + System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), + System.Text.Json.JsonValueKind.True => true, + System.Text.Json.JsonValueKind.False => false, + System.Text.Json.JsonValueKind.Null => null, + _ => (object?)jsonElement.ToString() + } + : kvp.Value; + } + return arguments; + } + [LoggerMessage(Level = LogLevel.Information, Message = "AI tool call invoked: tool={ToolName} iteration={Iteration} user={EndUserId}")] private static partial void LogMcpToolCallInvoked(ILogger logger, string toolName, int iteration, string? endUserId); @@ -515,4 +543,13 @@ private bool IsMcpToolAllowed(string toolName) [LoggerMessage(Level = LogLevel.Information, Message = "AI contextual search performed for prompt enrichment")] private static partial void LogContextualSearchPerformed(ILogger logger); + + /// + /// Returns true when the API error indicates the conversation context window was exceeded. + /// + private static bool IsContextLengthError(ClientResultException ex) => + ex.Status == 400 && + (ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase)); } diff --git a/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs new file mode 100644 index 00000000..adc802bd --- /dev/null +++ b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs @@ -0,0 +1,30 @@ +namespace EssentialCSharp.Chat.Common.Services; + +/// +/// Thrown when a conversation's accumulated context exceeds the model's context window limit. +/// +/// +/// This occurs when using previous_response_id chaining over many turns — the server +/// reconstructs the full history on each request, and that history eventually exceeds the model's +/// maximum input tokens. Callers should prompt the user to start a new conversation rather than +/// retrying with the same previousResponseId. +/// +public sealed class ConversationContextLimitExceededException : Exception +{ + /// + /// The previous_response_id that caused the overflow, if known. + /// + public string? PreviousResponseId { get; } + + public ConversationContextLimitExceededException(string? previousResponseId) + : base("This conversation has exceeded the model's context window limit.") + { + PreviousResponseId = previousResponseId; + } + + public ConversationContextLimitExceededException(string? previousResponseId, Exception innerException) + : base("This conversation has exceeded the model's context window limit.", innerException) + { + PreviousResponseId = previousResponseId; + } +} diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs index f38138e6..24d79a14 100644 --- a/EssentialCSharp.Web/Controllers/ChatController.cs +++ b/EssentialCSharp.Web/Controllers/ChatController.cs @@ -44,21 +44,28 @@ public async Task SendMessage([FromBody] ChatMessageRequest reque if (!_ResponseIdValidationService.ValidateResponseId(userId, previousResponseId)) return BadRequest(new { error = "Invalid conversation context." }); - var (response, responseId) = await _AiChatService.GetChatCompletion( - prompt: request.Message, - previousResponseId: previousResponseId, - enableContextualSearch: request.EnableContextualSearch, - endUserId: userId, - cancellationToken: cancellationToken); + try + { + var (response, responseId) = await _AiChatService.GetChatCompletion( + prompt: request.Message, + previousResponseId: previousResponseId, + enableContextualSearch: request.EnableContextualSearch, + endUserId: userId, + cancellationToken: cancellationToken); - _ResponseIdValidationService.RecordResponseId(userId, responseId); + _ResponseIdValidationService.RecordResponseId(userId, responseId); - return Ok(new ChatMessageResponse + return Ok(new ChatMessageResponse + { + Response = response, + ResponseId = responseId, + Timestamp = DateTime.UtcNow + }); + } + catch (ConversationContextLimitExceededException) { - Response = response, - ResponseId = responseId, - Timestamp = DateTime.UtcNow - }); + return BadRequest(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }); + } } [HttpPost("stream")] @@ -130,6 +137,22 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat { LogChatStreamCancelled(_Logger, User.Identity?.Name); } + catch (ConversationContextLimitExceededException) when (!Response.HasStarted) + { + Response.StatusCode = 400; + Response.ContentType = "application/json"; + await Response.WriteAsJsonAsync(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }, CancellationToken.None); + } + catch (ConversationContextLimitExceededException) + { + LogChatStreamErrorMidStream(_Logger, new InvalidOperationException("Context limit exceeded mid-stream"), User.Identity?.Name); + try + { + await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None); + await Response.Body.FlushAsync(CancellationToken.None); + } + catch { /* client already disconnected */ } + } catch (Exception ex) when (!Response.HasStarted) { LogChatStreamErrorBeforeResponseStarted(_Logger, ex, User.Identity?.Name); From 97db852fff11342c1c696992852667b88edcc8d3 Mon Sep 17 00:00:00 2001 From: Benjamin Michaelis Date: Fri, 8 May 2026 22:23:19 -0700 Subject: [PATCH 2/4] Fix Critical and High AI chat review findings C1 - Two-phase streaming tool call pattern: buffer all function calls during the stream, execute sequentially after stream completes, then make ONE continuation with all outputs. Previously, N parallel tool calls created N forked conversation branches. C2 - Context length errors in streaming path: added RethrowContextLengthErrors helper wrapper that puts try/catch only around MoveNextAsync, keeping yield return outside. C# CS1626 prohibits yield inside try/catch, so the wrapper is the only valid pattern for exception remapping in async iterators. H1 - Restore stream-write body in second catch in StreamMessage: the SSE error event was lost in a prior edit. H2 - Clone all relevant ResponseCreationOptions fields in CloneOptionsWithPreviousResponseId: EndUserId, MaxOutputTokenCount, TextOptions, TruncationMode, ParallelToolCallsEnabled, StoredOutputEnabled, ToolChoice, Temperature, TopP, ServiceTier, Metadata. H3 - IsContextLengthError now parses structured JSON error code via TryExtractErrorCode: handles status 413, code token_limit_exceeded, and context_length_exceeded. Falls back to message substring match. Bonus: wire EndUserId into CreateResponseOptionsAsync (SDK supports it). Add LogMcpToolArgumentParseError logger message. --- .../Services/AIChatService.cs | 227 ++++++++++++------ 1 file changed, 151 insertions(+), 76 deletions(-) diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs index 273b4ded..0da38d4d 100644 --- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs +++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs @@ -172,7 +172,11 @@ private static string SanitizeForXmlContext(string? input) => input?.Replace("<", "\u2039").Replace(">", "\u203A") ?? string.Empty; /// - /// Processes streaming updates from the OpenAI Responses API, handling both regular responses and function calls + /// Processes streaming updates from the OpenAI Responses API. + /// Buffers all function call items before executing them — this is critical for correctness: + /// if the model emits multiple parallel tool calls, firing a separate continuation per call + /// creates forked conversation branches. Collecting all calls and submitting all outputs + /// in a single continuation matches the non-streaming behavior. /// private async IAsyncEnumerable<(string text, string? responseId)> ProcessStreamingUpdatesAsync( #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -187,10 +191,14 @@ private static string SanitizeForXmlContext(string? input) => // Track this leg's response ID so tool-call continuations chain from it, // ensuring the model's context includes the user's message + reasoning. string? currentLegResponseId = null; +#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + List? pendingFunctionCalls = null; - await foreach (var update in streamingUpdates.WithCancellation(cancellationToken)) + // Wrap the raw stream to convert context-length API errors to our domain exception. + // C# does not allow yield inside a try/catch, so error remapping is done in a + // separate helper that puts try/catch only around MoveNextAsync. + await foreach (var update in RethrowContextLengthErrors(streamingUpdates, responseOptions.PreviousResponseId, cancellationToken)) { -#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. if (update is StreamingResponseCreatedUpdate created) { // Emit the response ID early so the controller can record ownership @@ -200,104 +208,124 @@ private static string SanitizeForXmlContext(string? input) => } else if (update is StreamingResponseOutputItemDoneUpdate itemDone) { - // Check if this is a function call that needs to be executed if (itemDone.Item is FunctionCallResponseItem functionCallItem && mcpClient != null) { if (toolCallDepth >= 10) throw new InvalidOperationException("Maximum tool call depth exceeded."); - // Chain the continuation from THIS leg's response ID so the model sees: - // context(outer) → userMessage + AI reasoning + funcCall (stored in currentLegResponseId) - // → [toolOutput only] → next response - // Using the outer previousResponseId here would omit the user's message from context. - var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId); - - await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) - { - yield return functionResult; - } + // Buffer all function calls — do NOT fire continuations inline. + // Sending one continuation per call would create N forked conversation + // branches, each missing the other N-1 tool results. + pendingFunctionCalls ??= []; + pendingFunctionCalls.Add(functionCallItem); } } else if (update is StreamingResponseOutputTextDeltaUpdate deltaUpdate) { yield return (deltaUpdate.Delta.ToString(), null); } - else if (update is StreamingResponseCompletedUpdate) + // StreamingResponseCompletedUpdate: ResponseId already emitted above — no-op. + } + + // After the stream completes, execute all buffered tool calls and send ALL outputs + // in a single continuation request. This mirrors the non-streaming loop in + // GetChatCompletionCore and avoids conversation branching. + if (pendingFunctionCalls is { Count: > 0 } && mcpClient != null) + { + var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId); + var outputItems = new List(pendingFunctionCalls.Count); + + foreach (var functionCallItem in pendingFunctionCalls) { - // ResponseId was already emitted from StreamingResponseCreatedUpdate at the start - // of this response leg — no need to emit again from the completion event. + outputItems.Add(await ExecuteSingleToolCallAsync(functionCallItem, toolCallDepth, endUserId, mcpClient, cancellationToken)); } + + var continuationStream = _ResponseClient.CreateResponseStreamingAsync(outputItems, continuationOptions, cancellationToken); #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + + await foreach (var result in ProcessStreamingUpdatesAsync(continuationStream, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) + { + yield return result; + } } } /// - /// Executes a function call and streams the response + /// Wraps a streaming response enumerable to remap + /// context-length errors to . + /// + /// C# prohibits yield return inside a try block with a catch clause + /// (CS1626). By putting the try/catch only around MoveNextAsync and the + /// yield return outside, we satisfy the compiler while still remapping exceptions. + /// /// - private async IAsyncEnumerable<(string text, string? responseId)> ExecuteFunctionCallAsync( #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - FunctionCallResponseItem functionCallItem, - ResponseCreationOptions responseOptions, + private static async IAsyncEnumerable RethrowContextLengthErrors( + IAsyncEnumerable source, + string? previousResponseId, + [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) + { + await using var enumerator = source.GetAsyncEnumerator(cancellationToken); + while (true) + { + bool hasNext; + try { hasNext = await enumerator.MoveNextAsync(); } + catch (ClientResultException ex) when (IsContextLengthError(ex)) + { throw new ConversationContextLimitExceededException(previousResponseId, ex); } + + if (!hasNext) break; + yield return enumerator.Current; // yield return is outside try/catch — valid + } + } #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - McpClient mcpClient, + + /// + /// Executes a single MCP tool call and returns the output item to include in the + /// continuation request. Handles allowlist validation and argument-parsing errors + /// so a single bad tool never aborts the entire continuation. + /// +#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + private async Task ExecuteSingleToolCallAsync( + FunctionCallResponseItem functionCallItem, int toolCallDepth, - string? endUserId = null, - [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) + string? endUserId, + McpClient mcpClient, + CancellationToken cancellationToken) { // Defense-in-depth: validate tool name against static allowlist before executing. if (!IsMcpToolAllowed(functionCallItem.FunctionName)) { LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId); - // Feed a benign error back to the model so it can recover gracefully. - // The functionCallItem is in the stored response at PreviousResponseId — only send the output. -#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var errorItems = new List - { - new FunctionCallOutputResponseItem( - functionCallItem.CallId, - $"Tool '{functionCallItem.FunctionName}' is not available.") - }; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var recoveryStream = _ResponseClient.CreateResponseStreamingAsync( - errorItems, responseOptions, cancellationToken); - await foreach (var result in ProcessStreamingUpdatesAsync( - recoveryStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken)) - { - yield return result; - } - yield break; + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Tool '{functionCallItem.FunctionName}' is not available."); } LogMcpToolCallInvokedStream(_Logger, functionCallItem.FunctionName, toolCallDepth, endUserId); - // A dictionary of arguments to pass to the tool. Each key represents a parameter name, and its associated value represents the argument value. - var arguments = ParseToolArguments(functionCallItem.FunctionArguments); - // Execute the function call using the MCP client + Dictionary arguments; + try + { + arguments = ParseToolArguments(functionCallItem.FunctionArguments); + } + catch (Exception ex) + { + LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId); + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON."); + } + var toolResult = await mcpClient.CallToolAsync( functionCallItem.FunctionName, arguments: arguments, cancellationToken: cancellationToken); - // The functionCallItem is already stored in the server's context (referenced by - // responseOptions.PreviousResponseId). Only the tool output is new content. -#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var inputItems = new List - { - new FunctionCallOutputResponseItem(functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult)) - }; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - - // Stream the function call response using the same processing logic - var functionResponseStream = _ResponseClient.CreateResponseStreamingAsync( - inputItems, - responseOptions, - cancellationToken); - - await foreach (var result in ProcessStreamingUpdatesAsync(functionResponseStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken)) - { - yield return result; - } + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + McpToolResultFormatter.GetModelInput(toolResult)); } +#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. /// /// Creates response options with optional features @@ -330,12 +358,11 @@ private async Task CreateResponseOptionsAsync( options.PreviousResponseId = previousResponseId; } - // endUserId is reserved for forwarding to Azure OpenAI for end-user attribution - // (Microsoft Defender prompt-shield correlation). OpenAI .NET SDK v2.7.0 does not - // expose ResponseCreationOptions.User; this parameter is intentionally discarded - // until SDK support is available. + // Wire up end-user ID for Azure OpenAI abuse monitoring and Microsoft Defender + // prompt-shield correlation. The SDK now exposes EndUserId directly. // See: https://learn.microsoft.com/en-us/azure/defender-for-cloud/gain-end-user-context-ai - _ = endUserId; + if (!string.IsNullOrEmpty(endUserId)) + options.EndUserId = endUserId; // Add tools if provided if (tools != null) @@ -480,9 +507,10 @@ private bool IsMcpToolAllowed(string toolName) } /// - /// Returns a shallow clone of with + /// Returns a clone of with /// replaced. - /// Used when chaining tool-call continuations from a specific response leg. + /// All behavior-affecting properties are copied so that tool-call continuation legs + /// produce identical generation behavior to the initial leg. /// #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( @@ -493,10 +521,23 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( { Instructions = source.Instructions, PreviousResponseId = previousResponseId, + EndUserId = source.EndUserId, ReasoningOptions = source.ReasoningOptions, + MaxOutputTokenCount = source.MaxOutputTokenCount, + TextOptions = source.TextOptions, + TruncationMode = source.TruncationMode, + ParallelToolCallsEnabled = source.ParallelToolCallsEnabled, + StoredOutputEnabled = source.StoredOutputEnabled, + ToolChoice = source.ToolChoice, + Temperature = source.Temperature, + TopP = source.TopP, + ServiceTier = source.ServiceTier, }; foreach (var tool in source.Tools) clone.Tools.Add(tool); + if (source.Metadata is { Count: > 0 }) + foreach (var kvp in source.Metadata) + clone.Metadata[kvp.Key] = kvp.Value; return clone; } #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -538,6 +579,9 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( [LoggerMessage(Level = LogLevel.Warning, Message = "AI tool call rejected — not on allowlist: tool={ToolName} user={EndUserId}")] private static partial void LogMcpToolCallRejected(ILogger logger, string toolName, string? endUserId); + [LoggerMessage(Level = LogLevel.Warning, Message = "Failed to parse tool arguments for '{ToolName}': user={EndUserId}")] + private static partial void LogMcpToolArgumentParseError(ILogger logger, string toolName, Exception exception, string? endUserId); + [LoggerMessage(Level = LogLevel.Warning, Message = "MCP tool skipped during option setup — not on allowlist: tool={ToolName}")] private static partial void LogMcpToolSkippedNotAllowed(ILogger logger, string toolName); @@ -546,10 +590,41 @@ private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( /// /// Returns true when the API error indicates the conversation context window was exceeded. + /// Prefers structured JSON error code from the response body; falls back to message text matching. + /// Also handles HTTP 413 (payload too large via API gateway) and token_limit_exceeded. + /// + private static bool IsContextLengthError(ClientResultException ex) + { + if (ex.Status is not (400 or 413)) return false; + + // Prefer structured error code from the response body + var errorCode = TryExtractErrorCode(ex); + if (errorCode is not null) + return errorCode is "context_length_exceeded" or "token_limit_exceeded"; + + // Fallback: substring match on exception message (Azure OpenAI format may vary) + return ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("token_limit_exceeded", StringComparison.OrdinalIgnoreCase); + } + + /// + /// Attempts to extract the error.code field from the raw JSON response body. + /// Returns null on any parse failure — this is best-effort. /// - private static bool IsContextLengthError(ClientResultException ex) => - ex.Status == 400 && - (ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) || - ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) || - ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase)); + private static string? TryExtractErrorCode(ClientResultException ex) + { + try + { + var content = ex.GetRawResponse()?.Content; + if (content is null) return null; + using var doc = System.Text.Json.JsonDocument.Parse(content.ToMemory()); + if (doc.RootElement.TryGetProperty("error", out var error) && + error.TryGetProperty("code", out var code)) + return code.GetString(); + } + catch { /* best-effort — don't let error parsing crash the error handler */ } + return null; + } } From c69a2c3e72f0b6caaceb0da8027d1228ddac494f Mon Sep 17 00:00:00 2001 From: Benjamin Michaelis Date: Fri, 8 May 2026 23:15:27 -0700 Subject: [PATCH 3/4] Fix remaining Medium issues from second subagent review pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - strictModeEnabled: false for MCP tool registration — external MCP schemas are not guaranteed to satisfy OpenAI strict-mode constraints (all properties required, additionalProperties: false everywhere). A single non-conforming schema with strictModeEnabled: true would cause a 400 at registration time for ALL tools in the request. - Guard ParseToolArguments in non-streaming path — JsonException on malformed model-generated arguments would previously bubble up as a 500. Now caught identically to the streaming path: logs warning and sends an error FunctionCallOutputResponseItem so the model can recover without aborting the loop. - Guard currentLegResponseId null before tool-call continuation — if the API never emits StreamingResponseCreatedUpdate, proceeding with PreviousResponseId = null causes a confusing 400 from the API. Now throws InvalidOperationException with a diagnostic message. - Log original ConversationContextLimitExceededException in mid-stream handler — previously wrapped in a new InvalidOperationException, losing the stack trace, PreviousResponseId, and inner exception. --- .../Services/AIChatService.cs | 28 +++++++++++++++++-- .../Controllers/ChatController.cs | 4 +-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs index 0da38d4d..a4515831 100644 --- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs +++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs @@ -232,6 +232,13 @@ private static string SanitizeForXmlContext(string? input) => // GetChatCompletionCore and avoids conversation branching. if (pendingFunctionCalls is { Count: > 0 } && mcpClient != null) { + // Guard: if the API never sent StreamingResponseCreatedUpdate, currentLegResponseId + // is null. Continuing would send FunctionCallOutputResponseItems referencing CallIds + // the server has no record of (PreviousResponseId = null), causing a 400. + if (currentLegResponseId is null) + throw new InvalidOperationException( + "Cannot continue tool-call chain: the streaming leg completed with tool calls but emitted no response ID."); + var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId); var outputItems = new List(pendingFunctionCalls.Count); @@ -387,7 +394,11 @@ private async Task CreateResponseOptionsAsync( } #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: true, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText()))); + // strictModeEnabled: false — MCP tool schemas come from external servers and are not + // guaranteed to satisfy OpenAI strict-mode constraints (all properties required, + // additionalProperties: false everywhere). A single non-conforming schema with + // strict mode enabled would cause a 400 at registration time for ALL tools. + options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: false, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText()))); #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. } } @@ -466,7 +477,20 @@ private async Task CreateResponseOptionsAsync( } LogMcpToolCallInvoked(_Logger, functionCallItem.FunctionName, iteration, endUserId); - var arguments = ParseToolArguments(functionCallItem.FunctionArguments); + + Dictionary arguments; + try + { + arguments = ParseToolArguments(functionCallItem.FunctionArguments); + } + catch (Exception ex) + { + LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId); + responseItems.Add(new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON.")); + continue; + } var toolResult = await mcpClient.CallToolAsync( functionCallItem.FunctionName, diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs index 24d79a14..2b835897 100644 --- a/EssentialCSharp.Web/Controllers/ChatController.cs +++ b/EssentialCSharp.Web/Controllers/ChatController.cs @@ -143,9 +143,9 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat Response.ContentType = "application/json"; await Response.WriteAsJsonAsync(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }, CancellationToken.None); } - catch (ConversationContextLimitExceededException) + catch (ConversationContextLimitExceededException ex) { - LogChatStreamErrorMidStream(_Logger, new InvalidOperationException("Context limit exceeded mid-stream"), User.Identity?.Name); + LogChatStreamErrorMidStream(_Logger, ex, User.Identity?.Name); try { await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None); From e501fbe966f544fcdf9e2b14babca1c687a5fe04 Mon Sep 17 00:00:00 2001 From: Benjamin Michaelis Date: Sat, 9 May 2026 00:15:53 -0700 Subject: [PATCH 4/4] Improve catch specificity in error-handler guards Replace bare catch with catch (Exception) in two best-effort error-handling sites, with clarifying comments explaining why a broad catch is correct. - TryExtractErrorCode: Azure SDK response internals are outside our control; a narrow typed-catch list risks a latent crash in production error handling. catch (Exception) is intentional. - StreamMessage SSE error-write guards (both): Kestrel throws IOException (ConnectionResetException), OperationCanceledException, or ObjectDisposedException on abrupt client disconnect. The bot's OperationCanceledException-only suggestion would miss IOException and cause an unhandled exception that masks the original error. Both catch blocks updated (bot only flagged one, the other is identical). --- .../Services/AIChatService.cs | 7 ++++++- .../Controllers/ChatController.cs | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs index a4515831..fe1bcb4d 100644 --- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs +++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs @@ -648,7 +648,12 @@ private static bool IsContextLengthError(ClientResultException ex) error.TryGetProperty("code", out var code)) return code.GetString(); } - catch { /* best-effort — don't let error parsing crash the error handler */ } + catch (Exception) + { + // Best-effort extraction inside an error handler — catch all to guarantee we never + // throw from error-parsing logic. The Azure SDK response internals are outside our + // control and the exception surface can change across SDK versions. + } return null; } } diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs index 2b835897..0fcd8577 100644 --- a/EssentialCSharp.Web/Controllers/ChatController.cs +++ b/EssentialCSharp.Web/Controllers/ChatController.cs @@ -151,7 +151,13 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None); await Response.Body.FlushAsync(CancellationToken.None); } - catch { /* client already disconnected */ } + catch (Exception) + { + // Best-effort write to an already-streaming response. Kestrel can throw + // IOException (connection reset), OperationCanceledException, or + // ObjectDisposedException on abrupt client disconnect — swallow all to + // avoid masking the original exception. + } } catch (Exception ex) when (!Response.HasStarted) { @@ -168,7 +174,13 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"Stream interrupted\"}\n\n", CancellationToken.None); await Response.Body.FlushAsync(CancellationToken.None); } - catch { /* client already disconnected */ } + catch (Exception) + { + // Best-effort write to an already-streaming response. Kestrel can throw + // IOException (connection reset), OperationCanceledException, or + // ObjectDisposedException on abrupt client disconnect — swallow all to + // avoid masking the original exception. + } } }