diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index de20ea3a5..92c96fe5b 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -148,6 +148,7 @@ import { MAX_RETRIES, providerRetryKey, selectNextProvider, + shouldRetryAlternateKey, shouldRetryRequest, } from "./tools/retry-with-fallback.js"; import { @@ -4843,7 +4844,13 @@ chat.openapi(completions, async (c) => { let sameProviderRetryContext: Awaited< ReturnType > | null = null; - if (isRetryableErrorType(finishReason)) { + if ( + shouldRetryAlternateKey( + finishReason, + res.status, + errorResponseText, + ) + ) { rememberFailedKey(usedProvider, usedRegion, { envVarName, configIndex, @@ -5112,7 +5119,13 @@ chat.openapi(completions, async (c) => { let sameProviderRetryContext: Awaited< ReturnType > | null = null; - if (isRetryableErrorType(errorType)) { + if ( + shouldRetryAlternateKey( + errorType, + inferredStatusCode, + errorResponseText, + ) + ) { rememberFailedKey(usedProvider, usedRegion, { envVarName, configIndex, @@ -8201,7 +8214,9 @@ chat.openapi(completions, async (c) => { let sameProviderRetryContext: Awaited< ReturnType > | null = null; - if (isRetryableErrorType(finishReason)) { + if ( + shouldRetryAlternateKey(finishReason, res.status, errorResponseText) + ) { rememberFailedKey(usedProvider, usedRegion, { envVarName, configIndex, diff --git a/apps/gateway/src/chat/tools/get-finish-reason-from-error.spec.ts b/apps/gateway/src/chat/tools/get-finish-reason-from-error.spec.ts index 708d0810e..63c77ae81 100644 --- a/apps/gateway/src/chat/tools/get-finish-reason-from-error.spec.ts +++ b/apps/gateway/src/chat/tools/get-finish-reason-from-error.spec.ts @@ -112,6 +112,15 @@ describe("getFinishReasonFromError", () => { expect(getFinishReasonFromError(403)).toBe("gateway_error"); }); + it("returns gateway_error for 400 invalid API key payloads", () => { + expect( + getFinishReasonFromError( + 400, + '{"error":{"message":"API key not valid. Please pass a valid API key.","type":"authentication_error","code":"invalid_api_key"}}', + ), + ).toBe("gateway_error"); + }); + it("returns client_error when no error text provided for other 4xx", () => { expect(getFinishReasonFromError(400)).toBe("client_error"); expect(getFinishReasonFromError(422)).toBe("client_error"); diff --git a/apps/gateway/src/chat/tools/get-finish-reason-from-error.ts b/apps/gateway/src/chat/tools/get-finish-reason-from-error.ts index 8386b1461..ab06b798d 100644 --- a/apps/gateway/src/chat/tools/get-finish-reason-from-error.ts +++ b/apps/gateway/src/chat/tools/get-finish-reason-from-error.ts @@ -1,3 +1,5 @@ +import { hasInvalidProviderCredentialError } from "@/lib/provider-auth-errors.js"; + /** * Determines the appropriate finish reason based on HTTP status code and error message * 5xx status codes indicate upstream provider errors @@ -55,8 +57,12 @@ export function getFinishReasonFromError( return "content_filter"; } - // 401/403 usually indicate invalid or unauthorized provider credentials - if (statusCode === 401 || statusCode === 403) { + // 401/403 and known provider credential payloads indicate bad provider keys. + if ( + statusCode === 401 || + statusCode === 403 || + hasInvalidProviderCredentialError(errorText) + ) { return "gateway_error"; } diff --git a/apps/gateway/src/chat/tools/retry-with-fallback.spec.ts b/apps/gateway/src/chat/tools/retry-with-fallback.spec.ts index 7ac1a3ee9..35a190009 100644 --- a/apps/gateway/src/chat/tools/retry-with-fallback.spec.ts +++ b/apps/gateway/src/chat/tools/retry-with-fallback.spec.ts @@ -3,6 +3,7 @@ import { describe, it, expect } from "vitest"; import { isRetryableErrorType, shouldRetryRequest, + shouldRetryAlternateKey, selectNextProvider, getErrorType, MAX_RETRIES, @@ -116,6 +117,34 @@ describe("shouldRetryRequest", () => { }); }); +describe("shouldRetryAlternateKey", () => { + it("retries alternate keys for retryable upstream failures", () => { + expect(shouldRetryAlternateKey("upstream_error", 500)).toBe(true); + expect(shouldRetryAlternateKey("network_error", 0)).toBe(true); + }); + + it("retries alternate keys for auth failures on the current provider", () => { + expect(shouldRetryAlternateKey("gateway_error", 401)).toBe(true); + expect(shouldRetryAlternateKey("gateway_error", 403)).toBe(true); + }); + + it("retries alternate keys for invalid API key payloads without 401/403", () => { + expect( + shouldRetryAlternateKey( + "gateway_error", + 400, + "API key not valid. Please pass a valid API key.", + ), + ).toBe(true); + }); + + it("does not retry alternate keys for other non-retryable failures", () => { + expect(shouldRetryAlternateKey("gateway_error", 500)).toBe(false); + expect(shouldRetryAlternateKey("client_error", 400)).toBe(false); + expect(shouldRetryAlternateKey("content_filter", 403)).toBe(false); + }); +}); + describe("selectNextProvider", () => { const modelProviders = [ { providerId: "openai", modelName: "gpt-4o" }, diff --git a/apps/gateway/src/chat/tools/retry-with-fallback.ts b/apps/gateway/src/chat/tools/retry-with-fallback.ts index 58b5b6010..d4f100703 100644 --- a/apps/gateway/src/chat/tools/retry-with-fallback.ts +++ b/apps/gateway/src/chat/tools/retry-with-fallback.ts @@ -1,3 +1,5 @@ +import { hasInvalidProviderCredentialError } from "@/lib/provider-auth-errors.js"; + export const MAX_RETRIES = 2; export type RetryableErrorType = @@ -31,6 +33,28 @@ export function isRetryableErrorType(errorType: string): boolean { ); } +/** + * Determines whether a failed request should be retried against another key + * for the same provider. + * + * Auth failures (401/403) are not eligible for cross-provider fallback, but + * they should still rotate to another configured key for the current provider + * because the failure is often isolated to a single credential. + */ +export function shouldRetryAlternateKey( + errorType: string, + statusCode?: number, + errorText?: string, +): boolean { + return ( + isRetryableErrorType(errorType) || + (errorType === "gateway_error" && + ((statusCode !== undefined && + (statusCode === 401 || statusCode === 403)) || + hasInvalidProviderCredentialError(errorText))) + ); +} + /** * Determines whether a failed request should be retried with a different provider. * Only retries when no specific provider was requested, the error is retryable, diff --git a/apps/gateway/src/fallback.spec.ts b/apps/gateway/src/fallback.spec.ts index 60044fa3e..37e26b2f8 100644 --- a/apps/gateway/src/fallback.spec.ts +++ b/apps/gateway/src/fallback.spec.ts @@ -14,6 +14,7 @@ import { getProviderDefinition } from "@llmgateway/models"; import { app } from "./app.js"; import { getApiKeyFingerprint } from "./lib/api-key-fingerprint.js"; +import { isTrackedKeyHealthy, resetKeyHealth } from "./lib/api-key-health.js"; import { startMockServer, stopMockServer, @@ -88,6 +89,7 @@ describe("fallback and error status code handling", () => { } async function resetTestState() { + resetKeyHealth(); resetFailOnceCounter(); await clearCache(); await db.update(tables.modelProviderMapping).set({ @@ -2227,6 +2229,79 @@ describe("fallback and error status code handling", () => { }); }); + test("non-streaming: retries another key for auth failures on the same explicit provider", async () => { + await setupSingleProviderWithMultipleKeys("together.ai"); + + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "together.ai/glm-4.7", + messages: [{ role: "user", content: "TRIGGER_STATUS_401" }], + }), + }); + + expect(res.status).toBe(500); + const json = await res.json(); + expect(json.error.type).toBe("gateway_error"); + + const logs = await waitForLogs(2); + const authLogs = logs.filter( + (log: Log) => log.errorDetails?.statusCode === 401, + ); + expect(authLogs).toHaveLength(2); + expect(authLogs.some((log: Log) => log.retried)).toBe(true); + expect(isTrackedKeyHealthy("together.ai-key-primary")).toBe(false); + expect(isTrackedKeyHealthy("together.ai-key-secondary")).toBe(false); + }); + + test("non-streaming: retries another key for invalid API key payloads", async () => { + await setupSingleProviderWithMultipleKeys("together.ai"); + + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "together.ai/glm-4.7", + messages: [ + { role: "user", content: "TRIGGER_FAIL_ONCE_INVALID_KEY" }, + ], + }), + }); + + expect(res.status).toBe(200); + const json = await res.json(); + expect(json.metadata.routing).toHaveLength(2); + expect(json.metadata.routing[0]).toMatchObject({ + provider: "together.ai", + status_code: 400, + succeeded: false, + }); + expect(json.metadata.routing[1]).toMatchObject({ + provider: "together.ai", + succeeded: true, + }); + + const logs = await waitForLogs(2); + const failedLog = logs.find( + (log: Log) => log.errorDetails?.statusCode === 400, + ); + const successLog = logs.find( + (log: Log) => log.finishReason === "stop" || !log.hasError, + ); + expect(failedLog?.finishReason).toBe("gateway_error"); + expect(failedLog?.retried).toBe(true); + expect(successLog?.routingMetadata?.routing).toHaveLength(2); + expect(isTrackedKeyHealthy("together.ai-key-primary")).toBe(false); + expect(isTrackedKeyHealthy("together.ai-key-secondary")).toBe(true); + }); + test("streaming: retries on 500 and delivers response on fallback provider", async () => { await setupMultiProviderKeys(); diff --git a/apps/gateway/src/lib/api-key-health.spec.ts b/apps/gateway/src/lib/api-key-health.spec.ts index c7e98f28e..0856166bc 100644 --- a/apps/gateway/src/lib/api-key-health.spec.ts +++ b/apps/gateway/src/lib/api-key-health.spec.ts @@ -161,6 +161,22 @@ describe("api-key-health", () => { permanentlyBlacklisted: true, }); }); + + it("should permanently blacklist ignored 4xx with invalid key text", () => { + reportKeyError( + "LLM_OPENAI_API_KEY", + 0, + 400, + "API key not valid. Please pass a valid API key.", + ); + + expect(getKeyMetrics("LLM_OPENAI_API_KEY", 0)).toMatchObject({ + uptime: 0, + totalRequests: 1, + consecutiveErrors: 0, + permanentlyBlacklisted: true, + }); + }); }); describe("getKeyHealth", () => { diff --git a/apps/gateway/src/lib/api-key-health.ts b/apps/gateway/src/lib/api-key-health.ts index baed6e4b4..75e064bde 100644 --- a/apps/gateway/src/lib/api-key-health.ts +++ b/apps/gateway/src/lib/api-key-health.ts @@ -1,3 +1,5 @@ +import { hasInvalidProviderCredentialError } from "./provider-auth-errors.js"; + /** * In-memory API key health tracking for uptime-aware routing * Tracks historical error rates per API key using a sliding window approach @@ -77,13 +79,6 @@ const PERMANENT_ERROR_CODES = [401, 403]; */ const UPTIME_RELEVANT_4XX_CODES = new Set([...PERMANENT_ERROR_CODES, 404, 429]); -/** - * Error messages that indicate permanent key issues - */ -const PERMANENT_ERROR_MESSAGES = [ - "API Key not found. Please pass a valid API key.", -]; - /** * Uptime threshold below which exponential penalty kicks in */ @@ -360,9 +355,7 @@ export function reportKeyError( keyHealthMap.set(healthKey, health); } - const isPermanentErrorMessage = - errorText !== undefined && - PERMANENT_ERROR_MESSAGES.some((msg) => errorText.includes(msg)); + const isPermanentErrorMessage = hasInvalidProviderCredentialError(errorText); // Most upstream 4xx responses are client-side request issues and should not // degrade provider uptime or influence routing decisions. @@ -422,9 +415,7 @@ export function reportTrackedKeyError( keyHealthMap.set(healthKey, health); } - const isPermanentErrorMessage = - errorText !== undefined && - PERMANENT_ERROR_MESSAGES.some((msg) => errorText.includes(msg)); + const isPermanentErrorMessage = hasInvalidProviderCredentialError(errorText); if ( statusCode !== undefined && diff --git a/apps/gateway/src/lib/provider-auth-errors.ts b/apps/gateway/src/lib/provider-auth-errors.ts new file mode 100644 index 000000000..02d164bd8 --- /dev/null +++ b/apps/gateway/src/lib/provider-auth-errors.ts @@ -0,0 +1,15 @@ +const INVALID_PROVIDER_CREDENTIAL_PATTERNS = [ + /api key not valid/i, + /api key not found/i, + /please pass a valid api key/i, +]; + +export function hasInvalidProviderCredentialError(errorText?: string): boolean { + if (!errorText) { + return false; + } + + return INVALID_PROVIDER_CREDENTIAL_PATTERNS.some((pattern) => + pattern.test(errorText), + ); +} diff --git a/apps/gateway/src/test-utils/mock-openai-server.ts b/apps/gateway/src/test-utils/mock-openai-server.ts index c1ad2a54d..a92956c72 100644 --- a/apps/gateway/src/test-utils/mock-openai-server.ts +++ b/apps/gateway/src/test-utils/mock-openai-server.ts @@ -767,6 +767,23 @@ mockOpenAIServer.post("/v1/chat/completions", async (c) => { // Subsequent requests succeed - fall through to normal response } + // Check if this request should fail on the first attempt but succeed on retry + if (userMessage.includes("TRIGGER_FAIL_ONCE_INVALID_KEY")) { + failOnceCounter++; + if (failOnceCounter === 1) { + c.status(400); + return c.json({ + error: { + message: "API key not valid. Please pass a valid API key.", + type: "authentication_error", + param: null, + code: "invalid_api_key", + }, + }); + } + // Subsequent requests succeed - fall through to normal response + } + // Check if this request should fail on the first attempt but succeed on retry if (userMessage.includes("TRIGGER_FAIL_ONCE")) { failOnceCounter++;