Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ import {
MAX_RETRIES,
providerRetryKey,
selectNextProvider,
shouldRetryAlternateKey,
shouldRetryRequest,
} from "./tools/retry-with-fallback.js";
import {
Expand Down Expand Up @@ -4843,7 +4844,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(
finishReason,
res.status,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -5112,7 +5119,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(errorType)) {
if (
shouldRetryAlternateKey(
errorType,
inferredStatusCode,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -8201,7 +8214,9 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(finishReason, res.status, errorResponseText)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ describe("getFinishReasonFromError", () => {
expect(getFinishReasonFromError(403)).toBe("gateway_error");
});

it("returns gateway_error for 400 invalid API key payloads", () => {
expect(
getFinishReasonFromError(
400,
'{"error":{"message":"API key not valid. Please pass a valid API key.","type":"authentication_error","code":"invalid_api_key"}}',
),
).toBe("gateway_error");
});

it("returns client_error when no error text provided for other 4xx", () => {
expect(getFinishReasonFromError(400)).toBe("client_error");
expect(getFinishReasonFromError(422)).toBe("client_error");
Expand Down
10 changes: 8 additions & 2 deletions apps/gateway/src/chat/tools/get-finish-reason-from-error.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { hasInvalidProviderCredentialError } from "@/lib/provider-auth-errors.js";

/**
* Determines the appropriate finish reason based on HTTP status code and error message
* 5xx status codes indicate upstream provider errors
Expand Down Expand Up @@ -55,8 +57,12 @@ export function getFinishReasonFromError(
return "content_filter";
}

// 401/403 usually indicate invalid or unauthorized provider credentials
if (statusCode === 401 || statusCode === 403) {
// 401/403 and known provider credential payloads indicate bad provider keys.
if (
statusCode === 401 ||
statusCode === 403 ||
hasInvalidProviderCredentialError(errorText)
) {
return "gateway_error";
}

Expand Down
29 changes: 29 additions & 0 deletions apps/gateway/src/chat/tools/retry-with-fallback.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { describe, it, expect } from "vitest";
import {
isRetryableErrorType,
shouldRetryRequest,
shouldRetryAlternateKey,
selectNextProvider,
getErrorType,
MAX_RETRIES,
Expand Down Expand Up @@ -116,6 +117,34 @@ describe("shouldRetryRequest", () => {
});
});

describe("shouldRetryAlternateKey", () => {
it("retries alternate keys for retryable upstream failures", () => {
expect(shouldRetryAlternateKey("upstream_error", 500)).toBe(true);
expect(shouldRetryAlternateKey("network_error", 0)).toBe(true);
});

it("retries alternate keys for auth failures on the current provider", () => {
expect(shouldRetryAlternateKey("gateway_error", 401)).toBe(true);
expect(shouldRetryAlternateKey("gateway_error", 403)).toBe(true);
});

it("retries alternate keys for invalid API key payloads without 401/403", () => {
expect(
shouldRetryAlternateKey(
"gateway_error",
400,
"API key not valid. Please pass a valid API key.",
),
).toBe(true);
});

it("does not retry alternate keys for other non-retryable failures", () => {
expect(shouldRetryAlternateKey("gateway_error", 500)).toBe(false);
expect(shouldRetryAlternateKey("client_error", 400)).toBe(false);
expect(shouldRetryAlternateKey("content_filter", 403)).toBe(false);
});
});

describe("selectNextProvider", () => {
const modelProviders = [
{ providerId: "openai", modelName: "gpt-4o" },
Expand Down
24 changes: 24 additions & 0 deletions apps/gateway/src/chat/tools/retry-with-fallback.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { hasInvalidProviderCredentialError } from "@/lib/provider-auth-errors.js";

export const MAX_RETRIES = 2;

export type RetryableErrorType =
Expand Down Expand Up @@ -31,6 +33,28 @@ export function isRetryableErrorType(errorType: string): boolean {
);
}

/**
* Determines whether a failed request should be retried against another key
* for the same provider.
*
* Auth failures (401/403) are not eligible for cross-provider fallback, but
* they should still rotate to another configured key for the current provider
* because the failure is often isolated to a single credential.
*/
export function shouldRetryAlternateKey(
errorType: string,
statusCode?: number,
errorText?: string,
): boolean {
return (
isRetryableErrorType(errorType) ||
(errorType === "gateway_error" &&
((statusCode !== undefined &&
(statusCode === 401 || statusCode === 403)) ||
hasInvalidProviderCredentialError(errorText)))
Comment on lines +51 to +54
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve gateway_error in alternate-key retry metadata

Enabling shouldRetryAlternateKey for gateway_error causes auth failures (e.g., 401/403 or 400 invalid-key payloads) to enter the retry path, but those retry attempts are still recorded via getErrorType(statusCode) in chat.ts, which maps these statuses to upstream_error. In scenarios with multiple keys on the same provider, metadata.routing[].error_type will now misreport credential failures as transient upstream failures, which can mislead routing diagnostics and dashboard analysis.

Useful? React with 👍 / 👎.

);
}

/**
* Determines whether a failed request should be retried with a different provider.
* Only retries when no specific provider was requested, the error is retryable,
Expand Down
75 changes: 75 additions & 0 deletions apps/gateway/src/fallback.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { getProviderDefinition } from "@llmgateway/models";

import { app } from "./app.js";
import { getApiKeyFingerprint } from "./lib/api-key-fingerprint.js";
import { isTrackedKeyHealthy, resetKeyHealth } from "./lib/api-key-health.js";
import {
startMockServer,
stopMockServer,
Expand Down Expand Up @@ -88,6 +89,7 @@ describe("fallback and error status code handling", () => {
}

async function resetTestState() {
resetKeyHealth();
resetFailOnceCounter();
await clearCache();
await db.update(tables.modelProviderMapping).set({
Expand Down Expand Up @@ -2227,6 +2229,79 @@ describe("fallback and error status code handling", () => {
});
});

test("non-streaming: retries another key for auth failures on the same explicit provider", async () => {
await setupSingleProviderWithMultipleKeys("together.ai");

const res = await app.request("/v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: "Bearer real-token",
},
body: JSON.stringify({
model: "together.ai/glm-4.7",
messages: [{ role: "user", content: "TRIGGER_STATUS_401" }],
}),
});

expect(res.status).toBe(500);
const json = await res.json();
expect(json.error.type).toBe("gateway_error");

const logs = await waitForLogs(2);
const authLogs = logs.filter(
(log: Log) => log.errorDetails?.statusCode === 401,
);
expect(authLogs).toHaveLength(2);
expect(authLogs.some((log: Log) => log.retried)).toBe(true);
expect(isTrackedKeyHealthy("together.ai-key-primary")).toBe(false);
expect(isTrackedKeyHealthy("together.ai-key-secondary")).toBe(false);
});

test("non-streaming: retries another key for invalid API key payloads", async () => {
await setupSingleProviderWithMultipleKeys("together.ai");

const res = await app.request("/v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: "Bearer real-token",
},
body: JSON.stringify({
model: "together.ai/glm-4.7",
messages: [
{ role: "user", content: "TRIGGER_FAIL_ONCE_INVALID_KEY" },
],
}),
});

expect(res.status).toBe(200);
const json = await res.json();
expect(json.metadata.routing).toHaveLength(2);
expect(json.metadata.routing[0]).toMatchObject({
provider: "together.ai",
status_code: 400,
succeeded: false,
});
expect(json.metadata.routing[1]).toMatchObject({
provider: "together.ai",
succeeded: true,
});

const logs = await waitForLogs(2);
const failedLog = logs.find(
(log: Log) => log.errorDetails?.statusCode === 400,
);
const successLog = logs.find(
(log: Log) => log.finishReason === "stop" || !log.hasError,
);
expect(failedLog?.finishReason).toBe("gateway_error");
expect(failedLog?.retried).toBe(true);
expect(successLog?.routingMetadata?.routing).toHaveLength(2);
expect(isTrackedKeyHealthy("together.ai-key-primary")).toBe(false);
expect(isTrackedKeyHealthy("together.ai-key-secondary")).toBe(true);
});

test("streaming: retries on 500 and delivers response on fallback provider", async () => {
await setupMultiProviderKeys();

Expand Down
16 changes: 16 additions & 0 deletions apps/gateway/src/lib/api-key-health.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,22 @@ describe("api-key-health", () => {
permanentlyBlacklisted: true,
});
});

it("should permanently blacklist ignored 4xx with invalid key text", () => {
reportKeyError(
"LLM_OPENAI_API_KEY",
0,
400,
"API key not valid. Please pass a valid API key.",
);

expect(getKeyMetrics("LLM_OPENAI_API_KEY", 0)).toMatchObject({
uptime: 0,
totalRequests: 1,
consecutiveErrors: 0,
permanentlyBlacklisted: true,
});
});
});

describe("getKeyHealth", () => {
Expand Down
17 changes: 4 additions & 13 deletions apps/gateway/src/lib/api-key-health.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { hasInvalidProviderCredentialError } from "./provider-auth-errors.js";

/**
* In-memory API key health tracking for uptime-aware routing
* Tracks historical error rates per API key using a sliding window approach
Expand Down Expand Up @@ -77,13 +79,6 @@ const PERMANENT_ERROR_CODES = [401, 403];
*/
const UPTIME_RELEVANT_4XX_CODES = new Set([...PERMANENT_ERROR_CODES, 404, 429]);

/**
* Error messages that indicate permanent key issues
*/
const PERMANENT_ERROR_MESSAGES = [
"API Key not found. Please pass a valid API key.",
];

/**
* Uptime threshold below which exponential penalty kicks in
*/
Expand Down Expand Up @@ -360,9 +355,7 @@ export function reportKeyError(
keyHealthMap.set(healthKey, health);
}

const isPermanentErrorMessage =
errorText !== undefined &&
PERMANENT_ERROR_MESSAGES.some((msg) => errorText.includes(msg));
const isPermanentErrorMessage = hasInvalidProviderCredentialError(errorText);

// Most upstream 4xx responses are client-side request issues and should not
// degrade provider uptime or influence routing decisions.
Expand Down Expand Up @@ -422,9 +415,7 @@ export function reportTrackedKeyError(
keyHealthMap.set(healthKey, health);
}

const isPermanentErrorMessage =
errorText !== undefined &&
PERMANENT_ERROR_MESSAGES.some((msg) => errorText.includes(msg));
const isPermanentErrorMessage = hasInvalidProviderCredentialError(errorText);

if (
statusCode !== undefined &&
Expand Down
15 changes: 15 additions & 0 deletions apps/gateway/src/lib/provider-auth-errors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const INVALID_PROVIDER_CREDENTIAL_PATTERNS = [
/api key not valid/i,
/api key not found/i,
/please pass a valid api key/i,
];
Comment on lines +1 to +5
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR title/description describe changing Anthropic determineStopReason to return null for unknown finish reasons, but this PR's diff does not include any changes to apps/gateway/src/anthropic/anthropic.ts (where determineStopReason currently defaults to returning "end_turn"). Please either include the missing Anthropic handler change or update the PR title/description to reflect the actual changes (provider credential detection + alternate-key retry/health behavior).

Copilot uses AI. Check for mistakes.

export function hasInvalidProviderCredentialError(errorText?: string): boolean {
if (!errorText) {
return false;
}

return INVALID_PROVIDER_CREDENTIAL_PATTERNS.some((pattern) =>
pattern.test(errorText),
);
}
17 changes: 17 additions & 0 deletions apps/gateway/src/test-utils/mock-openai-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,23 @@ mockOpenAIServer.post("/v1/chat/completions", async (c) => {
// Subsequent requests succeed - fall through to normal response
}

// Check if this request should fail on the first attempt but succeed on retry
if (userMessage.includes("TRIGGER_FAIL_ONCE_INVALID_KEY")) {
failOnceCounter++;
if (failOnceCounter === 1) {
c.status(400);
return c.json({
error: {
message: "API key not valid. Please pass a valid API key.",
type: "authentication_error",
param: null,
code: "invalid_api_key",
},
});
}
// Subsequent requests succeed - fall through to normal response
}

// Check if this request should fail on the first attempt but succeed on retry
if (userMessage.includes("TRIGGER_FAIL_ONCE")) {
failOnceCounter++;
Expand Down
Loading