diff --git a/.env.example b/.env.example
index 4ff070e..4e3533e 100644
--- a/.env.example
+++ b/.env.example
@@ -31,6 +31,12 @@ AUTHORIZED_WALLETS=
 OPENROUTER_API_KEY=
 SIPHER_MODEL=anthropic/claude-sonnet-4.6
 
+# Boot-time OpenRouter self-test (issue #293). Set to 'true' to skip the
+# 5s ping that validates SIPHER_MODEL + OPENROUTER_API_KEY before the agent
+# accepts traffic. Useful for offline dev or any environment that cannot
+# reach https://openrouter.ai at startup. Tests + e2e set this automatically.
+SIPHER_SKIP_BOOT_SELF_TEST=
+
 # Sipher public URL (used for payment links, invoices)
 SIPHER_BASE_URL=https://sipher.sip-protocol.org
 
diff --git a/packages/agent/src/boot/self-test.ts b/packages/agent/src/boot/self-test.ts
new file mode 100644
index 0000000..c7ac8f0
--- /dev/null
+++ b/packages/agent/src/boot/self-test.ts
@@ -0,0 +1,81 @@
+import { getSipherModel } from '../pi/provider.js'
+
+const OPENROUTER_URL = 'https://openrouter.ai/api/v1/chat/completions'
+const DEFAULT_TIMEOUT_MS = 5000
+
+export interface SelfTestOptions {
+  /** Abort budget for the OpenRouter ping. Default 5000ms. Override in tests. */
+  timeoutMs?: number
+}
+
+/**
+ * Validate the OpenRouter configuration at boot by sending a 2-token ping.
+ *
+ * Catches the two failure modes that produced silent prod outages in
+ * frontier_sip_17:
+ *   1. SIPHER_MODEL set to a value pi-ai's registry doesn't know (e.g. the
+ *      hyphen-form `claude-sonnet-4-6` instead of dot-form). `getSipherModel`
+ *      throws synchronously when the lookup fails.
+ *   2. OPENROUTER_API_KEY is empty, expired, or revoked. OpenRouter returns
+ *      401 and we surface that here, instead of letting it manifest as
+ *      empty assistant responses on the first user chat turn.
+ *
+ * Throwing here aborts the boot sequence in `packages/agent/src/index.ts`
+ * (the throw propagates out of the top-level await). Docker restarts the
+ * container; the next boot prints the same error until env is fixed.
+ *
+ * Skip via `SIPHER_SKIP_BOOT_SELF_TEST=true` for test runs, offline dev,
+ * or any environment that legitimately cannot reach OpenRouter at boot.
+ *
+ * @param opts - Optional overrides; `timeoutMs` replaces the default 5000ms abort budget.
+ * @throws Error when OPENROUTER_API_KEY is unset, when the ping is aborted by
+ *   the timeout, or when OpenRouter answers with a non-2xx status. Errors from
+ *   `getSipherModel` and non-abort fetch failures propagate unchanged.
+ */
+export async function selfTestOpenRouter(opts: SelfTestOptions = {}): Promise<void> {
+  if (process.env.SIPHER_SKIP_BOOT_SELF_TEST === 'true') {
+    return
+  }
+
+  // Throws synchronously when SIPHER_MODEL is invalid for pi-ai's registry.
+  const model = getSipherModel()
+
+  const apiKey = process.env.OPENROUTER_API_KEY
+  if (!apiKey) {
+    throw new Error('OpenRouter self-test failed: OPENROUTER_API_KEY env var is unset')
+  }
+
+  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS
+  const controller = new AbortController()
+  const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs)
+
+  let resp: Response
+  try {
+    resp = await fetch(OPENROUTER_URL, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: model.id,
+        messages: [{ role: 'user', content: 'pong' }],
+        max_tokens: 2,
+      }),
+      signal: controller.signal,
+    })
+  } catch (err) {
+    const isAbort = err instanceof Error && err.name === 'AbortError'
+    if (isAbort) {
+      throw new Error(`OpenRouter self-test timed out after ${timeoutMs}ms (no response from ${OPENROUTER_URL})`)
+    }
+    throw err
+  } finally {
+    clearTimeout(timeoutHandle)
+  }
+
+  if (!resp.ok) {
+    const detail = (await resp.text().catch(() => '')).slice(0, 300)
+    throw new Error(`OpenRouter self-test failed: HTTP ${resp.status} - ${detail}`)
+  }
+}
diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts
index e33eea7..60c45dd 100644
--- a/packages/agent/src/index.ts
+++ b/packages/agent/src/index.ts
@@ -37,6 +37,7 @@ import { keysRouter } from './routes/keys.js'
 import { publicRouter } from './routes/public/index.js'
 import { buildCorsMiddleware } from './cors-config.js'
 import { loadNetworkConfig } from './config/network.js'
+import { selfTestOpenRouter } from './boot/self-test.js'
 import {
   getAllPendingActionsWithStatus,
   cancelPendingAction as dbCancelPendingAction,
@@ -54,6 +55,18 @@ console.log(
   ` Network: ${networkConfig.network} (cluster=${networkConfig.clusterName}, publicRpc=${networkConfig.publicRpcUrl}, beta=${networkConfig.beta})`,
 )
 
+// ─────────────────────────────────────────────────────────────────────────────
+// OpenRouter self-test — fail fast on bad SIPHER_MODEL / OPENROUTER_API_KEY
+// ─────────────────────────────────────────────────────────────────────────────
+// Ping OpenRouter with a 2-token sample before accepting traffic. Catches the
+// two silent-outage modes from frontier_sip_17: hyphen-form SIPHER_MODEL (the
+// pi-ai registry lookup throws) and a stale/invalid OPENROUTER_API_KEY (returns
+// 401 here instead of empty content on every chat turn). Skip via
+// SIPHER_SKIP_BOOT_SELF_TEST=true for test runs or offline dev.
+const selfTestStart = Date.now()
+await selfTestOpenRouter()
+console.log(` OpenRouter: self-test pass (${Date.now() - selfTestStart}ms)`)
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Database & session initialization
 // ─────────────────────────────────────────────────────────────────────────────
diff --git a/packages/agent/tests/boot/self-test.test.ts b/packages/agent/tests/boot/self-test.test.ts
new file mode 100644
index 0000000..6527117
--- /dev/null
+++ b/packages/agent/tests/boot/self-test.test.ts
@@ -0,0 +1,105 @@
+import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'
+import { selfTestOpenRouter } from '../../src/boot/self-test.js'
+
+describe('selfTestOpenRouter', () => {
+  const originalEnv = { ...process.env }
+
+  beforeEach(() => {
+    process.env = { ...originalEnv }
+    // Default to a valid config so each test only overrides what it needs to.
+    process.env.OPENROUTER_API_KEY = 'sk-or-v1-test-key'
+    process.env.SIPHER_MODEL = 'anthropic/claude-sonnet-4.6'
+    delete process.env.SIPHER_SKIP_BOOT_SELF_TEST
+    vi.restoreAllMocks()
+  })
+
+  afterEach(() => {
+    process.env = { ...originalEnv }
+    vi.restoreAllMocks()
+  })
+
+  it('returns successfully when OpenRouter responds with 200', async () => {
+    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'pong' } }] }), { status: 200 }),
+    )
+
+    await expect(selfTestOpenRouter()).resolves.toBeUndefined()
+
+    expect(fetchSpy).toHaveBeenCalledOnce()
+    const [url, init] = fetchSpy.mock.calls[0]!
+    expect(url).toBe('https://openrouter.ai/api/v1/chat/completions')
+    expect(init?.method).toBe('POST')
+    const headers = init?.headers as Record<string, string>
+    expect(headers.Authorization).toBe('Bearer sk-or-v1-test-key')
+    expect(headers['Content-Type']).toBe('application/json')
+    const body = JSON.parse(init?.body as string)
+    expect(body.model).toBe('anthropic/claude-sonnet-4.6')
+    expect(body.max_tokens).toBe(2)
+    expect(body.messages).toHaveLength(1)
+  })
+
+  it('throws with HTTP status + body when OpenRouter returns 401', async () => {
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue(
+      new Response(JSON.stringify({ error: { message: 'User not found' } }), { status: 401 }),
+    )
+
+    await expect(selfTestOpenRouter()).rejects.toThrow(/OpenRouter self-test failed.*401.*User not found/i)
+  })
+
+  it('throws when OpenRouter returns 5xx', async () => {
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue(
+      new Response('upstream error', { status: 503 }),
+    )
+
+    await expect(selfTestOpenRouter()).rejects.toThrow(/OpenRouter self-test failed.*503/i)
+  })
+
+  it('throws when OPENROUTER_API_KEY is unset', async () => {
+    delete process.env.OPENROUTER_API_KEY
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+
+    await expect(selfTestOpenRouter()).rejects.toThrow(/OPENROUTER_API_KEY/)
+    expect(fetchSpy).not.toHaveBeenCalled()
+  })
+
+  it('propagates getSipherModel error when SIPHER_MODEL is invalid', async () => {
+    process.env.SIPHER_MODEL = 'anthropic/claude-sonnet-4-6' // hyphen-form — invalid in pi-ai registry
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+
+    await expect(selfTestOpenRouter()).rejects.toThrow(/pi-ai registry|dot notation/i)
+    expect(fetchSpy).not.toHaveBeenCalled()
+  })
+
+  it('aborts the fetch after the configured timeout', async () => {
+    // Mock fetch to hang until the abort signal fires, then reject with AbortError.
+    vi.spyOn(globalThis, 'fetch').mockImplementation((_url, init) => {
+      return new Promise((_resolve, reject) => {
+        const signal = (init as RequestInit | undefined)?.signal
+        if (!signal) return
+        signal.addEventListener('abort', () => {
+          const err = new Error('The operation was aborted')
+          err.name = 'AbortError'
+          reject(err)
+        })
+      })
+    })
+
+    await expect(selfTestOpenRouter({ timeoutMs: 10 })).rejects.toThrow(/timed out|abort/i)
+  })
+
+  it('skips entirely when SIPHER_SKIP_BOOT_SELF_TEST=true', async () => {
+    process.env.SIPHER_SKIP_BOOT_SELF_TEST = 'true'
+    const fetchSpy = vi.spyOn(globalThis, 'fetch')
+
+    await expect(selfTestOpenRouter()).resolves.toBeUndefined()
+    expect(fetchSpy).not.toHaveBeenCalled()
+  })
+
+  it('does NOT skip when SIPHER_SKIP_BOOT_SELF_TEST is "false" or other truthy strings', async () => {
+    process.env.SIPHER_SKIP_BOOT_SELF_TEST = 'false'
+    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue(new Response('ok', { status: 200 }))
+
+    await selfTestOpenRouter()
+    expect(fetchSpy).toHaveBeenCalledOnce()
+  })
+})
diff --git a/packages/agent/vitest.config.ts b/packages/agent/vitest.config.ts
index c222c0a..749557c 100644
--- a/packages/agent/vitest.config.ts
+++ b/packages/agent/vitest.config.ts
@@ -19,6 +19,10 @@ export default defineConfig({
     env: {
       SIPHER_NETWORK: 'devnet',
       SIPHER_HELIUS_API_KEY: 'test-key',
+      // Skip OpenRouter ping at boot. Tests don't load index.ts directly, but
+      // future integration tests might — and tests don't have a real OpenRouter
+      // key to ping with. The self-test itself is covered in tests/boot/.
+      SIPHER_SKIP_BOOT_SELF_TEST: 'true',
     },
   },
 })
diff --git a/playwright.config.ts b/playwright.config.ts
index 448aa09..7f89db2 100644
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -51,6 +51,9 @@ export default defineConfig({
         // Real Helius calls are not made in e2e tests (no deposit/withdraw flows).
         SIPHER_NETWORK: process.env.SIPHER_NETWORK ?? 'devnet',
         SIPHER_HELIUS_API_KEY: process.env.SIPHER_HELIUS_API_KEY ?? 'e2e-placeholder-key',
+        // Skip the OpenRouter boot ping (issue #293). e2e has no real
+        // OPENROUTER_API_KEY and chat is mocked at the network layer.
+        SIPHER_SKIP_BOOT_SELF_TEST: 'true',
       },
     },
     {