From bf368494ddccddadac55071c61b2edb41cb95fee Mon Sep 17 00:00:00 2001 From: heznpc Date: Thu, 18 Jun 2026 07:10:08 +0900 Subject: [PATCH 1/6] fix: expose audit verified in summary contract + correct OAuth resource comment audit_summary already computed verified / verifiedFirstBreak / auditDisabled but omitted them from outputSchema, so the MCP SDK strips the tamper-evidence signal out of structuredContent. Declared all three; added a strict-contract test so a future drift is caught at the runtime contract, not just registration shape. oauth-verifier's header comment claimed the RFC 8707 resource claim is accepted as an audience fallback, but the impl only checks aud via jose. Made the comment match reality (resource rides untouched in raw, never gates audience) and dropped the now-unused claimsFromPayload audience parameter. --- src/audit/tools.ts | 16 ++++++++++++++++ src/server/oauth-verifier.ts | 26 ++++++++++++++------------ tests/audit-tools.test.js | 28 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/audit/tools.ts b/src/audit/tools.ts index f2fadc17..7d1c8fdc 100644 --- a/src/audit/tools.ts +++ b/src/audit/tools.ts @@ -98,6 +98,22 @@ export function registerAuditTools(server: McpServer, _config: AirMcpConfig): vo errors: z.number(), }), ), + // Tamper-evidence — the strongest trust signal in the codebase. The + // HMAC chain is replayed on every summary; surfacing the verdict (and + // the first break location) in the tool contract lets a consumer act + // on `verified === false` instead of trusting the log blindly. + verified: z.boolean(), + verifiedFirstBreak: z + .object({ + file: z.string(), + lineIndex: z.number(), + reason: z.enum(["hmac_mismatch", "prev_mismatch", "malformed"]), + }) + .optional(), + // Audit logging currently halted (disk full / permission / repeated + // flush failures). 
Surfaced so a doctor / health check can flag a gap + // in coverage rather than reading silence as "nothing happened". + auditDisabled: z.boolean(), }, annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, }, diff --git a/src/server/oauth-verifier.ts b/src/server/oauth-verifier.ts index 4ec6c6c5..17df8667 100644 --- a/src/server/oauth-verifier.ts +++ b/src/server/oauth-verifier.ts @@ -16,10 +16,13 @@ * the kid lookup, short-lived key caching, and background rotation * internally. No bespoke cache layer — duplicating jose's * battle-tested behaviour is a loss of investment. - * • Audience check: jose's built-in `audience` option accepts either a - * matching `aud` claim OR (per JWT spec) an array `aud` that - * includes the target. The RFC 8707 `resource` claim is also - * accepted as a fallback to match specs where both forms coexist. + * • Audience check: jose's built-in `audience` option matches the + * token's `aud` claim — accepting either a string `aud` equal to the + * target OR (per JWT spec) an array `aud` that includes it. The RFC + * 8707 `resource` claim is NOT consulted for this decision; a token + * whose `resource` matches but whose `aud` does not is still rejected + * (`wrong_audience`). When present, `resource` rides along untouched + * in `claims.raw` for downstream inspection. */ import { createRemoteJWKSet, jwtVerify, errors as joseErrors } from "jose"; import type { JWTPayload } from "jose"; @@ -114,14 +117,13 @@ function parseScopes(payload: JWTPayload): string[] { return []; } -function claimsFromPayload(payload: JWTPayload, audience: string): OAuthClaims | null { +function claimsFromPayload(payload: JWTPayload): OAuthClaims | null { if (typeof payload.sub !== "string" || payload.sub === "") return null; - // RFC 8707 `resource` fallback: some authorization servers return - // `resource` alongside `aud`. If `aud` already matched (jose verified - // that), `resource` is informational. 
If the token carried `resource` - // but not the audience claim form we recognize, we'd have bailed out - // in jose. Nothing to do here beyond surfacing it in `raw`. - void audience; + // The RFC 8707 `resource` claim is intentionally NOT used for the + // audience decision — jose already enforced `aud` above (a token whose + // `resource` matched but `aud` did not was rejected as wrong_audience + // before we reach here). When present, `resource` rides along untouched + // in `raw` for downstream inspection. return { subject: payload.sub, scopes: parseScopes(payload), @@ -164,7 +166,7 @@ export async function verifyBearer( // widen the accepted set. return { ok: false, reason: "unsupported_alg", detail: `alg=${protectedHeader.alg} not permitted` }; } - const claims = claimsFromPayload(payload, cfg.audience); + const claims = claimsFromPayload(payload); if (!claims) { return { ok: false, reason: "malformed_claims", detail: "missing sub claim" }; } diff --git a/tests/audit-tools.test.js b/tests/audit-tools.test.js index 19ec7b51..b023670c 100644 --- a/tests/audit-tools.test.js +++ b/tests/audit-tools.test.js @@ -9,6 +9,7 @@ * exercised here. */ import { describe, test, expect, beforeEach, afterEach, jest } from '@jest/globals'; +import { z } from 'zod'; import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; @@ -137,6 +138,33 @@ describe('audit_summary tool', () => { expect(sc.errorRate).toBeCloseTo(0.25, 4); expect(sc.topTools[0]).toEqual({ tool: 'alpha', count: 3, errors: 1 }); expect(sc.topTools[1]).toEqual({ tool: 'beta', count: 1, errors: 0 }); + // Tamper-evidence surfaces in the summary. These un-chained legacy lines + // (no _hmac) don't break verification, so the chain reports verified. 
+ expect(sc.verified).toBe(true); + expect(sc.auditDisabled).toBe(false); + }); + + test('exposes tamper-evidence (verified) as part of the declared contract', async () => { + const now = new Date().toISOString(); + writeJsonl('audit.jsonl', [{ timestamp: now, tool: 'alpha', status: 'ok' }]); + const server = createMockServer(); + registerAuditTools(server, createMockConfig()); + const result = await server.callTool('audit_summary', {}); + + // The declared contract MUST carry `verified` — summarizeAuditEntries has + // always computed it, but before it was added to outputSchema the MCP SDK + // would strip it from structuredContent, hiding the strongest trust signal. + const outputSchema = server._tools.get('audit_summary').opts.outputSchema; + expect(Object.keys(outputSchema)).toContain('verified'); + + // structuredContent must conform EXACTLY (strict): every declared field + // present, no undeclared field the SDK would reject. This is the runtime + // contract — registration shape alone wouldn't catch a drift here. + const parsed = z.object(outputSchema).strict().safeParse(result.structuredContent); + if (!parsed.success) { + throw new Error(`audit_summary structuredContent breaks its outputSchema:\n${parsed.error}`); + } + expect(result.structuredContent.verified).toBe(true); }); test('empty audit returns zeros without dividing by zero', async () => { From 44f6de339c5edcbd4e3656ac17470f933ef0f4c9 Mon Sep 17 00:00:00 2001 From: heznpc Date: Thu, 18 Jun 2026 07:25:49 +0900 Subject: [PATCH 2/6] feat: detect audit-log tail truncation + warn on host-derived key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HMAC chain catches edits, inserts, reorders, and genesis-reroot, but a tail-truncated chain is still a valid shorter chain — removing the most recent lines went undetected. Each sealed line now carries a monotonic seq, and every flush overwrites a single signed checkpoint anchoring the highest seq + head. 
verifyAuditChain reports truncated when the chain falls short of the checkpoint and checkpoint_forged when the checkpoint MAC fails. A corrupt or absent checkpoint degrades to chain-only verification (no false alarm); an attacker who removes both the tail and the checkpoint is a documented limit, not a silent gap. Also emit a one-time warning on first flush when the chain is keyed off the host-derived fallback (tamper-evident only, not strong auth) so an operator knows to set AIRMCP_AUDIT_HMAC_KEY for cross-machine integrity. Regenerated tool-manifest.json + MCPIntents.swift for the extended audit_summary outputSchema (verified / verifiedFirstBreak / auditDisabled). --- docs/tool-manifest.json | 37 ++++- src/audit/tools.ts | 2 +- src/shared/audit.ts | 154 +++++++++++++++++- .../AirMCPKit/Generated/MCPIntents.swift | 8 + tests/audit-recovery.test.js | 4 + tests/audit-tamper-detection.test.js | 73 +++++++++ 6 files changed, 267 insertions(+), 11 deletions(-) diff --git a/docs/tool-manifest.json b/docs/tool-manifest.json index 6b3fa597..fb5f6171 100644 --- a/docs/tool-manifest.json +++ b/docs/tool-manifest.json @@ -601,6 +601,39 @@ ], "additionalProperties": false } + }, + "verified": { + "type": "boolean" + }, + "verifiedFirstBreak": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "lineIndex": { + "type": "number" + }, + "reason": { + "type": "string", + "enum": [ + "hmac_mismatch", + "prev_mismatch", + "malformed", + "truncated", + "checkpoint_forged" + ] + } + }, + "required": [ + "file", + "lineIndex", + "reason" + ], + "additionalProperties": false + }, + "auditDisabled": { + "type": "boolean" } }, "required": [ @@ -609,7 +642,9 @@ "errors", "errorRate", "scannedFiles", - "topTools" + "topTools", + "verified", + "auditDisabled" ], "additionalProperties": false }, diff --git a/src/audit/tools.ts b/src/audit/tools.ts index 7d1c8fdc..9b20a105 100644 --- a/src/audit/tools.ts +++ b/src/audit/tools.ts @@ -107,7 +107,7 @@ export function 
registerAuditTools(server: McpServer, _config: AirMcpConfig): vo .object({ file: z.string(), lineIndex: z.number(), - reason: z.enum(["hmac_mismatch", "prev_mismatch", "malformed"]), + reason: z.enum(["hmac_mismatch", "prev_mismatch", "malformed", "truncated", "checkpoint_forged"]), }) .optional(), // Audit logging currently halted (disk full / permission / repeated diff --git a/src/shared/audit.ts b/src/shared/audit.ts index 1a2b441e..9fd426c7 100644 --- a/src/shared/audit.ts +++ b/src/shared/audit.ts @@ -1,4 +1,4 @@ -import { appendFile, chmod, mkdir, readdir, readFile, stat, rename } from "node:fs/promises"; +import { appendFile, chmod, mkdir, readdir, readFile, stat, rename, writeFile } from "node:fs/promises"; import { join } from "node:path"; import { hostname, platform } from "node:os"; import { createHmac } from "node:crypto"; @@ -58,11 +58,16 @@ interface AuditEntry { * (catching log doctoring after-the-fact) but explicitly NOT * strong auth. For high-assurance, set the env. */ -const AUDIT_HMAC_KEY: Buffer = (() => { - const env = process.env.AIRMCP_AUDIT_HMAC_KEY; - if (env && env.length > 0) return Buffer.from(env, "utf-8"); - return Buffer.from(`airmcp-audit::${hostname()}::${platform()}`, "utf-8"); -})(); +/** True when no AIRMCP_AUDIT_HMAC_KEY is set and the chain falls back to a + * host-derived key — tamper-EVIDENT only, not strong auth (an attacker with + * shell access can re-derive it). Surfaced as a one-time warning on first + * flush so an operator in this mode knows it rather than over-trusting the + * chain as cryptographic non-repudiation. */ +const AUDIT_USING_HOST_KEY = (process.env.AIRMCP_AUDIT_HMAC_KEY ?? "").length === 0; +const AUDIT_HMAC_KEY: Buffer = AUDIT_USING_HOST_KEY + ? 
Buffer.from(`airmcp-audit::${hostname()}::${platform()}`, "utf-8") + : Buffer.from(process.env.AIRMCP_AUDIT_HMAC_KEY as string, "utf-8"); +let warnedHostKey = false; const HMAC_GENESIS = "0".repeat(64); @@ -70,10 +75,34 @@ function computeHmac(prev: string, body: string): string { return createHmac("sha256", AUDIT_HMAC_KEY).update(prev).update("\0").update(body).digest("hex"); } +/** + * Tail-truncation anchor. + * + * The HMAC chain detects edits, insertions, reorders, and genesis-reroot — + * but NOT removal of the most recent lines: a truncated chain is still a valid + * shorter chain rooted at genesis. So every sealed line carries a monotonic + * `seq`, and on each flush we overwrite a single signed checkpoint recording + * the highest `seq` + chain head. `verifyAuditChain` reports + * `truncated` when the checkpoint references a `seq` past the chain's last + * line. The checkpoint's MAC is domain-separated from chain HMACs, so it can't + * be forged or rolled back without the key — same trust grade as the chain. + * Deleting the checkpoint disables only the truncation check; the rest of the + * chain still verifies. + */ +const CHECKPOINT_PATH = join(PATHS.VECTOR_STORE, "audit.checkpoint"); +const CHECKPOINT_DOMAIN = "airmcp-audit-checkpoint-v1"; + +function checkpointMac(seq: number, hmac: string): string { + return computeHmac(CHECKPOINT_DOMAIN, `${seq}:${hmac}`); +} + /** In-memory chain head — updated on every appended line. Resumed from * the on-disk tail at first flush so process restarts don't fork the * chain. */ let lastHmac: string = HMAC_GENESIS; +/** Monotonic per-line sequence, resumed from the disk tail at first flush + * alongside `lastHmac`. -1 means no chained line has been written yet. 
*/ +let lastSeq = -1; let chainResumed = false; /** @@ -229,9 +258,14 @@ async function scanFileForChainHead(path: string, isPrimary: boolean): Promise 0) { log.warn("audit: resumed chain past malformed lines — possible tampering or corruption", { lastHmacPrefix: parsed._hmac.slice(0, 8), @@ -273,6 +307,10 @@ async function flushBuffer(): Promise { try { const obj = JSON.parse(raw) as Record; const prev = lastHmac; + // Stamp a monotonic seq into the SIGNED body (added last so the parsed- + // object insertion order the verifier relies on is preserved). The + // truncation checkpoint anchors against this seq. + obj.seq = ++lastSeq; // _hmac is computed from the body PRE-attachment. The signed payload is // the JSON without _hmac/_prev, so verifiers reconstruct the same body. const body = JSON.stringify(obj); @@ -295,16 +333,19 @@ async function flushBuffer(): Promise { } } const lines = sealedLines.join("\n") + "\n"; + let appended = false; try { await ensureDir(); await appendFile(AUDIT_PATH, lines, { encoding: "utf-8", mode: 0o600 }); await rotateIfNeeded(); consecutiveFlushFailures = 0; + appended = true; } catch { // Retry once try { await appendFile(AUDIT_PATH, lines, { encoding: "utf-8", mode: 0o600 }); consecutiveFlushFailures = 0; + appended = true; } catch (retryErr) { consecutiveFlushFailures++; log.error("audit: flush failed", { @@ -328,6 +369,41 @@ async function flushBuffer(): Promise { } finally { flushing = false; } + // Outside the flush critical section: a checkpoint failure must never fail + // or retry the append. Anchors the truncation guard at the seq just sealed. + if (appended) { + await writeCheckpoint(); + warnHostKeyOnce(); + } +} + +/** Persist the signed tail-truncation checkpoint (single small write — a + * parse/shape failure on read is treated as "absent", not tampering, so a + * rare torn write never produces a false alarm). 
Best-effort: a failure here + * only weakens truncation detection until the next successful flush — it must + * not disturb the append that already landed. */ +async function writeCheckpoint(): Promise<void> { + if (lastSeq < 0) return; // nothing chained yet + try { + const mac = checkpointMac(lastSeq, lastHmac); + const payload = JSON.stringify({ seq: lastSeq, hmac: lastHmac, mac }) + "\n"; + await writeFile(CHECKPOINT_PATH, payload, { encoding: "utf-8", mode: 0o600 }); + } catch (err) { + log.warn("audit: checkpoint write failed — tail-truncation detection degraded until next flush", { + err: errToCtx(err), + }); + } +} + +/** One-time warning when the chain is keyed off the host-derived fallback. + * Fires on first successful flush (not at import) to avoid noise in tests + * and short-lived CLI invocations that never write audit lines. */ +function warnHostKeyOnce(): void { + if (warnedHostKey || !AUDIT_USING_HOST_KEY) return; + warnedHostKey = true; + log.warn("audit: HMAC chain keyed off host-derived fallback — tamper-EVIDENT only, not strong auth", { + note: "an attacker with shell access can re-derive this key; set AIRMCP_AUDIT_HMAC_KEY for cross-machine / strong integrity", + }); } async function rotateIfNeeded(): Promise { @@ -383,6 +459,8 @@ export function _testReset(): string[] { auditDisabled = false; auditDisabledSince = 0; lastHmac = HMAC_GENESIS; + lastSeq = -1; + warnedHostKey = false; chainResumed = false; return snapshot; } @@ -526,6 +604,10 @@ export async function readAuditEntries(opts: ReadAuditOptions = {}): Promise { let prev: string = HMAC_GENESIS; let chainStarted = false; + // Highest seq + head hmac seen in the chain — compared against the signed + // checkpoint after the walk to catch lines removed from the end.
+ let chainLastSeq = -1; + let chainHeadHmac: string = HMAC_GENESIS; for await (const { line, file, lineIndex } of readAllAuditLinesIndexed()) { let entry: Record; try { @@ -639,10 +725,60 @@ async function verifyAuditChain(): Promise<{ } prev = hmacField; chainStarted = true; + chainHeadHmac = hmacField; + if (typeof entry.seq === "number" && Number.isInteger(entry.seq)) chainLastSeq = entry.seq; + } + // Tail-truncation check: the signed checkpoint records the highest seq + + // head sealed at the last flush. A chain that doesn't reach that seq means + // lines were removed from the end after they were anchored (a plain chain + // replay can't see this — a truncated chain is a valid shorter chain). + const ck = await readCheckpoint(); + if (ck) { + if (ck.mac !== checkpointMac(ck.seq, ck.hmac)) { + // Present but the MAC (which needs the key) doesn't match → the + // checkpoint itself was edited / rolled back. + return { verified: false, firstBreak: { file: "audit.checkpoint", lineIndex: -1, reason: "checkpoint_forged" } }; + } + if (ck.seq > chainLastSeq || (ck.seq === chainLastSeq && ck.hmac !== chainHeadHmac)) { + return { verified: false, firstBreak: { file: "audit.checkpoint", lineIndex: -1, reason: "truncated" } }; + } } return { verified: true }; } +/** Read + shape-validate the truncation checkpoint. Returns null when absent + * OR present-but-unparseable / wrong-shape — both degrade to "no truncation + * check, chain still verifies on its own" rather than a false alarm (a torn + * write is indistinguishable from corruption, and the moat must not cry wolf; + * deleting the checkpoint is already an undetectable disable, documented). + * A well-formed checkpoint with a WRONG MAC is the real forgery signal — + * that's returned here so `verifyAuditChain` reports `checkpoint_forged`. 
*/ +async function readCheckpoint(): Promise<{ seq: number; hmac: string; mac: string } | null> { + let raw: string; + try { + raw = await readFile(CHECKPOINT_PATH, "utf-8"); + } catch { + return null; + } + try { + const obj = JSON.parse(raw.trim()) as { seq?: unknown; hmac?: unknown; mac?: unknown }; + if ( + typeof obj.seq === "number" && + Number.isInteger(obj.seq) && + obj.seq >= 0 && + typeof obj.hmac === "string" && + /^[0-9a-f]{64}$/.test(obj.hmac) && + typeof obj.mac === "string" && + /^[0-9a-f]{64}$/.test(obj.mac) + ) { + return { seq: obj.seq, hmac: obj.hmac, mac: obj.mac }; + } + } catch { + // unparseable — fall through to absent + } + return null; +} + async function* readAllAuditLinesIndexed(): AsyncGenerator<{ line: string; file: string; lineIndex: number }> { let files: string[]; try { diff --git a/swift/Sources/AirMCPKit/Generated/MCPIntents.swift b/swift/Sources/AirMCPKit/Generated/MCPIntents.swift index 3d324b53..dafb3f69 100644 --- a/swift/Sources/AirMCPKit/Generated/MCPIntents.swift +++ b/swift/Sources/AirMCPKit/Generated/MCPIntents.swift @@ -50,6 +50,11 @@ public struct MCPAuditSummaryOutput: Codable, Sendable { public let count: Double public let errors: Double } + public struct Verifiedfirstbreak: Codable, Sendable { + public let file: String + public let lineIndex: Double + public let reason: String + } public let since: String public let total: Double @@ -57,6 +62,9 @@ public struct MCPAuditSummaryOutput: Codable, Sendable { public let errorRate: Double public let scannedFiles: Double public let topTools: [ToptoolsItem] + public let verified: Bool + public let verifiedFirstBreak: Verifiedfirstbreak? 
+ public let auditDisabled: Bool } // Output type for: discover_tools diff --git a/tests/audit-recovery.test.js b/tests/audit-recovery.test.js index fc50fcca..0fd8a9f0 100644 --- a/tests/audit-recovery.test.js +++ b/tests/audit-recovery.test.js @@ -30,6 +30,7 @@ const chmod = jest.fn(); const rename = jest.fn(); const readFile = jest.fn(); const readdir = jest.fn(); +const writeFile = jest.fn(); jest.unstable_mockModule('node:fs/promises', () => ({ appendFile, @@ -39,6 +40,7 @@ jest.unstable_mockModule('node:fs/promises', () => ({ rename, readFile, readdir, + writeFile, })); const { @@ -58,6 +60,7 @@ beforeEach(() => { rename.mockReset(); readFile.mockReset(); readdir.mockReset(); + writeFile.mockReset(); // Default happy-path mock behaviour. Individual tests override. appendFile.mockResolvedValue(undefined); @@ -67,6 +70,7 @@ beforeEach(() => { rename.mockResolvedValue(undefined); readFile.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' })); readdir.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' })); + writeFile.mockResolvedValue(undefined); }); // ── flushBuffer error paths ─────────────────────────────────────────── diff --git a/tests/audit-tamper-detection.test.js b/tests/audit-tamper-detection.test.js index 08b5e3c8..ec875441 100644 --- a/tests/audit-tamper-detection.test.js +++ b/tests/audit-tamper-detection.test.js @@ -159,3 +159,76 @@ describe('audit chain tamper detection', () => { expect(summary.verified).toBe(false); }); }); + +const CHECKPOINT_PATH = join(workDir, 'audit.checkpoint'); + +describe('audit chain tail-truncation detection (signed checkpoint)', () => { + beforeEach(async () => { + await seedFiveEntries(); + }); + + test('flush writes a signed checkpoint and the clean chain verifies', async () => { + // Seeding flushed 5 sealed lines + a checkpoint anchoring the highest seq. 
+ const ck = JSON.parse(await readFile(CHECKPOINT_PATH, 'utf-8')); + expect(ck.seq).toBe(4); + expect(ck.hmac).toMatch(/^[0-9a-f]{64}$/); + expect(ck.mac).toMatch(/^[0-9a-f]{64}$/); + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + expect(summary.verified).toBe(true); + expect(summary.verifiedFirstBreak).toBeUndefined(); + }); + + test('removing the last lines (valid shorter chain) → verified:false, truncated', async () => { + // Drop the final 2 lines. The remaining chain (seq 0..2) still verifies + // line-by-line — a plain replay would report verified:true. The checkpoint + // (seq=4) is what catches the missing tail. + const lines = (await readFile(AUDIT_PATH, 'utf-8')).trimEnd().split('\n'); + expect(lines).toHaveLength(5); + await writeFile(AUDIT_PATH, lines.slice(0, 3).join('\n') + '\n', 'utf-8'); + + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + expect(summary.verified).toBe(false); + expect(summary.verifiedFirstBreak).toBeDefined(); + expect(summary.verifiedFirstBreak.reason).toBe('truncated'); + }); + + test('editing the checkpoint MAC without the key → verified:false, checkpoint_forged', async () => { + const ck = JSON.parse(await readFile(CHECKPOINT_PATH, 'utf-8')); + ck.mac = 'a'.repeat(64); // valid hex shape, wrong MAC — forging it needs the key + await writeFile(CHECKPOINT_PATH, JSON.stringify(ck) + '\n', 'utf-8'); + + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + expect(summary.verified).toBe(false); + expect(summary.verifiedFirstBreak.reason).toBe('checkpoint_forged'); + }); + + test('a corrupt (unparseable) checkpoint degrades to chain-only (no false alarm)', async () => { + // A torn/corrupt checkpoint is indistinguishable from a partial write, so + // it's treated as absent rather than tampering — the moat must not cry + // wolf. 
The clean chain still verifies; truncation detection is simply off + // until the next flush rewrites the checkpoint. + await writeFile(CHECKPOINT_PATH, '{not valid json', 'utf-8'); + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + expect(summary.verified).toBe(true); + }); + + test('deleting the checkpoint disables only truncation — clean chain still verifies', async () => { + await rm(CHECKPOINT_PATH, { force: true }); + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + // Back-compat: a missing checkpoint must not turn a clean chain red. + expect(summary.verified).toBe(true); + }); + + test('truncation with the checkpoint also removed is NOT falsely flagged (documented limit)', async () => { + // Honest boundary: an attacker who removes BOTH the tail lines AND the + // checkpoint leaves a valid shorter chain with no anchor. The checkpoint + // raises the bar against naive log-doctoring; it is not an absolute + // guarantee against someone who can rewrite the whole directory. We assert + // the real behaviour rather than over-claiming detection. + const lines = (await readFile(AUDIT_PATH, 'utf-8')).trimEnd().split('\n'); + await writeFile(AUDIT_PATH, lines.slice(0, 3).join('\n') + '\n', 'utf-8'); + await rm(CHECKPOINT_PATH, { force: true }); + const summary = await summarizeAuditEntries({ since: '2020-01-01T00:00:00Z' }); + expect(summary.verified).toBe(true); + }); +}); From d975eeff771da00055d1b97f62aa28bcb498e790 Mon Sep 17 00:00:00 2001 From: heznpc Date: Thu, 18 Jun 2026 07:34:11 +0900 Subject: [PATCH 3/6] fix: write Numbers cells with native number/boolean types, not text numbers_set_cell forced every value through a single-quoted JXA string literal, so 42 landed as the text "42" (left-aligned, unsortable, invisible to formulas) and only Numbers' own coercion rescued some cases. 
The value schema is now a string | number | boolean union and setCellScript emits a native JS literal for numbers/booleans (quoted + escaped only for strings, where a leading '=' is still interpreted by Numbers as a formula). A scalar-union param previously returned null from swiftTypeFor and was silently dropped from the generated AppIntent, which would have removed value from the Siri/Shortcuts "Set Numbers Cell" intent. swiftTypeFor now projects a scalar union to its string member (String) so the intent keeps a usable text parameter. Regenerated tool-manifest.json + MCPIntents.swift. --- docs/tool-manifest.json | 18 ++++++++--- scripts/lib/codegen-helpers.mjs | 11 +++++++ src/numbers/scripts.ts | 15 +++++++-- src/numbers/tools.ts | 9 ++++-- .../AirMCPKit/Generated/MCPIntents.swift | 4 +-- tests/codegen-helpers.test.js | 10 ++++++ tests/numbers-tools.test.js | 31 +++++++++++++++++++ 7 files changed, 88 insertions(+), 10 deletions(-) diff --git a/docs/tool-manifest.json b/docs/tool-manifest.json index fb5f6171..abb58d83 100644 --- a/docs/tool-manifest.json +++ b/docs/tool-manifest.json @@ -8348,7 +8348,7 @@ { "name": "numbers_set_cell", "title": "Set Numbers Cell", - "description": "Write a value to a single cell.", + "description": "Write a value to a single cell. Numbers and booleans land as native cell types (not text), so they sort and feed formulas correctly; strings are written verbatim and Numbers interprets a leading '=' as a formula.", "inputSchema": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", @@ -8369,9 +8369,19 @@ "description": "Cell address (e.g. 
'A1')" }, "value": { - "type": "string", - "maxLength": 10000, - "description": "Value to write" + "anyOf": [ + { + "type": "string", + "maxLength": 10000 + }, + { + "type": "number" + }, + { + "type": "boolean" + } + ], + "description": "Value to write (number, boolean, or text)" } }, "required": [ diff --git a/scripts/lib/codegen-helpers.mjs b/scripts/lib/codegen-helpers.mjs index e90f0357..5f9031b2 100644 --- a/scripts/lib/codegen-helpers.mjs +++ b/scripts/lib/codegen-helpers.mjs @@ -136,6 +136,17 @@ export function swiftTypeFor(propSchema) { if (propSchema.type === "number") return "Double"; if (propSchema.type === "boolean") return "Bool"; if (propSchema.type === "array" && propSchema.items?.type === "string") return "[String]"; + // Scalar union (anyOf) — e.g. a cell value that may be string | number | + // boolean. The AppIntent surface projects it to a single typed @Parameter; + // prefer the string member (a text field can express any of them and the MCP + // layer coerces) so the param isn't dropped, else fall back to the first + // scalar member. Without this, an anyOf prop returns null and the parameter + // is silently skipped — losing it from the generated Siri/Shortcuts intent. + if (Array.isArray(propSchema.anyOf)) { + const members = propSchema.anyOf.map((m) => swiftTypeFor(m)).filter((t) => t !== null); + if (members.includes("String")) return "String"; + return members[0] ?? 
null; + } return null; } diff --git a/src/numbers/scripts.ts b/src/numbers/scripts.ts index 30b23494..2913104b 100644 --- a/src/numbers/scripts.ts +++ b/src/numbers/scripts.ts @@ -46,12 +46,23 @@ export function getCellScript(documentName: string, sheet: string, cell: string) `; } -export function setCellScript(documentName: string, sheet: string, cell: string, value: string): string { +export function setCellScript( + documentName: string, + sheet: string, + cell: string, + value: string | number | boolean, +): string { + // Numbers and booleans are emitted as native JS literals so the cell holds a + // real number/boolean (sortable, formula-referenceable) instead of text. Only + // strings are quoted + escaped; a string like '=SUM(A1:A10)' is interpreted by + // Numbers as a formula on assignment. (value is constrained to a finite + // number / boolean / string by the tool's input schema.) + const valueLiteral = typeof value === "string" ? `'${esc(value)}'` : String(value); return ` const Numbers = Application('com.apple.Numbers'); ${iworkDocLookup("Numbers", documentName)} ${sheetTableLookup(sheet)} - table.cells['${esc(cell)}'].value = '${esc(value)}'; + table.cells['${esc(cell)}'].value = ${valueLiteral}; JSON.stringify({written: true, address: '${esc(cell)}'}); `; } diff --git a/src/numbers/tools.ts b/src/numbers/tools.ts index e494077f..d49c9853 100644 --- a/src/numbers/tools.ts +++ b/src/numbers/tools.ts @@ -124,12 +124,17 @@ export function registerNumbersTools(server: McpServer, _config: AirMcpConfig): "numbers_set_cell", { title: "Set Numbers Cell", - description: "Write a value to a single cell.", + description: + "Write a value to a single cell. 
Numbers and booleans land as native cell types (not " + + "text), so they sort and feed formulas correctly; strings are written verbatim and " + + "Numbers interprets a leading '=' as a formula.", inputSchema: { document: z.string().max(500).describe("Document name"), sheet: z.string().max(500).describe("Sheet name"), cell: z.string().max(500).describe("Cell address (e.g. 'A1')"), - value: z.string().max(10000).describe("Value to write"), + value: z + .union([z.string().max(10000), z.number().finite(), z.boolean()]) + .describe("Value to write (number, boolean, or text)"), }, annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true, openWorldHint: false }, }, diff --git a/swift/Sources/AirMCPKit/Generated/MCPIntents.swift b/swift/Sources/AirMCPKit/Generated/MCPIntents.swift index dafb3f69..6ee8b66f 100644 --- a/swift/Sources/AirMCPKit/Generated/MCPIntents.swift +++ b/swift/Sources/AirMCPKit/Generated/MCPIntents.swift @@ -5214,7 +5214,7 @@ public struct NumbersRenameSheetIntent: AppIntent { // Tool: numbers_set_cell public struct NumbersSetCellIntent: AppIntent { nonisolated(unsafe) public static var title: LocalizedStringResource = "Set Numbers Cell" - nonisolated(unsafe) public static var description = IntentDescription("Write a value to a single cell.") + nonisolated(unsafe) public static var description = IntentDescription("Write a value to a single cell. Numbers and booleans land as native cell types (not text), so they sort and feed formulas correctly; strings are written verbatim and Numbers interprets a leading '=' as a formula.") nonisolated(unsafe) public static var openAppWhenRun: Bool = false public init() {} @@ -5228,7 +5228,7 @@ public struct NumbersSetCellIntent: AppIntent { @Parameter(title: "Cell address (e.g. 
'A1')") public var cell: String - @Parameter(title: "Value to write") + @Parameter(title: "Value to write (number, boolean, or text)") public var value: String @MainActor diff --git a/tests/codegen-helpers.test.js b/tests/codegen-helpers.test.js index 7781b50d..296720c1 100644 --- a/tests/codegen-helpers.test.js +++ b/tests/codegen-helpers.test.js @@ -266,6 +266,16 @@ describe("swiftTypeFor", () => { expect(swiftTypeFor({ type: "array", items: { type: "object" } })).toBeNull(); expect(swiftTypeFor({ type: "unknown" })).toBeNull(); }); + + test("scalar union (anyOf) → String when a string member exists, else first scalar", () => { + // The set_cell value param: string | number | boolean → projects to String + // so the AppIntent keeps the parameter (a text field expresses all three). + expect(swiftTypeFor({ anyOf: [{ type: "string" }, { type: "number" }, { type: "boolean" }] })).toBe("String"); + // No string member → first usable scalar. + expect(swiftTypeFor({ anyOf: [{ type: "number" }, { type: "boolean" }] })).toBe("Double"); + // A union of only composite members stays null (still dropped). 
+ expect(swiftTypeFor({ anyOf: [{ type: "object" }] })).toBeNull(); + }); }); describe("appEntityTypeForParam", () => { diff --git a/tests/numbers-tools.test.js b/tests/numbers-tools.test.js index 7fe189f1..c2735af5 100644 --- a/tests/numbers-tools.test.js +++ b/tests/numbers-tools.test.js @@ -69,4 +69,35 @@ describe('Numbers tools registration', () => { expect(typeof config.annotations.destructiveHint).toBe('boolean'); } }); + + describe('numbers_set_cell — native value typing (not text)', () => { + beforeEach(() => mockRunJxa.mockReset()); + + test('a number is written as a native numeric literal, not quoted text', async () => { + mockRunJxa.mockResolvedValueOnce('{"written":true,"address":"A1"}'); + await server.callTool('numbers_set_cell', { document: 'D', sheet: 'S', cell: 'A1', value: 42 }); + const script = mockRunJxa.mock.calls[0][0]; + // The cell must receive the number 42, not the string "42" — a quoted + // value lands as text and breaks sorting / formula references. + expect(script).toContain('.value = 42;'); + expect(script).not.toContain("= '42'"); + }); + + test('a boolean is written as a native boolean literal', async () => { + mockRunJxa.mockResolvedValueOnce('{"written":true,"address":"B2"}'); + await server.callTool('numbers_set_cell', { document: 'D', sheet: 'S', cell: 'B2', value: true }); + expect(mockRunJxa.mock.calls[0][0]).toContain('.value = true;'); + }); + + test('a string (incl. 
a formula) stays quoted + escaped', async () => { + mockRunJxa.mockResolvedValueOnce('{"written":true,"address":"C3"}'); + await server.callTool('numbers_set_cell', { + document: 'D', + sheet: 'S', + cell: 'C3', + value: '=SUM(A1:A10)', + }); + expect(mockRunJxa.mock.calls[0][0]).toContain(".value = '=SUM(A1:A10)';"); + }); + }); }); From f294607a9976536294b59bb4fc7482441182ea8e Mon Sep 17 00:00:00 2001 From: heznpc Date: Thu, 18 Jun 2026 07:38:04 +0900 Subject: [PATCH 4/6] fix: document no-Origin allow-by-default + add AIRMCP_DENY_NO_ORIGIN opt-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isOriginAllowed has always allowed requests with no Origin header, but RFC 0002 documented the opposite (default-deny + an AIRMCP_TRUST_NO_ORIGIN bypass that was never implemented). Allow-by-default is the correct call: a browser always attaches Origin to a cross-origin request, so a missing Origin is a non-browser client (curl, native MCP) already gated by the token/OAuth policy — denying it would break those clients for no security gain. Made the rationale explicit in code, reconciled RFC 0002 section 5.2 to match, and added an opt-in strict mode (AIRMCP_DENY_NO_ORIGIN=1) for browser-only deployments that genuinely want to reject Origin-less requests. --- docs/rfc/0002-http-allow-network.md | 2 +- src/server/http-transport.ts | 17 +++++++++++++--- tests/http-transport.test.js | 30 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/docs/rfc/0002-http-allow-network.md b/docs/rfc/0002-http-allow-network.md index f5118b6c..d2767184 100644 --- a/docs/rfc/0002-http-allow-network.md +++ b/docs/rfc/0002-http-allow-network.md @@ -189,7 +189,7 @@ Managed Agents·디스커버리 클라이언트가 정책을 사전에 확인 ## 5. Open Questions 1. **`unauthenticated` 모드를 제공할 가치가 있는가?** CI 환경·로컬 fuzz 테스트에서는 편하지만, 잘못 사용될 위험. 제안: stdout/stderr에 큰 경고 + `.well-known/mcp.json` `security: insecure` 공개 + audit 로그 매 요청 기록. -2. 
**`allowNetwork=with-token+origin`에서 `Origin` 헤더가 없는 요청(예: curl)은?** 기본 거부. MCP 클라이언트는 Origin을 보내므로 영향 없음. 단, 스모크 테스트 스크립트가 영향받을 수 있어 `AIRMCP_TRUST_NO_ORIGIN=true` 환경변수로 우회 제공. +2. **`Origin` 헤더가 없는 요청(예: curl, 네이티브 MCP 클라이언트)은?** **기본 허용 — 해결됨.** 브라우저는 cross-origin 요청에 항상 `Origin`을 붙이므로 `Origin` 부재는 브라우저발 CSRF/DNS-rebinding이 아니라 비-브라우저 클라이언트를 뜻하며, 이들은 이미 토큰/OAuth 정책으로 게이트된다. 기본 거부는 정당한 로컬 클라이언트를 보안 이득 없이 깨므로 채택하지 않았다. 브라우저 전용 배포를 위한 엄격 모드는 `AIRMCP_DENY_NO_ORIGIN=1`로 opt-in. (이 문서 초안이 적었던 `AIRMCP_TRUST_NO_ORIGIN`(기본 거부 전제)은 구현된 적이 없고, 실제 기본값은 허용이다 — `src/server/http-transport.ts: isOriginAllowed`.) 3. **CIDR allow-list까지 가야 하는가?** 필요 시 v2.9.0 `trustedNetworks: ["10.0.0.0/8"]` 형태로 추가. 초기 스코프 아님. 4. **`--bind-all`의 IPv6 대응**: 현재 IPv4만 고려. `::` 바인딩도 동일 정책 적용해야 함. 구현 시점에 확인. diff --git a/src/server/http-transport.ts b/src/server/http-transport.ts index 528a0ea2..bd8bc2e0 100644 --- a/src/server/http-transport.ts +++ b/src/server/http-transport.ts @@ -143,9 +143,14 @@ export function parseAllowedOrigins(raw: string): Set<string> { export function isOriginAllowed( origin: string | undefined, - ctx: { policy: AllowNetwork; bindAll: boolean; allowedOrigins: Set<string> }, + ctx: { policy: AllowNetwork; bindAll: boolean; allowedOrigins: Set<string>; denyNoOrigin?: boolean }, ): boolean { - if (!origin) return true; + // A browser always attaches Origin to a cross-origin request, so a MISSING + // Origin is a non-browser client (curl, a native MCP client) — already gated + // by the token / OAuth policy, never a browser CSRF / DNS-rebinding vector. + // Allowed by default; `denyNoOrigin` (AIRMCP_DENY_NO_ORIGIN) opts into a + // strict deny for deployments that only ever serve browser clients. 
+ if (!origin) return !ctx.denyNoOrigin; const normalized = normalizeOrigin(origin); if (!normalized) return false; @@ -284,9 +289,15 @@ export async function startHttpServer(options: HttpServerOptions): Promise { if (req.path !== "/mcp") return next(); - if (isOriginAllowed(req.headers.origin, { policy: allowNetwork, bindAll, allowedOrigins })) return next(); + if (isOriginAllowed(req.headers.origin, { policy: allowNetwork, bindAll, allowedOrigins, denyNoOrigin })) + return next(); res.status(403).json({ error: "Forbidden: Origin not allowed" }); }); diff --git a/tests/http-transport.test.js b/tests/http-transport.test.js index ef39f034..7456142e 100644 --- a/tests/http-transport.test.js +++ b/tests/http-transport.test.js @@ -164,6 +164,36 @@ describe('HTTP Origin allow-list helpers', () => { allowedOrigins: new Set(), })).toBe(true); }); + + test('a missing Origin is allowed by default (non-browser client, token-gated)', () => { + // A browser always sends Origin on a cross-origin request, so no Origin is + // a non-browser client the token / OAuth policy already gates. True across + // policies — denying it by default would break curl / native MCP clients. + for (const policy of ['with-token', 'with-token+origin', 'with-oauth+origin']) { + expect(isOriginAllowed(undefined, { + policy, + bindAll: true, + allowedOrigins: parseAllowedOrigins('https://claude.ai'), + })).toBe(true); + } + }); + + test('denyNoOrigin (AIRMCP_DENY_NO_ORIGIN) strict mode rejects a missing Origin', () => { + const allowedOrigins = parseAllowedOrigins('https://claude.ai'); + expect(isOriginAllowed(undefined, { + policy: 'with-token+origin', + bindAll: true, + allowedOrigins, + denyNoOrigin: true, + })).toBe(false); + // A real allow-listed Origin still passes under strict mode. 
+ expect(isOriginAllowed('https://claude.ai', { + policy: 'with-token+origin', + bindAll: true, + allowedOrigins, + denyNoOrigin: true, + })).toBe(true); + }); }); describe('validateNetworkPolicy', () => { From bfbd11fc7e2512cab664a47d545c5c63619e8f63 Mon Sep 17 00:00:00 2001 From: heznpc Date: Thu, 18 Jun 2026 07:51:12 +0900 Subject: [PATCH 5/6] docs: reconcile public tool count to 286 (manifest source of truth) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The public "272 tools" headline counted only server.registerTool() call sites and undercounted the real runtime surface — the generated manifest exposes 286 (it also includes the dynamically-registered, skill_*, and MCP-app tools). count-stats now reads manifest.toolCount as the single source of truth and propagates 286 across README, the docs site, registry manifests, the landing page, and locales. Left as-is on purpose: the "~111 tools" starter-preset figure, the v2.7 "262 tools across 27 modules" pitch quoted in REGISTRY_SUBMISSIONS, and the point-in-time counts inside RFC 0013/0014 (historical context, not current claims). tool-count-drift now asserts README advertises exactly manifest.toolCount, instead of carrying a stale "superset" comment — guarding the headline against future drift. 
--- .claude-plugin/plugin.json | 2 +- .github/AGENTS.md | 2 +- README.md | 16 ++++++------- docs/REGISTRY_SUBMISSIONS.md | 6 ++--- docs/TERMS_OF_SERVICE.md | 2 +- docs/direction.md | 2 +- docs/environment.md | 2 +- docs/index.html | 10 ++++---- docs/locales/en.json | 6 ++--- .../src/content/docs/architecture/overview.md | 2 +- docs/site/src/content/docs/index.mdx | 4 ++-- .../site/src/content/docs/modules/overview.md | 2 +- docs/skills.md | 2 +- glama.json | 2 +- mcp.json | 2 +- scripts/count-stats.mjs | 21 +++++++++++++---- server.json | 2 +- smithery.yaml | 2 +- tests/tool-count-drift.test.js | 23 ++++++++++--------- 19 files changed, 61 insertions(+), 49 deletions(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ee4099cf..30f9c0cf 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "airmcp", "displayName": "AirMCP", - "description": "Apple-native MCP server with production governance built in — HMAC-chained audit log, per-call HITL, OAuth 2.1 + Resource Indicators, scope gate, rate limit, emergency stop file. 272 tools across 29 modules: Calendar, Notes, Mail, Reminders, Contacts, Messages, Music, Finder, Safari, Photos, Maps, Podcasts, Weather, iWork, Google Workspace, Apple Intelligence, UI Automation, Shortcuts, Context Memory. Native Swift bridges into EventKit, HealthKit, PhotoKit, Vision, Foundation Models. macOS local-first, multi-client (Claude, Codex, opencode, Gemini CLI, Antigravity, Cursor, Zed, Cline, ChatGPT MCP Apps).", + "description": "Apple-native MCP server with production governance built in — HMAC-chained audit log, per-call HITL, OAuth 2.1 + Resource Indicators, scope gate, rate limit, emergency stop file. 286 tools across 29 modules: Calendar, Notes, Mail, Reminders, Contacts, Messages, Music, Finder, Safari, Photos, Maps, Podcasts, Weather, iWork, Google Workspace, Apple Intelligence, UI Automation, Shortcuts, Context Memory. 
Native Swift bridges into EventKit, HealthKit, PhotoKit, Vision, Foundation Models. macOS local-first, multi-client (Claude, Codex, opencode, Gemini CLI, Antigravity, Cursor, Zed, Cline, ChatGPT MCP Apps).", "version": "2.12.1", "author": { "name": "heznpc", diff --git a/.github/AGENTS.md b/.github/AGENTS.md index 045ef764..99617ed9 100644 --- a/.github/AGENTS.md +++ b/.github/AGENTS.md @@ -83,7 +83,7 @@ tests/ # Script generator tests ## Stats -- **272 tools** across 27 modules (+ dynamic shortcut tools at runtime) +- **286 tools** across 29 modules (+ dynamic shortcut tools at runtime) - **32 prompts** (per-module + cross-module + YAML skills) - **8 MCP resources** (Notes, Calendar, Reminders, Music, Mail, System, Context Snapshot) diff --git a/README.md b/README.md index 4d68fab1..3a62e413 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,13 @@ **Part of:** Human-Controlled AI Systems · Research Program 1 (anchor — Apple-side agent governance). -**Requires**: macOS for the server. 
The default `npx -y airmcp` loads a curated **starter** module set (~111 tools); `--full` (or `AIRMCP_FULL=true`) enables all 29 modules / 286 tools. Most tools are pure JXA and work on macOS 14+ with no extra setup. **Swift-backed tools** — HealthKit, on-device semantic search, recurring events/reminders, photo import/delete/classify, Vision, Speech, Location, Bluetooth, and Apple Intelligence previews — need the **optional Swift bridge** — build it from a source checkout with `npm run swift-build` (it is shipped in **neither** the npm tarball **nor** the `.mcpb` bundle; the bundled macOS app does carry it); without it those tools return a clear "Swift bridge not found" error and everything else keeps working. FoundationModels-backed Apple Intelligence and `AskAirMCPIntent` additionally require macOS 26+ on Apple Silicon and an opt-in Swift build with `AIRMCP_ENABLE_FOUNDATION_MODELS`. > Available in multiple languages at the [project landing page](https://heznpc.github.io/AirMCP/). ## What this is — at a glance -- **Currently implemented** — 272 tools across 29 modules (a curated **starter** set loads by default; `--full` enables all; Swift-backed tools need the optional bridge, see Requires above); HMAC-chained audit log with tamper-detection test suite; HITL approval per destructive call; rate limit (60/min + 10 destructive/hr); `allowNetwork` inbound HTTP exposure policy (RFC 0002); OAuth 2.1 + Resource Indicators (RFC 0005 Steps 1+2 — RS256/ES256 JWT, scope gate, `.well-known/oauth-protected-resource` per RFC 9728); sessionless `.well-known/mcp.json` discovery; 233 Shortcuts/AppIntents auto-generated from the tool manifest; native SwiftUI menubar app (ad-hoc signed; Developer ID notarization pending); Claude Code plugin package (`.claude-plugin/plugin.json` + `.mcp.json` at repo root, with the `.mcp.json` invocation pinned to the same npm version as the manifest so the marketplace SHA-approval and the installed runtime always agree). 
On every CI run, `npm run mcp:validate` boots the built `dist/index.js` under a pinned [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) `--cli` and checks the `tools/list` response for JSON-RPC envelope drift, embedded error envelopes, and zero-tool responses — this is a wire-shape gate, not a substitute for the HMAC / HITL / audit primitives, which have their own tests. +- **Currently implemented** — 286 tools across 29 modules (a curated **starter** set loads by default; `--full` enables all; Swift-backed tools need the optional bridge, see Requires above); HMAC-chained audit log with tamper-detection test suite; HITL approval per destructive call; rate limit (60/min + 10 destructive/hr); `allowNetwork` inbound HTTP exposure policy (RFC 0002); OAuth 2.1 + Resource Indicators (RFC 0005 Steps 1+2 — RS256/ES256 JWT, scope gate, `.well-known/oauth-protected-resource` per RFC 9728); sessionless `.well-known/mcp.json` discovery; 233 Shortcuts/AppIntents auto-generated from the tool manifest; native SwiftUI menubar app (ad-hoc signed; Developer ID notarization pending); Claude Code plugin package (`.claude-plugin/plugin.json` + `.mcp.json` at repo root, with the `.mcp.json` invocation pinned to the same npm version as the manifest so the marketplace SHA-approval and the installed runtime always agree). On every CI run, `npm run mcp:validate` boots the built `dist/index.js` under a pinned [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) `--cli` and checks the `tools/list` response for JSON-RPC envelope drift, embedded error envelopes, and zero-tool responses — this is a wire-shape gate, not a substitute for the HMAC / HITL / audit primitives, which have their own tests. 
- **Planned** — RFC 0005 Step 3 browser PKCE guide; stateless streamable HTTP for horizontal scale per MCP 2026 roadmap; iOS/visionOS exploration (v3.0+); consolidated registry re-publishing across Anthropic MCP Registry, Smithery, PulseMCP, Glama, MCP Market, Cline Marketplace, LobeHub (the `.well-known/mcp.json` endpoint is published, `mcpName` is set, and past ad-hoc registrations exist on some registries but their versions/metadata have drifted out of date — a single self-publishing PR will re-push the current manifest to each); Claude Code Plugin submission to `anthropics/claude-plugins-community` (community marketplace launched 2026-05-22; the plugin package itself — `.claude-plugin/plugin.json` + `.mcp.json` — lives at repo root and is validated by CI; the remaining step is the operator-side submission via `clau.de/plugin-directory-submission`); App Schemas codegen (WWDC 2026 introduced App Schemas — a new agentic layer over App Intents + App Entities, plus a View Annotations API for on-screen awareness and an App Intents Testing framework; the installed macOS 26.5 SDK exposes the non-deprecated `@AppIntent(schema:)` / `@AppEntity(schema:)` / `@AppEnum(schema:)` macro declarations, but the current Command Line Tools lack the `AppIntentsMacros` plugin and `AppIntentsTesting` module, so AirMCP keeps the generated default artifact on plain AppIntents/AppEntities until a full toolchain can compile the schema path). iOS companion server (`ios/Sources/AirMCPServer`, ~1500 LOC) is **preview**, not GA — macOS is the shipping surface. - **Design intent** — Core infra (HITL · audit · rate-limit · HMAC chain · network policy · OAuth scope gate) is the differentiated layer; the tool surface is broad and JXA-thin **by design**. JXA is the bridge, not the product. 
The interesting code lives in `src/shared/` (audit, rate-limit, HITL, network policy, OAuth gate, structured-content validators) and the Swift bridges (`swift/Sources/AirMCPKit`) for EventKit / HealthKit / PhotoKit / Vision / FoundationModels. Blast-radius unit is one tool call. Adjacent to — not a replacement for — the canonical [Model Context Protocol reference servers](https://github.com/modelcontextprotocol/servers) (Everything, Filesystem, Fetch, Git, Memory, Sequential Thinking, Time); AirMCP fills the Apple-native domain those references leave open. Aligned with Anthropic's three-layer containment doctrine ([*How we contain Claude across products*](https://anthropic.com/engineering/how-we-contain-claude), 2026-05-27 engineering blog): the Environment layer (sandbox / VM / egress controls) and Model layer (system prompts / classifiers) are Anthropic's host-side responsibility; AirMCP implements the **External Content layer** — tool-permission gating + MCP server auditing — for the Apple-native domain, complementary to (not replacing) Claude Code's process-level Seatbelt/bubblewrap sandbox. The same production governance primitives (per-call HITL, scope-gated permissions, real-time tamper-evident audit, rate-limited destructive ops, emergency stop file) that high-stakes vertical MCP servers — financial trading, crypto exchange, supply-chain attestation — build per-deployment are surfaced once here as OSS reference. - **Non-goals** — Per-session batched approval that covers "the next N calls" (failure mode this project is built around). Editable or skippable audit entries (the chain is load-bearing). Promising iOS parity on the public surface (preview only). Replacing native Apple apps — AirMCP automates them, it does not reimplement them. Headless / non-Apple platforms beyond what Google Workspace already provides. 
@@ -27,7 +27,7 @@ ## Features -- **272 tools** (29 modules) — Apple app CRUD + system control + Apple Intelligence + UI Automation + Screen Capture + Maps + Podcasts + Weather + iWork (Pages/Numbers/Keynote) + Google Workspace + dynamic shortcuts + context memory + audit introspection +- **286 tools** (29 modules) — Apple app CRUD + system control + Apple Intelligence + UI Automation + Screen Capture + Maps + Podcasts + Weather + iWork (Pages/Numbers/Keynote) + Google Workspace + dynamic shortcuts + context memory + audit introspection - **233 Shortcuts / Siri AppIntents** — auto-generated from the tool manifest (82 Interactive Snippet views + 13 AppEnum pickers); workflow-first AppShortcuts ship by default, while `AskAirMCPIntent` is a FoundationModels preview gated behind `AIRMCP_ENABLE_FOUNDATION_MODELS` - **32 prompts + 14 YAML skill built-ins** — per-app workflows + cross-module + developer workflows + Skills DSL (`inputs` / `parallel` / `loop` / `on_error` / `retry` / 9 event triggers) - **9 MCP resources** — Notes, Calendar, Reminders, Music, Mail, System, Context Memory + unified `context://snapshot/{depth}` @@ -146,13 +146,13 @@ These are the first-class use cases. The full tool catalog stays available when **Is**: the governed action layer for AI on Apple. Siri can understand the request; AirMCP gives it hands, memory, workflows, and guardrails. External MCP agents use the same runtime through stdio or HTTP. When Apple ships more native action APIs, AirMCP can delegate lower-level app calls to the OS while keeping the orchestration and governance layer above. -**Isn't**: a thin per-app wrapper. The distinctive thing is *integrated depth* — 272 tools + Swift bridge + Skills DSL + production-grade safety primitives + Google Workspace + iOS AppIntents in one auditable open-source codebase, with the governance layer (per-call HITL, HMAC-chained audit, scope gate, rate limit) as the load-bearing part, not the tool count. +**Isn't**: a thin per-app wrapper. 
The distinctive thing is *integrated depth* — 286 tools + Swift bridge + Skills DSL + production-grade safety primitives + Google Workspace + iOS AppIntents in one auditable open-source codebase, with the governance layer (per-call HITL, HMAC-chained audit, scope gate, rate limit) as the load-bearing part, not the tool count. ### Integrated depth The point is the combination in one auditable codebase, not any single capability: -- **272 tools across 29 modules** — Apple app CRUD + system control + Apple Intelligence + iWork + Google Workspace + dynamic Shortcuts. +- **286 tools across 29 modules** — Apple app CRUD + system control + Apple Intelligence + iWork + Google Workspace + dynamic Shortcuts. - **Skills DSL workflow engine** — `parallel` / `loop` / `on_error` / `retry` / 9 event triggers. - **Semantic memory** — Gemini + on-device Swift embeddings, persistent across restarts. - **Production safety primitives** — per-call HITL, HMAC-chained audit log (tamper-detection asserted in `tests/audit-tamper-detection.test.js`), rate limiting, emergency stop, OAuth 2.1 + Resource Indicators (RS256/ES256 JWT + RFC 8707 audience + RFC 9728 PRM + DPoP advertisement). DPoP is advertised in the `.well-known` card, not enforced — `dpop_bound_access_tokens_required: false`; tokens are not yet bound to a proof. @@ -214,7 +214,7 @@ User-authored skills land in `~/.config/airmcp/skills/*.yaml` and hot-reload. ## Safety & Operations -AirMCP runs with access to 272 tools on your machine. A few layers keep a buggy agent plan from turning into an incident: +AirMCP runs with access to 286 tools on your machine. A few layers keep a buggy agent plan from turning into an incident: - **HITL approval** — every destructive tool prompts before firing (via MCP Elicitation or a Unix socket fallback). Per-call, per-scope. - **Rate limit** — 60 tool calls/minute globally, 10 destructive/hour. Token-bucket so bursts are fine; sustained rate isn't. 
@@ -386,7 +386,7 @@ npx airmcp --http --bind-all --port 3847 curl http://127.0.0.1:3847/.well-known/mcp.json ``` -The response includes `"network_policy": "with-token+origin"` so the client can confirm what it's connecting to before a single tool call. Registry crawlers (Anthropic MCP Registry, Smithery, PulseMCP, Glama) use the same endpoint to build their catalog without connecting live — it carries the full tool inventory (`tools.count`, `tools.names`), enabled modules, license, and homepage, so a crawler can surface "AirMCP: 272 tools across calendar, notes, mail, …" without opening a session. MCP spec version pinned via `schema_version: "2025-11-25"`. When the policy is `with-oauth*`, a sibling `/.well-known/oauth-protected-resource` endpoint (RFC 9728) advertises the authorization server + audience + supported scopes so conforming clients can negotiate OAuth before the first MCP call. +The response includes `"network_policy": "with-token+origin"` so the client can confirm what it's connecting to before a single tool call. Registry crawlers (Anthropic MCP Registry, Smithery, PulseMCP, Glama) use the same endpoint to build their catalog without connecting live — it carries the full tool inventory (`tools.count`, `tools.names`), enabled modules, license, and homepage, so a crawler can surface "AirMCP: 286 tools across calendar, notes, mail, …" without opening a session. MCP spec version pinned via `schema_version: "2025-11-25"`. When the policy is `with-oauth*`, a sibling `/.well-known/oauth-protected-resource` endpoint (RFC 9728) advertises the authorization server + audience + supported scopes so conforming clients can negotiate OAuth before the first MCP call. Running AirMCP on a laptop that suspends? Put the menubar app on your Mac Mini / always-on host, point the browser at that hostname, and leave the token in Chrome's secure storage. Revoke by rotating `AIRMCP_HTTP_TOKEN` and restarting the server. 
@@ -966,7 +966,7 @@ Modules with OS requirements (e.g., Intelligence requires macOS 26+) are automat - **Input sanitization** — `run_javascript` blocks `javascript:` and `data:` URL schemes to prevent code injection. `escJxaShell` strips control characters from shell arguments. - **Read data exposure** — Destructive operations require HITL approval, but read operations (mail, messages, contacts) are not rate-limited. When connected to cloud LLMs, sensitive data passes through the LLM provider. Mitigations: PII scrubbing in logs, pagination limits, sensitive modules (mail, messages) require explicit opt-in. - **IPC overhead** — Multi-process path (Client → Node.js → osascript/Swift CLI → macOS app). Each JXA call adds ~50ms overhead. Pagination prevents bulk data transfers. Swift bridge path bypasses JXA for EventKit/PhotoKit operations. -- **Scope** — 272 tools across 29 modules follow 5 repeating patterns (JXA CRUD, Swift bridge, HTTP API, System Events, CLI wrapper), keeping maintenance proportional to pattern count, not tool count. +- **Scope** — 286 tools across 29 modules follow 5 repeating patterns (JXA CRUD, Swift bridge, HTTP API, System Events, CLI wrapper), keeping maintenance proportional to pattern count, not tool count. ### Location & Bluetooth diff --git a/docs/REGISTRY_SUBMISSIONS.md b/docs/REGISTRY_SUBMISSIONS.md index 4592f00a..2206655b 100644 --- a/docs/REGISTRY_SUBMISSIONS.md +++ b/docs/REGISTRY_SUBMISSIONS.md @@ -65,7 +65,7 @@ When the counts or headline features change, walk this list before you touch any The 2026-03-28 Google Form submission used the v2.7 pitch ("262 tools across 27 modules"). For the resubmission: -- **Headline for current resubmission**: "MCP server for the entire Apple ecosystem — 272 tools across 29 modules with workflow skills, context memory, queryable audit log, per-call HITL, OAuth 2.1, and inbound HTTP `allowNetwork` policy." 
+- **Headline for current resubmission**: "MCP server for the entire Apple ecosystem — 286 tools across 29 modules with workflow skills, context memory, queryable audit log, per-call HITL, OAuth 2.1, and inbound HTTP `allowNetwork` policy." - **Security story** (registry reviewers care): HITL approval, rate limit + emergency stop file, `allowNetwork` startup invariant (RFC 0002), PII-scrubbed audit log at `0600`. - **Differentiator vs. apple-mcp / shortcuts**: the Skills DSL (`parallel`/`loop`/`on_error`/`retry`/inputs/triggers) + event-bus triggers + governance primitives. Keep this claim bounded: based on README-level/public-surface comparison, not full source audits of every competitor. - **Demo asset**: point at `docs/demo.gif` (re-record with `./scripts/record-demo.sh` before the submission). @@ -74,7 +74,7 @@ The 2026-03-28 Google Form submission used the v2.7 pitch ("262 tools across 27 Ask after the npm publish lands. Pitch the following concrete wins over the baseline `apple-mcp` listing: -- Broad tool surface plus workflow-first entry points (272 tools, 29 modules, curated workflow catalog) +- Broad tool surface plus workflow-first entry points (286 tools, 29 modules, curated workflow catalog) - README-level Apple-native comparison: AirMCP is the only tracked listing that publicly documents the full governance stack together — HMAC audit, per-call HITL, rate limit, inbound HTTP `allowNetwork`, OAuth Resource Indicators - Queryable audit log and Skills DSL are concrete differentiators; avoid claiming competitors have zero governance without a fresh source audit - Documented inbound HTTP exposure policy (RFC 0002 in-tree) @@ -85,7 +85,7 @@ The manifest is auto-synced; they shouldn't need any new asset from our side. One-paragraph pitch for the submission form: -> AirMCP is an Apple-native MCP runtime for governed workflows across the local Apple workspace. 
It ships 272 tools across 29 modules (Notes, Calendar, Reminders, Contacts, Mail, Messages, Music, Finder, Safari, System, Photos, Shortcuts, Apple Intelligence previews, TV, Screen Capture, Maps, Podcasts, Weather, Pages/Numbers/Keynote, Location, Bluetooth, HealthKit, Context Memory, Audit), plus workflow skills, per-call HITL approval, HMAC-chained audit logs, rate limiting, OAuth 2.1 + Resource Indicators, and an inbound HTTP `allowNetwork` policy (RFC 0002). In README-level Apple-native MCP comparisons, this is the full-stack governance surface to beat; re-check competitor READMEs/source before publishing any unqualified "only" claim. Open source (MIT), v2.12+ on npm. iOS sibling with auto-generated AppIntents and an opt-in Foundation Models on-device agent preview (RFC 0007) in active development. +> AirMCP is an Apple-native MCP runtime for governed workflows across the local Apple workspace. It ships 286 tools across 29 modules (Notes, Calendar, Reminders, Contacts, Mail, Messages, Music, Finder, Safari, System, Photos, Shortcuts, Apple Intelligence previews, TV, Screen Capture, Maps, Podcasts, Weather, Pages/Numbers/Keynote, Location, Bluetooth, HealthKit, Context Memory, Audit), plus workflow skills, per-call HITL approval, HMAC-chained audit logs, rate limiting, OAuth 2.1 + Resource Indicators, and an inbound HTTP `allowNetwork` policy (RFC 0002). In README-level Apple-native MCP comparisons, this is the full-stack governance surface to beat; re-check competitor READMEs/source before publishing any unqualified "only" claim. Open source (MIT), v2.12+ on npm. iOS sibling with auto-generated AppIntents and an opt-in Foundation Models on-device agent preview (RFC 0007) in active development. 
Screenshots to attach: diff --git a/docs/TERMS_OF_SERVICE.md b/docs/TERMS_OF_SERVICE.md index 6e66aa87..d84b734f 100644 --- a/docs/TERMS_OF_SERVICE.md +++ b/docs/TERMS_OF_SERVICE.md @@ -19,7 +19,7 @@ AirMCP is open-source software released under the [MIT License](../LICENSE). The You are solely responsible for: -- **AI agent actions.** AirMCP enables AI agents to perform actions on your Mac through 272 tools across 29 modules. Any action an AI agent takes through AirMCP is performed on your behalf and at your direction. You are responsible for the outcomes of those actions. +- **AI agent actions.** AirMCP enables AI agents to perform actions on your Mac through 286 tools across 29 modules. Any action an AI agent takes through AirMCP is performed on your behalf and at your direction. You are responsible for the outcomes of those actions. - **Safety controls.** AirMCP provides a Human-in-the-Loop (HITL) approval system with configurable levels. It is your responsibility to configure an appropriate HITL level for your use case. Running AirMCP with HITL disabled means AI agents can execute destructive actions without confirmation. - **HTTP mode security.** If you enable HTTP mode for remote access, you are responsible for securing it. This includes configuring token-based authentication, restricting network access, and ensuring the server is not exposed to untrusted networks. - **Legal compliance.** You must comply with all applicable local, state, national, and international laws when using AirMCP. This includes laws governing privacy, electronic communications, data protection, and computer access. diff --git a/docs/direction.md b/docs/direction.md index 10425f94..c6d71f32 100644 --- a/docs/direction.md +++ b/docs/direction.md @@ -75,7 +75,7 @@ WWDC 6/8 overhang 대응: "Apple-native MCP server"가 아닌 **runtime layer** | Surface | 톤 | 누구에게 | 카피 예시 | |---------|----|----|----------| | 랜딩·소개 (`docs/index.html`) | 프로슈머 메이커 + 애플 미니멀 | Siri에 매일 실망하는 파워 유저 | "More than Siri. 
Open to every agent." + 행동체인 use-case | -| GitHub README·개발자 문서 (`docs/site/`) | 건조·정확·런타임 레이어 강조 | MCP 서버를 포크·확장할 개발자 | "Apple-native agent runtime for any MCP client. 272 tools / 29 modules + Skills DSL + semantic memory + OAuth 2.1 + HMAC audit log." (PR #216 hero) | +| GitHub README·개발자 문서 (`docs/site/`) | 건조·정확·런타임 레이어 강조 | MCP 서버를 포크·확장할 개발자 | "Apple-native agent runtime for any MCP client. 286 tools / 29 modules + Skills DSL + semantic memory + OAuth 2.1 + HMAC audit log." (PR #216 hero) | | Skills 가이드·블로그·릴리즈 노트 | 오픈소스 커뮤널 (투명성) | 공통 | 실사용 예시, 로드맵 공개, 기여 초대 | 한 문서가 두 청중을 동시에 설득하려 하지 않는다. 랜딩은 감정, docs는 스펙·레이어, 블로그는 투명성 — 각 표면이 자기 일만 한다. diff --git a/docs/environment.md b/docs/environment.md index ac4dc4a3..fcc3bace 100644 --- a/docs/environment.md +++ b/docs/environment.md @@ -13,7 +13,7 @@ If a variable accepts a path, `~` expands to `$HOME`. Booleans are `"true"` / `" | Bind HTTP server to all interfaces with token auth | `AIRMCP_ALLOW_NETWORK=with-token` + `AIRMCP_HTTP_TOKEN=…` | | Bind HTTP server with OAuth 2.1 | `AIRMCP_ALLOW_NETWORK=with-oauth` + `AIRMCP_OAUTH_ISSUER=…` + `AIRMCP_OAUTH_AUDIENCE=…` | | Disable a flaky module without removing config | `AIRMCP_DEBUG_MODULES=notes,calendar` (whitelist) | -| Send all 272 tools without compactDescription | `AIRMCP_COMPACT_TOOLS=false` | +| Send all 286 tools without compactDescription | `AIRMCP_COMPACT_TOOLS=false` | | Increase audit-log signing strength for cross-host integrity | `AIRMCP_AUDIT_HMAC_KEY=<32+ random bytes>` | | Block every destructive tool on a panic | `touch ~/.config/airmcp/emergency-stop` | diff --git a/docs/index.html b/docs/index.html index 7b8350b6..ac5e35bc 100644 --- a/docs/index.html +++ b/docs/index.html @@ -4,7 +4,7 @@ AirMCP — More than Siri - + @@ -19,7 +19,7 @@ - +