From 6723a47e5cbf7bda3802b5b591423f630ccb1d99 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Mon, 18 May 2026 11:22:57 +0200 Subject: [PATCH] =?UTF-8?q?feat(memory):=20ICM=20slice=202=20=E2=80=94=20f?= =?UTF-8?q?eedback=20record/search/stats?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the cross-agent "AI predicted X, real answer was Y" feedback lane specified in docs/icm-integration-plan.md slice 2 so a future agent can search prior corrections by topic before repeating a known mistake. - packages/storage migration 015 introduces `feedback` + a porter- unicode61 `feedback_fts` virtual table with the standard ai/ad/au triggers, plus indexes on `topic` and `created_at`. Importance is a four-level CHECK enum defaulting to `medium`. - packages/core MemoryStore exposes `recordFeedback`, `searchFeedback`, `getFeedback`, and `feedbackStats`. All three prose fields (prediction/correction/context) route through `prepareMemoryText`, the same redact-then-compress path observations use, so the compression invariant holds at the write boundary. - apps/mcp-server registers `feedback_record`, `feedback_search`, and `feedback_stats` with progressive-disclosure return shapes (id, topic, importance, FTS5 score, snippet, created_at). docs/mcp.md carries the contract. Follow-up out of scope here: a pre-tool-use hook that auto-surfaces prior corrections on inbound prompts. Tracking separately so this slice can ship behind a manual query surface first. --- .../icm-feedback-record-search-stats.md | 30 +++ apps/mcp-server/src/server.ts | 8 +- apps/mcp-server/src/tools/feedback.test.ts | 102 +++++++++ apps/mcp-server/src/tools/feedback.ts | 115 +++++++++++ apps/mcp-server/test/server.test.ts | 3 + docs/mcp.md | 40 ++++ packages/core/src/index.ts | 7 + packages/core/src/memory-store.ts | 78 ++++++- .../core/test/memory-store-feedback.test.ts | 132 ++++++++++++ packages/storage/src/index.ts | 5 + .../src/migrations/015-icm-feedback.ts | 46 +++++ packages/storage/src/schema.ts | 42 +++- packages/storage/src/storage.ts | 135 ++++++++++++ packages/storage/src/types.ts | 46 +++++ packages/storage/test/feedback.test.ts | 195 ++++++++++++++++++ 15 files changed, 981 insertions(+), 3 deletions(-) create mode 100644 .changeset/icm-feedback-record-search-stats.md create mode 100644 apps/mcp-server/src/tools/feedback.test.ts create mode 100644 apps/mcp-server/src/tools/feedback.ts create mode 100644 packages/core/test/memory-store-feedback.test.ts create mode 100644 packages/storage/src/migrations/015-icm-feedback.ts create mode 100644 packages/storage/test/feedback.test.ts diff --git a/.changeset/icm-feedback-record-search-stats.md b/.changeset/icm-feedback-record-search-stats.md new file mode 100644 index 0000000..fd91347 --- /dev/null +++ b/.changeset/icm-feedback-record-search-stats.md @@ -0,0 +1,30 @@ +--- +"@colony/storage": minor +"@colony/core": minor +"@colony/mcp-server": minor +--- + +ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools. + +Adds a new `feedback` lane that records "AI predicted X, real answer +was Y" corrections so a future agent can search prior mistakes by +topic before repeating them. Migration 015 introduces the `feedback` +table plus a porter-unicode61 `feedback_fts` virtual table mirrored +by the standard `ai/ad/au` triggers; importance is a four-level enum +defaulting to `medium`. `prediction`, `correction`, and the optional +`context` flow through `MemoryStore.recordFeedback`, which routes each +body through `prepareMemoryText` — the same redact-then-compress path +observations use — so the compression invariant holds at the write +boundary. + +MCP surface (progressive disclosure): + +- `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }` +- `feedback_search({ query, topic?, limit? })` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`) +- `feedback_stats({ topic? })` → per-topic counts and `last_created_at` + +Follow-up (separate PR): a pre-tool-use hook that surfaces prior +corrections on inbound prompts. This PR keeps the slice scoped to the +storage + search surface so it can ship behind a manual query first. + +Reference: `docs/icm-integration-plan.md` slice 2. diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts index 8f3bc8d..16ab004 100644 --- a/apps/mcp-server/src/server.ts +++ b/apps/mcp-server/src/server.ts @@ -14,6 +14,7 @@ import * as autopilot from './tools/autopilot.js'; import * as bridge from './tools/bridge.js'; import type { ToolContext } from './tools/context.js'; import * as drift from './tools/drift.js'; +import * as feedback from './tools/feedback.js'; import * as foraging from './tools/foraging.js'; import { registerTaskForagingReport } from './tools/foraging.js'; import * as handoff from './tools/handoff.js'; @@ -30,8 +31,8 @@ import * as readyQueue from './tools/ready-queue.js'; import * as recall from './tools/recall.js'; import * as relay from './tools/relay.js'; import * as rescue from './tools/rescue.js'; -import * as savings from './tools/savings.js'; import * as savingsDrift from './tools/savings-drift.js'; +import * as savings from './tools/savings.js'; import * as search from './tools/search.js'; import * as spec from './tools/spec.js'; import * as startupPanel from './tools/startup-panel.js'; @@ -125,6 +126,11 @@ export function buildServer( savings.register(server, ctx); savingsDrift.register(server, ctx); + // ICM slice 2 feedback lane (docs/icm-integration-plan.md). Registered + // after the read-side surfaces so the heartbeat wrapper has seen every + // core tool first. + feedback.register(server, ctx); + // Autopilot lane (tick advisor + drift checker). Cheap compositions of // existing primitives; registered after the core surface so the heartbeat // wrapper has already wrapped the tools they delegate to. diff --git a/apps/mcp-server/src/tools/feedback.test.ts b/apps/mcp-server/src/tools/feedback.test.ts new file mode 100644 index 0000000..598f507 --- /dev/null +++ b/apps/mcp-server/src/tools/feedback.test.ts @@ -0,0 +1,102 @@ +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { defaultSettings } from '@colony/config'; +import { MemoryStore } from '@colony/core'; +import { Client } from '@modelcontextprotocol/sdk/client'; +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildServer } from '../server.js'; + +let dir: string; +let store: MemoryStore; +let client: Client; + +beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), 'colony-feedback-mcp-')); + store = new MemoryStore({ dbPath: join(dir, 'data.db'), settings: defaultSettings }); + const server = buildServer(store, defaultSettings); + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair(); + client = new Client({ name: 'test', version: '0.0.0' }); + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]); +}); + +afterEach(async () => { + await client.close(); + store.close(); + rmSync(dir, { recursive: true, force: true }); +}); + +interface ToolResponse { + content: Array<{ type: string; text: string }>; +} + +function parseTextContent(response: unknown): T { + const typed = response as ToolResponse; + const text = typed.content[0]?.text; + if (typeof text !== 'string') throw new Error('feedback tool returned no text content'); + return JSON.parse(text) as T; +} + +describe('feedback MCP surface (ICM slice 2)', () => { + it('records a correction and surfaces it via search + stats', async () => { + const recordRes = await client.callTool({ + name: 'feedback_record', + arguments: { + topic: 'frontend.routing', + prediction: 'useRouter returns null in server components', + correction: 'useRouter throws in server components', + context: 'reviewing apps/web App Router migration', + importance: 'high', + created_by: 'claude', + }, + }); + const recordPayload = parseTextContent<{ id: number }>(recordRes); + expect(recordPayload.id).toBeGreaterThan(0); + + await client.callTool({ + name: 'feedback_record', + arguments: { + topic: 'backend.migrations', + prediction: 'ALTER TABLE works inside transactions', + correction: 'ALTER TABLE must run outside a transaction for partitioned tables', + }, + }); + + const searchRes = await client.callTool({ + name: 'feedback_search', + arguments: { query: 'router server component', limit: 5 }, + }); + const searchPayload = parseTextContent<{ + hits: Array<{ id: number; topic: string; score: number; snippet: string }>; + }>(searchRes); + expect(searchPayload.hits.length).toBeGreaterThan(0); + expect(searchPayload.hits[0]?.topic).toBe('frontend.routing'); + + const statsRes = await client.callTool({ + name: 'feedback_stats', + arguments: {}, + }); + const statsPayload = parseTextContent<{ + stats: Array<{ topic: string; count: number; last_created_at: number }>; + }>(statsRes); + const topics = statsPayload.stats.map((row) => row.topic); + expect(topics).toContain('frontend.routing'); + expect(topics).toContain('backend.migrations'); + }); + + it('returns INTERNAL_ERROR when the prediction redacts to empty', async () => { + const res = await client.callTool({ + name: 'feedback_record', + arguments: { + topic: 'auth', + prediction: 'secret-prediction', + correction: 'real correction text', + }, + }); + const typed = res as ToolResponse & { isError?: boolean }; + expect(typed.isError).toBe(true); + const payload = JSON.parse(typed.content[0]?.text ?? '{}') as { code: string }; + expect(payload.code).toBe('INTERNAL_ERROR'); + }); +}); diff --git a/apps/mcp-server/src/tools/feedback.ts b/apps/mcp-server/src/tools/feedback.ts new file mode 100644 index 0000000..8bf79d2 --- /dev/null +++ b/apps/mcp-server/src/tools/feedback.ts @@ -0,0 +1,115 @@ +import type { FeedbackImportance } from '@colony/core'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import { type ToolContext, defaultWrapHandler } from './context.js'; +import { mcpErrorResponse } from './shared.js'; + +/** + * Feedback lane (ICM slice 2 — docs/icm-integration-plan.md). Records the + * "AI predicted X, real answer was Y" pairs that surface in code review, + * test failures, and human corrections so a later agent can search prior + * mistakes by topic. + * + * Progressive disclosure mirrors the observation surface: + * feedback_record → row id only + * feedback_search → compact hits (id, topic, score, snippet) + * feedback_stats → counts per topic + * + * Compression invariant: prediction/correction/context flow through + * `MemoryStore.recordFeedback`, which runs each body through the same + * `prepareMemoryText` path observations use. Tool handlers never write + * raw prose to storage directly. + * + * Note: this PR does not register a pre-tool-use hook that surfaces prior + * corrections on inbound prompts. That belongs to a follow-up PR so this + * slice can ship behind a search surface first. + */ +export function register(server: McpServer, ctx: ToolContext): void { + const wrapHandler = ctx.wrapHandler ?? defaultWrapHandler; + const { store } = ctx; + + const importanceSchema = z + .enum(['critical', 'high', 'medium', 'low']) + .describe('how strongly this correction should weigh against repeating the prediction'); + + server.tool( + 'feedback_record', + "Record an 'AI predicted X, real answer was Y' correction. Bodies are compressed via the same path observations use; returns only the new row id.", + { + topic: z + .string() + .min(1) + .max(200) + .describe('a short, stable label callers can pivot on (e.g. "frontend.routing")'), + prediction: z.string().min(1).describe('what the AI predicted / asserted'), + correction: z.string().min(1).describe('what the real answer turned out to be'), + context: z + .string() + .min(1) + .optional() + .describe('optional surrounding context (where the prediction was made)'), + importance: importanceSchema.optional(), + created_by: z.string().min(1).optional().describe('agent or human author for audit'), + }, + wrapHandler('feedback_record', async (args) => { + const topic = args.topic.trim(); + if (!topic) { + return mcpErrorResponse('INTERNAL_ERROR', 'feedback_record: topic must be non-empty'); + } + const id = store.recordFeedback({ + topic, + prediction: args.prediction, + correction: args.correction, + ...(args.context !== undefined ? { context: args.context } : {}), + ...(args.importance !== undefined + ? { importance: args.importance as FeedbackImportance } + : {}), + ...(args.created_by !== undefined ? { created_by: args.created_by } : {}), + }); + if (id < 0) { + return mcpErrorResponse( + 'INTERNAL_ERROR', + 'feedback_record: prediction/correction collapsed to empty after privacy redaction', + ); + } + return { content: [{ type: 'text', text: JSON.stringify({ id }) }] }; + }), + ); + + server.tool( + 'feedback_search', + 'Search prior corrections. Returns compact hits (id, topic, importance, score, snippet); use feedback_record output ids and a follow-up read if you need the full bodies.', + { + query: z + .string() + .min(1) + .describe('FTS5 query across topic + prediction + correction + context'), + topic: z + .string() + .min(1) + .optional() + .describe('optional exact-match filter on the feedback topic'), + limit: z.number().int().positive().max(100).optional(), + }, + wrapHandler('feedback_search', async (args) => { + const hits = store.searchFeedback({ + query: args.query, + ...(args.topic !== undefined ? { topic: args.topic } : {}), + ...(args.limit !== undefined ? { limit: args.limit } : {}), + }); + return { content: [{ type: 'text', text: JSON.stringify({ hits }) }] }; + }), + ); + + server.tool( + 'feedback_stats', + 'Per-topic counts of recorded corrections, sorted by most recent first. Pass a topic to scope to a single bucket.', + { + topic: z.string().min(1).optional(), + }, + wrapHandler('feedback_stats', async (args) => { + const stats = store.feedbackStats(args.topic !== undefined ? { topic: args.topic } : {}); + return { content: [{ type: 'text', text: JSON.stringify({ stats }) }] }; + }), + ); +} diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 58de97f..05b94e4 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -56,6 +56,9 @@ describe('MCP server', () => { 'examples_integrate_plan', 'examples_list', 'examples_query', + 'feedback_record', + 'feedback_search', + 'feedback_stats', 'get_observations', 'hivemind', 'hivemind_context', diff --git a/docs/mcp.md b/docs/mcp.md index 77cfb00..67508ea 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -102,6 +102,9 @@ workflow guidance. | Rescue | `rescue_stranded_run` | Emit rescue relays and release abandoned claims. | | Metrics | `savings_report` | Report live MCP token receipts and reference savings model. | | Metrics | `savings_drift_report` | Flag tools whose median tokens-per-call drifted vs a baseline window. | +| Feedback | `feedback_record` | Record an "AI predicted X, real answer was Y" correction. | +| Feedback | `feedback_search` | Search prior corrections by FTS5 query and optional topic. | +| Feedback | `feedback_stats` | Per-topic counts of recorded corrections. | ## Ruflo sidecar boundary @@ -2266,6 +2269,43 @@ Response shape: Classifications: `up_drift`, `down_drift`, `new_tool` (no baseline data), `gone` (no recent calls), `insufficient_data` (either window below `min_calls`), or `stable`. When the baseline window starts before the earliest `mcp_metrics` receipt the response adds a `warning` field nudging callers to wait for more history before trusting signals. +## `feedback_record` + +Record an "AI predicted X, real answer was Y" correction. ICM slice 2 (see `docs/icm-integration-plan.md`). Bodies pass through the same compression path as observations — `MemoryStore.recordFeedback` runs `prediction`, `correction`, and (optional) `context` through `prepareMemoryText` before persisting. Returns only the new row id; full bodies stay behind the storage layer. + +Args: + +- `topic` — short, stable label callers can pivot on (e.g. `"frontend.routing"`). +- `prediction` — what the AI predicted or asserted. +- `correction` — what the real answer turned out to be. +- `context?` — optional surrounding context (where the prediction was made). +- `importance?` — `critical | high | medium | low`. Defaults to `medium`. +- `created_by?` — agent or human author for audit. + +Returns `{ "id": }`. When privacy redaction collapses `prediction` or `correction` to empty, the tool returns an `INTERNAL_ERROR` instead of writing a phantom row. + +## `feedback_search` + +Search prior corrections. Compact-hit progressive disclosure: returns `id`, `topic`, `importance`, FTS5 `score` (higher = better), `snippet`, and `created_at` only. Bodies live behind storage so callers don't pay the expansion cost on every search. + +Args: + +- `query` — FTS5 query across `topic + prediction + correction + context`. +- `topic?` — exact-match filter on the feedback topic. +- `limit?` — defaults to 20, max 100. + +If `query` is empty/whitespace and `topic` is set, returns a newest-first listing for that topic. Empty `query` without a `topic` returns no hits. + +## `feedback_stats` + +Per-topic counts of recorded corrections, sorted by most recent first. + +Args: + +- `topic?` — exact-match filter; scopes the response to a single bucket. + +Response shape: `{ "stats": [{ "topic": string, "count": number, "last_created_at": number }] }`. + ## Plan observation kinds The lane introduces several observation kinds on the parent spec task and on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload. diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 402f430..a694bbc 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -336,6 +336,13 @@ export { export { buildDiscrepancyReport, type DiscrepancyReport } from './discrepancy.js'; export { isPseudoClaimPath, normalizeClaimPath, normalizeRepoFilePath } from '@colony/storage'; export type { ClaimPathContext, RepoFilePathContext } from '@colony/storage'; +export type { + AddFeedbackInput, + FeedbackHit, + FeedbackImportance, + FeedbackRow, + FeedbackStat, +} from '@colony/storage'; export { buildCoordinationSweep, type BlockedDownstreamTaskSignal, diff --git a/packages/core/src/memory-store.ts b/packages/core/src/memory-store.ts index ec18499..3594b53 100644 --- a/packages/core/src/memory-store.ts +++ b/packages/core/src/memory-store.ts @@ -1,6 +1,15 @@ import { compress, countTokens, expand, redactPrivate } from '@colony/compress'; import type { Settings } from '@colony/config'; -import { type NewObservation, type ObservationRow, Storage } from '@colony/storage'; +import { + type AddFeedbackInput, + type FeedbackHit, + type FeedbackImportance, + type FeedbackRow, + type FeedbackStat, + type NewObservation, + type ObservationRow, + Storage, +} from '@colony/storage'; import { inferSessionIdentity, sessionIdentityMetadata } from './infer-ide.js'; import { cosine, hybridRank } from './ranker.js'; import { type RustSearchOptions, searchWithRust } from './rust-search.js'; @@ -98,6 +107,73 @@ export class MemoryStore { }); } + // --- feedback (ICM slice 2 — docs/icm-integration-plan.md) --- + // + // All three prose fields (prediction, correction, optional context) flow + // through `prepareMemoryText` so the compression invariant matches the + // observation path exactly. Returning -1 mirrors `addObservation`'s + // contract for "everything was redacted / empty after privacy strip". + recordFeedback(input: { + topic: string; + prediction: string; + correction: string; + context?: string; + importance?: FeedbackImportance; + created_by?: string; + }): number { + const intensity = this.settings.compression.intensity; + const prediction = prepareMemoryText(input.prediction, intensity); + const correction = prepareMemoryText(input.correction, intensity); + if (!prediction || !correction) return -1; + const context = + input.context !== undefined ? prepareMemoryText(input.context, intensity) : null; + const row: AddFeedbackInput = { + topic: input.topic, + prediction: prediction.compressed, + correction: correction.compressed, + context: context ? context.compressed : null, + ...(input.importance !== undefined ? { importance: input.importance } : {}), + ...(input.created_by !== undefined ? { created_by: input.created_by } : {}), + }; + return this.storage.insertFeedback(row); + } + + searchFeedback(input: { topic?: string; query: string; limit?: number }): FeedbackHit[] { + return this.storage.searchFeedback(input); + } + + /** + * Full feedback row with prediction/correction/context expanded for human + * consumption. Same shape as `getObservations` honoring `expandForModel`. + * Pass `{ expand: false }` from a model-facing caller that wants to keep + * the bodies compressed. + */ + getFeedback( + id: number, + opts: { expand?: boolean } = {}, + ): + | (Omit & { + prediction: string; + correction: string; + context: string | null; + }) + | undefined { + const row = this.storage.getFeedback(id); + if (!row) return undefined; + const want = opts.expand ?? this.settings.compression.expandForModel; + if (!want) return row; + return { + ...row, + prediction: expand(row.prediction), + correction: expand(row.correction), + context: row.context !== null ? expand(row.context) : null, + }; + } + + feedbackStats(input: { topic?: string } = {}): FeedbackStat[] { + return this.storage.feedbackStats(input); + } + /** * Idempotently materialise a sessions row before inserting child rows. * Claude Code does not guarantee that SessionStart fires before the first diff --git a/packages/core/test/memory-store-feedback.test.ts b/packages/core/test/memory-store-feedback.test.ts new file mode 100644 index 0000000..657dbc9 --- /dev/null +++ b/packages/core/test/memory-store-feedback.test.ts @@ -0,0 +1,132 @@ +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { compress, expand } from '@colony/compress'; +import { defaultSettings } from '@colony/config'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { MemoryStore } from '../src/memory-store.js'; + +let dir: string; +let store: MemoryStore; + +beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'colony-memory-feedback-')); + store = new MemoryStore({ + dbPath: join(dir, 'data.db'), + settings: { + ...defaultSettings, + compression: { ...defaultSettings.compression, expandForModel: false }, + }, + }); +}); + +afterEach(() => { + store.close(); + rmSync(dir, { recursive: true, force: true }); +}); + +describe('MemoryStore feedback (ICM slice 2)', () => { + it('compresses prediction, correction, and context before persisting', () => { + const prediction = + 'The authentication middleware is basically really important to refresh on every single request.'; + const correction = + 'The authentication middleware is essentially only important on session creation.'; + const context = + 'The team is reviewing the apps/web authentication middleware refresh logic on the staging branch.'; + const intensity = defaultSettings.compression.intensity; + + const id = store.recordFeedback({ + topic: 'auth.middleware', + prediction, + correction, + context, + importance: 'high', + created_by: 'claude', + }); + expect(id).toBeGreaterThan(0); + + const stored = store.storage.getFeedback(id); + if (!stored) throw new Error('feedback row missing'); + expect(stored.compressed).toBe(1); + expect(stored.importance).toBe('high'); + expect(stored.created_by).toBe('claude'); + + // Bodies must equal the deterministic compress() output (no raw prose). + expect(stored.prediction).toBe(compress(prediction, { intensity })); + expect(stored.correction).toBe(compress(correction, { intensity })); + expect(stored.context).toBe(compress(context, { intensity })); + }); + + it('strips content before compression and refuses empty rows', () => { + const id = store.recordFeedback({ + topic: 'auth', + prediction: ' top-secret-prediction ', + correction: 'real answer', + }); + expect(id).toBe(-1); + }); + + it('round-trips prediction/correction/context via expand on getFeedback', () => { + const prediction = + 'The team essentially needs to be aware that this routing layer is basically really fragile.'; + const correction = + 'The team essentially needs to roll back to the previous routing implementation immediately.'; + const id = store.recordFeedback({ + topic: 'routing', + prediction, + correction, + context: 'short note', + }); + + const expanded = store.getFeedback(id, { expand: true }); + if (!expanded) throw new Error('feedback row missing'); + expect(expanded.prediction).toBe( + expand(compress(prediction, { intensity: defaultSettings.compression.intensity })), + ); + expect(expanded.correction).toBe( + expand(compress(correction, { intensity: defaultSettings.compression.intensity })), + ); + expect(expanded.context).toBe( + expand(compress('short note', { intensity: defaultSettings.compression.intensity })), + ); + + const compressedView = store.getFeedback(id, { expand: false }); + expect(compressedView?.prediction).toBe( + compress(prediction, { intensity: defaultSettings.compression.intensity }), + ); + }); + + it('searchFeedback returns compact hits and feedbackStats groups by topic', () => { + // Inline code (`useRouter`) is preserved byte-for-byte by the compressor, + // so it survives compression and remains FTS-searchable. + store.recordFeedback({ + topic: 'frontend.routing', + prediction: '`useRouter` returns null in server components', + correction: '`useRouter` throws in server components', + }); + store.recordFeedback({ + topic: 'frontend.routing', + prediction: '`useRouter` push silently no-ops on missing route', + correction: '`useRouter` push throws on missing route in next 15', + }); + store.recordFeedback({ + topic: 'backend.migrations', + prediction: '`ALTER TABLE` works inside transactions for partitioned tables', + correction: '`ALTER TABLE` must run outside a transaction for partitioned tables', + }); + + const hits = store.searchFeedback({ query: 'useRouter', limit: 10 }); + expect(hits.length).toBeGreaterThanOrEqual(2); + for (const hit of hits) { + expect(typeof hit.snippet).toBe('string'); + expect(hit.snippet.length).toBeGreaterThan(0); + } + + const stats = store.feedbackStats(); + const topics = stats.map((row) => row.topic); + expect(topics).toContain('frontend.routing'); + expect(topics).toContain('backend.migrations'); + const routing = stats.find((row) => row.topic === 'frontend.routing'); + expect(routing?.count).toBe(2); + }); +}); diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index 0dfce76..232e2a8 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -83,8 +83,13 @@ export type { ReinforcementKind, AgentProfileRow, NewAgentProfile, + AddFeedbackInput, CoachStepRow, ExampleRow, + FeedbackHit, + FeedbackImportance, + FeedbackRow, + FeedbackStat, NewExample, ExampleManifestKind, TaskEmbeddingRow, diff --git a/packages/storage/src/migrations/015-icm-feedback.ts b/packages/storage/src/migrations/015-icm-feedback.ts new file mode 100644 index 0000000..af520b1 --- /dev/null +++ b/packages/storage/src/migrations/015-icm-feedback.ts @@ -0,0 +1,46 @@ +export const version = 15; +export const name = 'icm-feedback'; + +// ICM slice 2 (`docs/icm-integration-plan.md`) — feedback lane. +// One row per "AI predicted X, real answer was Y" correction. Bodies are +// compressed via @colony/core MemoryStore.recordFeedback (same path as +// observations), so direct INSERTs that bypass the facade are a defect. +// The FTS5 mirror lets searchFeedback use bm25 across topic/prediction/ +// correction/context without scanning the base table. +export const sql = ` +CREATE TABLE IF NOT EXISTS feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + topic TEXT NOT NULL, + prediction TEXT NOT NULL, + correction TEXT NOT NULL, + context TEXT, + compressed INTEGER NOT NULL DEFAULT 1, + importance TEXT NOT NULL DEFAULT 'medium' + CHECK(importance IN ('critical','high','medium','low')), + created_at INTEGER NOT NULL, + created_by TEXT +); +CREATE INDEX IF NOT EXISTS idx_feedback_topic ON feedback(topic); +CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON feedback(created_at DESC); + +CREATE VIRTUAL TABLE IF NOT EXISTS feedback_fts USING fts5( + topic, prediction, correction, context, + content='feedback', content_rowid='id', + tokenize='porter unicode61' +); + +CREATE TRIGGER IF NOT EXISTS feedback_ai AFTER INSERT ON feedback BEGIN + INSERT INTO feedback_fts(rowid, topic, prediction, correction, context) + VALUES (new.id, new.topic, new.prediction, new.correction, new.context); +END; +CREATE TRIGGER IF NOT EXISTS feedback_ad AFTER DELETE ON feedback BEGIN + INSERT INTO feedback_fts(feedback_fts, rowid, topic, prediction, correction, context) + VALUES ('delete', old.id, old.topic, old.prediction, old.correction, old.context); +END; +CREATE TRIGGER IF NOT EXISTS feedback_au AFTER UPDATE ON feedback BEGIN + INSERT INTO feedback_fts(feedback_fts, rowid, topic, prediction, correction, context) + VALUES ('delete', old.id, old.topic, old.prediction, old.correction, old.context); + INSERT INTO feedback_fts(rowid, topic, prediction, correction, context) + VALUES (new.id, new.topic, new.prediction, new.correction, new.context); +END; +`; diff --git a/packages/storage/src/schema.ts b/packages/storage/src/schema.ts index f6aa54b..473685e 100644 --- a/packages/storage/src/schema.ts +++ b/packages/storage/src/schema.ts @@ -331,7 +331,47 @@ CREATE TABLE IF NOT EXISTS coach_progress ( evidence TEXT ); -INSERT OR IGNORE INTO schema_version(version) VALUES (14); +-- ICM slice 2 (docs/icm-integration-plan.md): "AI predicted X, real answer +-- was Y" feedback lane. prediction/correction/context bodies pass through +-- the @colony/core MemoryStore compression path before they ever reach this +-- table — direct INSERTs that skip the facade are a defect. +CREATE TABLE IF NOT EXISTS feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + topic TEXT NOT NULL, + prediction TEXT NOT NULL, + correction TEXT NOT NULL, + context TEXT, + compressed INTEGER NOT NULL DEFAULT 1, + importance TEXT NOT NULL DEFAULT 'medium' + CHECK(importance IN ('critical','high','medium','low')), + created_at INTEGER NOT NULL, + created_by TEXT +); +CREATE INDEX IF NOT EXISTS idx_feedback_topic ON feedback(topic); +CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON feedback(created_at DESC); + +CREATE VIRTUAL TABLE IF NOT EXISTS feedback_fts USING fts5( + topic, prediction, correction, context, + content='feedback', content_rowid='id', + tokenize='porter unicode61' +); + +CREATE TRIGGER IF NOT EXISTS feedback_ai AFTER INSERT ON feedback BEGIN + INSERT INTO feedback_fts(rowid, topic, prediction, correction, context) + VALUES (new.id, new.topic, new.prediction, new.correction, new.context); +END; +CREATE TRIGGER IF NOT EXISTS feedback_ad AFTER DELETE ON feedback BEGIN + INSERT INTO feedback_fts(feedback_fts, rowid, topic, prediction, correction, context) + VALUES ('delete', old.id, old.topic, old.prediction, old.correction, old.context); +END; +CREATE TRIGGER IF NOT EXISTS feedback_au AFTER UPDATE ON feedback BEGIN + INSERT INTO feedback_fts(feedback_fts, rowid, topic, prediction, correction, context) + VALUES ('delete', old.id, old.topic, old.prediction, old.correction, old.context); + INSERT INTO feedback_fts(rowid, topic, prediction, correction, context) + VALUES (new.id, new.topic, new.prediction, new.correction, new.context); +END; + +INSERT OR IGNORE INTO schema_version(version) VALUES (15); `; /** diff --git a/packages/storage/src/storage.ts b/packages/storage/src/storage.ts index c5e5394..87207de 100644 --- a/packages/storage/src/storage.ts +++ b/packages/storage/src/storage.ts @@ -17,11 +17,16 @@ import { import type { AccountClaimRow, AccountClaimState, + AddFeedbackInput, AgentProfileRow, AggregateMcpMetricsDailyOptions, AggregateMcpMetricsOptions, CoachStepRow, ExampleRow, + FeedbackHit, + FeedbackImportance, + FeedbackRow, + FeedbackStat, LaneRunState, LaneStateRow, LaneTakeoverResult, @@ -1299,6 +1304,136 @@ export class Storage { return row?.n ?? 0; } + // --- feedback (ICM slice 2) --- + // + // The MemoryStore facade is the only sanctioned write path: it routes + // prediction/correction/context through `prepareMemoryText` before calling + // `insertFeedback`. Calling `insertFeedback` directly with raw prose is a + // defect (compression invariant). `searchFeedback` mirrors the progressive- + // disclosure shape used by `searchFts`: compact hits only, callers fetch + // full bodies via `getFeedback(id)`. + + insertFeedback(input: AddFeedbackInput): number { + const ts = input.created_at ?? Date.now(); + const importance: FeedbackImportance = input.importance ?? 'medium'; + const stmt = this.db.prepare( + `INSERT INTO feedback(topic, prediction, correction, context, compressed, importance, created_at, created_by) + VALUES (?, ?, ?, ?, 1, ?, ?, ?)`, + ); + const info = stmt.run( + input.topic, + input.prediction, + input.correction, + input.context ?? null, + importance, + ts, + input.created_by ?? null, + ); + return Number(info.lastInsertRowid); + } + + getFeedback(id: number): FeedbackRow | undefined { + return this.db.prepare('SELECT * FROM feedback WHERE id = ?').get(id) as + | FeedbackRow + | undefined; + } + + searchFeedback(input: { topic?: string; query: string; limit?: number }): FeedbackHit[] { + const cap = Math.max(1, input.limit ?? 20); + const trimmedQuery = input.query.trim(); + const topicFilter = input.topic?.trim(); + + // Empty FTS query degrades to a topic listing so callers can browse a + // single topic without inventing a tokenizable keyword. + if (!trimmedQuery) { + if (!topicFilter) return []; + const rows = this.db + .prepare( + `SELECT id, topic, importance, created_at, + substr(correction, 1, 120) AS snippet + FROM feedback + WHERE topic = ? + ORDER BY created_at DESC, id DESC + LIMIT ?`, + ) + .all(topicFilter, cap) as Array<{ + id: number; + topic: string; + importance: FeedbackImportance; + created_at: number; + snippet: string | null; + }>; + return rows.map((r) => ({ + id: r.id, + topic: r.topic, + importance: r.importance, + score: 0, + snippet: r.snippet ?? '', + created_at: r.created_at, + })); + } + + const match = sanitizeMatch(trimmedQuery); + const conditions: string[] = ['feedback_fts MATCH ?']; + const params: Array = [match]; + if (topicFilter) { + conditions.push('f.topic = ?'); + params.push(topicFilter); + } + const rows = this.db + .prepare( + `SELECT f.id, f.topic, f.importance, f.created_at, + snippet(feedback_fts, 2, '[', ']', '…', 16) AS snippet, + bm25(feedback_fts) AS score + FROM feedback_fts + JOIN feedback f ON f.id = feedback_fts.rowid + WHERE ${conditions.join(' AND ')} + ORDER BY score ASC + LIMIT ?`, + ) + .all(...params, cap) as Array<{ + id: number; + topic: string; + importance: FeedbackImportance; + created_at: number; + snippet: string; + score: number; + }>; + return rows.map((r) => ({ + id: r.id, + topic: r.topic, + importance: r.importance, + // FTS5 bm25 is "lower is better"; flip so higher = better downstream. + score: -r.score, + snippet: r.snippet, + created_at: r.created_at, + })); + } + + feedbackStats(input: { topic?: string } = {}): FeedbackStat[] { + const topicFilter = input.topic?.trim(); + if (topicFilter) { + const row = this.db + .prepare( + `SELECT topic, COUNT(*) AS n, MAX(created_at) AS last_ts + FROM feedback + WHERE topic = ? + GROUP BY topic`, + ) + .get(topicFilter) as { topic: string; n: number; last_ts: number } | undefined; + return row ? [{ topic: row.topic, count: row.n, last_created_at: row.last_ts }] : []; + } + const rows = this.db + .prepare( + `SELECT topic, COUNT(*) AS n, MAX(created_at) AS last_ts + FROM feedback + GROUP BY topic + ORDER BY last_ts DESC, topic ASC`, + ) + .all() as Array<{ topic: string; n: number; last_ts: number }>; + return rows.map((r) => ({ topic: r.topic, count: r.n, last_created_at: r.last_ts })); + } + recordMcpMetric(metric: NewMcpMetric): void { this.db .prepare( diff --git a/packages/storage/src/types.ts b/packages/storage/src/types.ts index 2d91fc9..c18e363 100644 --- a/packages/storage/src/types.ts +++ b/packages/storage/src/types.ts @@ -250,6 +250,52 @@ export interface NewAgentProfile { updated_at?: number; } +/** + * ICM slice 2 — feedback lane. One row per "AI predicted X, real answer + * was Y" correction. prediction/correction/context are stored compressed + * (via @colony/core MemoryStore.recordFeedback) for the same token-budget + * reasons observations are. `topic` is left raw so callers can pivot on + * a stable string without expanding the compressor's lexicon for it. + */ +export type FeedbackImportance = 'critical' | 'high' | 'medium' | 'low'; + +export interface FeedbackRow { + id: number; + topic: string; + prediction: string; + correction: string; + context: string | null; + compressed: 0 | 1; + importance: FeedbackImportance; + created_at: number; + created_by: string | null; +} + +export interface AddFeedbackInput { + topic: string; + prediction: string; + correction: string; + context?: string | null; + importance?: FeedbackImportance; + created_by?: string | null; + created_at?: number; +} + +export interface FeedbackHit { + id: number; + topic: string; + importance: FeedbackImportance; + score: number; + snippet: string; + created_at: number; +} + +export interface FeedbackStat { + topic: string; + count: number; + last_created_at: number; +} + export interface NewSummary { session_id: string; scope: 'turn' | 'session'; diff --git a/packages/storage/test/feedback.test.ts b/packages/storage/test/feedback.test.ts new file mode 100644 index 0000000..4e1c1c2 --- /dev/null +++ b/packages/storage/test/feedback.test.ts @@ -0,0 +1,195 @@ +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { Storage } from '../src/index.js'; + +let dir: string; +let storage: Storage; + +beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'colony-feedback-')); + storage = new Storage(join(dir, 'test.db')); +}); + +afterEach(() => { + storage.close(); + rmSync(dir, { recursive: true, force: true }); +}); + +describe('Storage — feedback (ICM slice 2)', () => { + it('insertFeedback assigns an id and stores the row verbatim', () => { + const id = storage.insertFeedback({ + topic: 'frontend.routing', + prediction: 'useRouter returns null in server components', + correction: 'useRouter throws in server components', + context: 'reviewing apps/web App Router migration', + importance: 'high', + created_by: 'claude', + created_at: 1_700_000_000, + }); + expect(id).toBeGreaterThan(0); + + const row = storage.getFeedback(id); + expect(row).toMatchObject({ + id, + topic: 'frontend.routing', + prediction: 'useRouter returns null in server components', + correction: 'useRouter throws in server components', + context: 'reviewing apps/web App Router migration', + compressed: 1, + importance: 'high', + created_at: 1_700_000_000, + created_by: 'claude', + }); + }); + + it('defaults importance to medium and accepts null context', () => { + const id = storage.insertFeedback({ + topic: 'pgsql', + prediction: + 'ALTER TABLE ... DROP CONSTRAINT works inside a transaction on partitioned tables', + correction: 'must run outside a transaction on partitioned tables in postgres 15', + }); + const row = storage.getFeedback(id); + expect(row?.importance).toBe('medium'); + expect(row?.context).toBeNull(); + expect(row?.created_by).toBeNull(); + }); + + it('rejects an importance outside the allowed enum', () => { + expect(() => + storage.insertFeedback({ + topic: 't', + prediction: 'p', + correction: 'c', + // intentionally bad — the CHECK constraint should fire + importance: 'bogus' as 'high', + }), + ).toThrow(); + }); + + it('searchFeedback returns compact hits ranked by FTS5 (higher = better)', () => { + storage.insertFeedback({ + topic: 'frontend.routing', + prediction: 'returns null inside server components', + correction: 'throws an error inside server components', + created_at: 1_000, + }); + storage.insertFeedback({ + topic: 'backend.migrations', + prediction: 'alter table works inside a transaction', + correction: 'alter table on partitioned tables must run outside a transaction', + created_at: 2_000, + }); + + const hits = storage.searchFeedback({ query: 'server components', limit: 5 }); + expect(hits.length).toBeGreaterThan(0); + expect(hits[0]?.topic).toBe('frontend.routing'); + expect(hits[0]?.score).toBeGreaterThan(0); + expect(hits[0]?.snippet).toContain('throws'); + }); + + it('searchFeedback honors the topic filter', () => { + storage.insertFeedback({ + topic: 'frontend.routing', + prediction: 'returns null in server components', + correction: 'throws in server components', + }); + storage.insertFeedback({ + topic: 'backend.migrations', + prediction: 'server upgrade was easy', + correction: 'server upgrade was hard on partitioned tables', + }); + + const scoped = storage.searchFeedback({ + query: 'server', + topic: 'frontend.routing', + limit: 10, + }); + expect(scoped).toHaveLength(1); + expect(scoped[0]?.topic).toBe('frontend.routing'); + }); + + it('empty query with a topic falls back to a newest-first listing', () => { + storage.insertFeedback({ + topic: 'frontend.routing', + prediction: 'p1', + correction: 'c1', + created_at: 1_000, + }); + storage.insertFeedback({ + topic: 'frontend.routing', + prediction: 'p2', + correction: 'c2', + created_at: 2_000, + }); + storage.insertFeedback({ + topic: 'other', + prediction: 'p3', + correction: 'c3', + created_at: 3_000, + }); + + const hits = storage.searchFeedback({ query: ' ', topic: 'frontend.routing' }); + expect(hits).toHaveLength(2); + expect(hits[0]?.created_at).toBe(2_000); + expect(hits[1]?.created_at).toBe(1_000); + }); + + it('empty query without a topic returns no hits', () => { + storage.insertFeedback({ topic: 't', prediction: 'p', correction: 'c' }); + expect(storage.searchFeedback({ query: '' })).toEqual([]); + }); + + it('feedbackStats groups rows by topic with last_created_at + count', () => { + storage.insertFeedback({ + topic: 'a', + prediction: 'p', + correction: 'c', + created_at: 1_000, + }); + storage.insertFeedback({ + topic: 'a', + prediction: 'p2', + correction: 'c2', + created_at: 3_000, + }); + storage.insertFeedback({ + topic: 'b', + prediction: 'p', + correction: 'c', + created_at: 2_000, + }); + + const all = storage.feedbackStats(); + expect(all).toEqual([ + { topic: 'a', count: 2, last_created_at: 3_000 }, + { topic: 'b', count: 1, last_created_at: 2_000 }, + ]); + + const scoped = storage.feedbackStats({ topic: 'a' }); + expect(scoped).toEqual([{ topic: 'a', count: 2, last_created_at: 3_000 }]); + + expect(storage.feedbackStats({ topic: 'missing' })).toEqual([]); + }); + + it('FTS triggers track updates and deletes', () => { + const id = storage.insertFeedback({ + topic: 'topic', + prediction: 'alpha beta', + correction: 'gamma', + }); + expect(storage.searchFeedback({ query: 'beta' })).toHaveLength(1); + + // Simulate a downstream edit (we don't expose `updateFeedback` yet, but + // the trigger contract must hold so future surfaces can rely on it). + const db = (storage as unknown as { db: import('better-sqlite3').Database }).db; + db.prepare('UPDATE feedback SET prediction = ? WHERE id = ?').run('zeta', id); + expect(storage.searchFeedback({ query: 'beta' })).toHaveLength(0); + expect(storage.searchFeedback({ query: 'zeta' })).toHaveLength(1); + + db.prepare('DELETE FROM feedback WHERE id = ?').run(id); + expect(storage.searchFeedback({ query: 'zeta' })).toHaveLength(0); + }); +});