diff --git a/crates/loomweave-cli/tests/analyze_hardening.rs b/crates/loomweave-cli/tests/analyze_hardening.rs index 0e93e0d6..d98789a2 100644 --- a/crates/loomweave-cli/tests/analyze_hardening.rs +++ b/crates/loomweave-cli/tests/analyze_hardening.rs @@ -156,6 +156,24 @@ fn finding_evidence(conn: &Connection, rule_id: &str) -> String { .expect("query finding evidence") } +/// The `entity_id` anchor column of the single `rule_id` finding. +fn finding_entity_id(conn: &Connection, rule_id: &str) -> String { + conn.query_row( + "SELECT entity_id FROM findings WHERE rule_id = ?1", + [rule_id], + |row| row.get(0), + ) + .expect("query finding entity_id") +} + +/// Count of `entities` rows with the given id (FK-satisfaction probe). +fn entity_row_count(conn: &Connection, id: &str) -> i64 { + conn.query_row("SELECT COUNT(*) FROM entities WHERE id = ?1", [id], |row| { + row.get(0) + }) + .expect("query entity row count") +} + /// Assert no live process carries `marker` ("KEY=VALUE") in its environment. /// /// The marker is set on the spawned `loomweave analyze` process only; the @@ -510,3 +528,71 @@ fn abort_at_analyze_classified_and_terminal() { assert_no_leaked_child(&marker_pair); } + +/// Trust boundary (review follow-up to clarion-48af930f2a): a plugin-reported +/// finding must NOT be able to set the host-reserved `anchor_entity_id` +/// metadata key. The host's `host_finding_anchor_id` takes that key verbatim as +/// `findings.entity_id` (FK-enforced at insert), so a forged value naming a +/// nonexistent entity would hard-fail the WHOLE analyze run on the findings FK +/// (and an existing one would silently mis-anchor the finding). The fix strips +/// the key at the plugin boundary (`validate_plugin_finding`); this test proves +/// the end-to-end consequence: the run COMPLETES and the finding falls back to +/// a real (file/project) anchor instead of the forged id. 
+#[test] +fn plugin_forged_anchor_entity_id_does_not_win_or_fail_the_run() { + const FORGED: &str = "fixture:gadget:forged.ghost.nonexistent"; + const FORGED_RULE: &str = "LMWV-FIXTURE-FORGED-ANCHOR"; + + let fixture_bin = fixture_binary_path(); + let plugin_dir = setup_plugin_dir(&fixture_bin); + let (project_dir, new_path) = setup_project(&plugin_dir); + let (marker_key, marker_value, marker_pair) = unique_marker("forged-anchor"); + + // The forged-anchor switch makes the plugin emit a finding whose metadata + // carries `anchor_entity_id = FORGED`. With the boundary strip in place the + // run must SUCCEED — pre-fix it hard-failed on the findings FK. + loomweave_bin() + .args(["analyze"]) + .arg(project_dir.path()) + .env("PATH", &new_path) + .env(&marker_key, &marker_value) + .env("LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR", "1") + .timeout(ANALYZE_BACKSTOP) + .assert() + .success(); + + let conn = open_db(&project_dir); + + // The run committed cleanly — the forged FK did not abort it. + let (run_count, run_status, _failure_reason) = run_record(&conn); + assert_eq!(run_count, 1, "exactly one run row"); + assert_eq!( + run_status, "completed", + "a forged plugin anchor must not fail the run" + ); + + // The finding survived (the strip drops only the reserved key, not the + // diagnostic) and anchored to a REAL entity that is not the forged id. 
+ assert_eq!( + finding_count(&conn, FORGED_RULE), + 1, + "the plugin finding is persisted, not dropped" + ); + let anchor = finding_entity_id(&conn, FORGED_RULE); + assert_ne!( + anchor, FORGED, + "the forged anchor_entity_id must NOT become the finding's entity_id" + ); + assert_eq!( + entity_row_count(&conn, FORGED), + 0, + "the forged id must never have been inserted as an entity" + ); + assert_eq!( + entity_row_count(&conn, &anchor), + 1, + "the finding must anchor to a real (file/project) entity; got {anchor}" + ); + + assert_no_leaked_child(&marker_pair); +} diff --git a/crates/loomweave-core/src/plugin/host_validate.rs b/crates/loomweave-core/src/plugin/host_validate.rs index b5db661b..7e7b2272 100644 --- a/crates/loomweave-core/src/plugin/host_validate.rs +++ b/crates/loomweave-core/src/plugin/host_validate.rs @@ -169,6 +169,16 @@ fn stringify_finding_metadata_value(value: serde_json::Value) -> Result MAX_ENTITY_FIELD_BYTES { return Err(format!( "metadata key exceeds {MAX_ENTITY_FIELD_BYTES} bytes" @@ -284,6 +299,44 @@ mod tests { ); } + #[test] + fn validate_plugin_finding_strips_host_reserved_anchor_entity_id() { + // `anchor_entity_id` is a HOST-reserved precedence key consumed by the + // cli's `host_finding_anchor_id`: it overrides the trusted file anchor + // and is taken verbatim as `findings.entity_id` (FK-enforced at insert). + // A plugin must NOT be able to set it across the trust boundary — a + // nonexistent id would hard-fail the whole analyze run on the findings + // FK, and an existing id would silently mis-associate the finding. + // Validation strips it (symmetric with the host-overwritten + // `anchor_file_path`); other plugin metadata is preserved. 
+ let mut raw = finding("PY-CODE", "m"); + raw.metadata.insert( + "anchor_entity_id".to_owned(), + serde_json::Value::String("python:function:evil.injected".to_owned()), + ); + raw.metadata.insert( + "detail".to_owned(), + serde_json::Value::String("kept".to_owned()), + ); + let ok = validate_plugin_finding(raw, "PY-", Path::new("pkg/a.py")) + .expect("a well-formed finding validates"); + assert_eq!( + ok.metadata.get("anchor_entity_id"), + None, + "a plugin-supplied anchor_entity_id must be stripped (host-reserved key)" + ); + assert_eq!( + ok.metadata.get("anchor_file_path").map(String::as_str), + Some("pkg/a.py"), + "the trusted analyzed path is still recorded as anchor_file_path" + ); + assert_eq!( + ok.metadata.get("detail").map(String::as_str), + Some("kept"), + "non-reserved plugin metadata is preserved" + ); + } + #[test] fn invalid_unresolved_call_site_reason_rejects_empty_or_reversed_range() { let mut accepted = BTreeSet::new(); diff --git a/crates/loomweave-plugin-fixture/src/main.rs b/crates/loomweave-plugin-fixture/src/main.rs index 6ff3a409..37862ea4 100644 --- a/crates/loomweave-plugin-fixture/src/main.rs +++ b/crates/loomweave-plugin-fixture/src/main.rs @@ -16,6 +16,7 @@ //! twice in one file, or shared across files) without extra env knobs //! (clarion-b19fe90c3e). +use std::collections::BTreeMap; use std::io::{BufReader, Write}; /// True when the named environment variable is set to exactly `"1"`. @@ -38,8 +39,8 @@ fn hang_forever() -> ! 
{ use loomweave_core::plugin::limits::ContentLengthCeiling; use loomweave_core::plugin::transport::{Frame, read_frame, write_frame}; use loomweave_core::plugin::{ - AnalyzeFileParams, AnalyzeFileResult, AnalyzeFileStats, InitializeResult, JsonRpcVersion, - ProtocolError, ResponseEnvelope, ResponsePayload, ShutdownResult, + AnalyzeFileFinding, AnalyzeFileParams, AnalyzeFileResult, AnalyzeFileStats, InitializeResult, + JsonRpcVersion, ProtocolError, ResponseEnvelope, ResponsePayload, ShutdownResult, }; use serde_json::Value; @@ -176,7 +177,7 @@ fn main() { entities, edges: vec![], stats: AnalyzeFileStats::default(), - findings: vec![], + findings: forged_anchor_findings(), }; send_result(&mut writer, id, serde_json::to_value(result).unwrap()); } @@ -195,6 +196,29 @@ fn main() { } } +/// Opt-in trust-boundary probe: when `LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR` +/// is set, emit one plugin finding whose metadata carries a FORGED +/// host-reserved `anchor_entity_id` naming an entity that does not exist. The +/// host MUST strip this key at the plugin boundary (`validate_plugin_finding`) +/// so it cannot override the trusted file anchor; otherwise the finding's +/// `entity_id` FK insert hard-fails the whole analyze run. Inert in normal runs. +fn forged_anchor_findings() -> Vec<AnalyzeFileFinding> { + if !env_flag("LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR") { + return Vec::new(); + } + let mut metadata = BTreeMap::new(); + metadata.insert( + "anchor_entity_id".to_owned(), + Value::String("fixture:gadget:forged.ghost.nonexistent".to_owned()), + ); + vec![AnalyzeFileFinding { + subcode: "LMWV-FIXTURE-FORGED-ANCHOR".to_owned(), + severity: Some("warning".to_owned()), + message: "fixture emitted a finding with a forged anchor_entity_id".to_owned(), + metadata, + }] +} + /// Parse `gadget ` lines out of the analysed file and build one /// `fixture:gadget:` raw entity per line, carrying a one-line /// `source_range` so the host derives a content hash for it. 
An unreadable diff --git a/crates/loomweave-storage/tests/fixtures/wardline-taint-fact-wire.golden.json b/crates/loomweave-storage/tests/fixtures/wardline-taint-fact-wire.golden.json index ddea2227..297ea60e 100644 --- a/crates/loomweave-storage/tests/fixtures/wardline-taint-fact-wire.golden.json +++ b/crates/loomweave-storage/tests/fixtures/wardline-taint-fact-wire.golden.json @@ -64,7 +64,7 @@ }, "findings": [ { - "fingerprint": "9a291cac4a30b2cd8353f89eb428e184b01cb3919563ebeffd672745bf9cc665", + "fingerprint": "242d8565123394582c282d7356cd18a1ddcbe2a4dca9d51bce9d5afaec70230a", "line_start": 14, "path": "svc.py", "rule_id": "PY-WL-101" diff --git a/crates/loomweave-storage/tests/wardline_taint_fact_conformance_oracle.rs b/crates/loomweave-storage/tests/wardline_taint_fact_conformance_oracle.rs index 67924c43..2aaa3adf 100644 --- a/crates/loomweave-storage/tests/wardline_taint_fact_conformance_oracle.rs +++ b/crates/loomweave-storage/tests/wardline_taint_fact_conformance_oracle.rs @@ -68,7 +68,7 @@ const GOLDEN: &str = include_str!("fixtures/wardline-taint-fact-wire.golden.json /// Tamper proof: perturbing one hex char of this const (or one byte of the /// fixture) makes `golden_bytes_match_layer1_pin` fail with a /// `left != right` mismatch — the pin is load-bearing, not decorative. -const GOLDEN_BLAKE3: &str = "5ecabddd14bfb6a1c245c62bfa7b34e2cb4a5c9209c0f7da0250e7293f91ca6a"; +const GOLDEN_BLAKE3: &str = "ca7a1cd80c87ee20857aa7e9da221784ca068015bd423031bee822640ab43162"; /// The plugin under which Wardline's Python-frontend qualnames resolve. The /// golden is a Python scan (`svc.py`), so its qualnames live under