Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions crates/loomweave-cli/tests/analyze_hardening.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,24 @@ fn finding_evidence(conn: &Connection, rule_id: &str) -> String {
.expect("query finding evidence")
}

/// Fetch the `entity_id` anchor column stored for the finding recorded under
/// `rule_id`.
///
/// Intended for rule ids that have exactly one finding row. Panics with
/// "query finding entity_id" if the query returns an error.
fn finding_entity_id(conn: &Connection, rule_id: &str) -> String {
    let sql = "SELECT entity_id FROM findings WHERE rule_id = ?1";
    let anchor: Result<String, _> = conn.query_row(sql, [rule_id], |r| r.get(0));
    anchor.expect("query finding entity_id")
}

/// Number of `entities` rows whose `id` column equals `id`.
///
/// Used as an FK-satisfaction probe: `0` means no such entity was inserted,
/// `1` means the id names a real entity. Panics with "query entity row count"
/// if the query returns an error.
fn entity_row_count(conn: &Connection, id: &str) -> i64 {
    let sql = "SELECT COUNT(*) FROM entities WHERE id = ?1";
    let count: Result<i64, _> = conn.query_row(sql, [id], |r| r.get(0));
    count.expect("query entity row count")
}

/// Assert no live process carries `marker` ("KEY=VALUE") in its environment.
///
/// The marker is set on the spawned `loomweave analyze` process only; the
Expand Down Expand Up @@ -510,3 +528,71 @@ fn abort_at_analyze_classified_and_terminal() {

assert_no_leaked_child(&marker_pair);
}

/// Trust boundary (review follow-up to clarion-48af930f2a): a plugin-reported
/// finding must NOT be able to set the host-reserved `anchor_entity_id`
/// metadata key.
///
/// The host's `host_finding_anchor_id` takes that key verbatim as
/// `findings.entity_id` (FK-enforced at insert). A forged value naming a
/// nonexistent entity would therefore hard-fail the WHOLE analyze run on the
/// findings FK, and one naming an existing entity would silently mis-anchor
/// the finding. The fix strips the key at the plugin boundary
/// (`validate_plugin_finding`); this test proves the end-to-end consequence:
/// the run COMPLETES and the finding falls back to a real (file/project)
/// anchor instead of the forged id.
#[test]
fn plugin_forged_anchor_entity_id_does_not_win_or_fail_the_run() {
    const FORGED: &str = "fixture:gadget:forged.ghost.nonexistent";
    const FORGED_RULE: &str = "LMWV-FIXTURE-FORGED-ANCHOR";

    // Stage the fixture plugin and a project that can discover it via PATH.
    let fixture_exe = fixture_binary_path();
    let plugin_dir = setup_plugin_dir(&fixture_exe);
    let (project_dir, path_with_plugin) = setup_project(&plugin_dir);
    let (marker_key, marker_value, marker_pair) = unique_marker("forged-anchor");

    // The forged-anchor switch makes the plugin emit a finding whose metadata
    // carries `anchor_entity_id = FORGED`. With the boundary strip in place
    // the run must SUCCEED; before the fix it hard-failed on the findings FK.
    loomweave_bin()
        .arg("analyze")
        .arg(project_dir.path())
        .env("PATH", &path_with_plugin)
        .env(&marker_key, &marker_value)
        .env("LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR", "1")
        .timeout(ANALYZE_BACKSTOP)
        .assert()
        .success();

    let conn = open_db(&project_dir);

    // The run committed cleanly: the forged FK did not abort it.
    let run = run_record(&conn);
    let (run_count, run_status, _failure_reason) = run;
    assert_eq!(run_count, 1, "exactly one run row");
    assert_eq!(
        run_status, "completed",
        "a forged plugin anchor must not fail the run"
    );

    // The finding survived: the strip drops only the reserved key, not the
    // diagnostic itself.
    assert_eq!(
        finding_count(&conn, FORGED_RULE),
        1,
        "the plugin finding is persisted, not dropped"
    );

    // It must be anchored to something other than the forged id.
    let anchor = finding_entity_id(&conn, FORGED_RULE);
    assert_ne!(
        anchor, FORGED,
        "the forged anchor_entity_id must NOT become the finding's entity_id"
    );

    // The forged id never materialised as an entity row.
    assert_eq!(
        entity_row_count(&conn, FORGED),
        0,
        "the forged id must never have been inserted as an entity"
    );

    // The fallback anchor names exactly one real entity row.
    assert_eq!(
        entity_row_count(&conn, &anchor),
        1,
        "the finding must anchor to a real (file/project) entity; got {anchor}"
    );

    assert_no_leaked_child(&marker_pair);
}
53 changes: 53 additions & 0 deletions crates/loomweave-core/src/plugin/host_validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,16 @@ fn stringify_finding_metadata_value(value: serde_json::Value) -> Result<String,
}
}

/// Finding-metadata key that the HOST reserves for its own anchor precedence.
///
/// The cli's `host_finding_anchor_id` takes the value stored under this key
/// verbatim as `findings.entity_id` (FK-enforced at insert), so it overrides
/// the trusted file anchor. A plugin-reported finding must NOT be able to set
/// it across the trust boundary:
///
/// - a nonexistent id would hard-fail the whole analyze run on the findings
///   FK, and
/// - an existing id would silently mis-associate the finding.
///
/// Plugin-finding validation therefore strips this key. The legitimate
/// producer (the host's duplicate-locator finding) bypasses that validation
/// path entirely.
const HOST_RESERVED_ANCHOR_ENTITY_ID_KEY: &str = "anchor_entity_id";

pub(crate) fn validate_plugin_finding(
raw: AnalyzeFileFinding,
rule_id_prefix: &str,
Expand Down Expand Up @@ -218,6 +228,11 @@ pub(crate) fn validate_plugin_finding(
if key.is_empty() {
return Err("metadata key is empty".to_owned());
}
if key == HOST_RESERVED_ANCHOR_ENTITY_ID_KEY {
// Host-reserved anchor-precedence key — drop any plugin-supplied
// value so it cannot override the trusted file anchor below.
continue;
}
if key.len() > MAX_ENTITY_FIELD_BYTES {
return Err(format!(
"metadata key exceeds {MAX_ENTITY_FIELD_BYTES} bytes"
Expand Down Expand Up @@ -284,6 +299,44 @@ mod tests {
);
}

#[test]
fn validate_plugin_finding_strips_host_reserved_anchor_entity_id() {
    // `anchor_entity_id` is a HOST-reserved precedence key consumed by the
    // cli's `host_finding_anchor_id`: it overrides the trusted file anchor and
    // is taken verbatim as `findings.entity_id` (FK-enforced at insert). A
    // plugin must NOT be able to set it across the trust boundary: a
    // nonexistent id would hard-fail the whole analyze run on the findings
    // FK, and an existing id would silently mis-associate the finding.
    // Validation strips it (symmetric with the host-overwritten
    // `anchor_file_path`); other plugin metadata is preserved.
    let mut forged = finding("PY-CODE", "m");
    // Seed one reserved key (must be dropped) and one ordinary key (must stay).
    for (key, value) in [
        ("anchor_entity_id", "python:function:evil.injected"),
        ("detail", "kept"),
    ] {
        forged
            .metadata
            .insert(key.to_owned(), serde_json::Value::String(value.to_owned()));
    }

    let analyzed_path = Path::new("pkg/a.py");
    let validated = validate_plugin_finding(forged, "PY-", analyzed_path)
        .expect("a well-formed finding validates");
    let metadata = &validated.metadata;

    assert_eq!(
        metadata.get("anchor_entity_id"),
        None,
        "a plugin-supplied anchor_entity_id must be stripped (host-reserved key)"
    );
    assert_eq!(
        metadata.get("anchor_file_path").map(String::as_str),
        Some("pkg/a.py"),
        "the trusted analyzed path is still recorded as anchor_file_path"
    );
    assert_eq!(
        metadata.get("detail").map(String::as_str),
        Some("kept"),
        "non-reserved plugin metadata is preserved"
    );
}

#[test]
fn invalid_unresolved_call_site_reason_rejects_empty_or_reversed_range() {
let mut accepted = BTreeSet::new();
Expand Down
30 changes: 27 additions & 3 deletions crates/loomweave-plugin-fixture/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
//! twice in one file, or shared across files) without extra env knobs
//! (clarion-b19fe90c3e).

use std::collections::BTreeMap;
use std::io::{BufReader, Write};

/// True when the named environment variable is set to exactly `"1"`.
Expand All @@ -38,8 +39,8 @@ fn hang_forever() -> ! {
use loomweave_core::plugin::limits::ContentLengthCeiling;
use loomweave_core::plugin::transport::{Frame, read_frame, write_frame};
use loomweave_core::plugin::{
AnalyzeFileParams, AnalyzeFileResult, AnalyzeFileStats, InitializeResult, JsonRpcVersion,
ProtocolError, ResponseEnvelope, ResponsePayload, ShutdownResult,
AnalyzeFileFinding, AnalyzeFileParams, AnalyzeFileResult, AnalyzeFileStats, InitializeResult,
JsonRpcVersion, ProtocolError, ResponseEnvelope, ResponsePayload, ShutdownResult,
};
use serde_json::Value;

Expand Down Expand Up @@ -176,7 +177,7 @@ fn main() {
entities,
edges: vec![],
stats: AnalyzeFileStats::default(),
findings: vec![],
findings: forged_anchor_findings(),
};
send_result(&mut writer, id, serde_json::to_value(result).unwrap());
}
Expand All @@ -195,6 +196,29 @@ fn main() {
}
}

/// Opt-in trust-boundary probe for the host's plugin-finding validation.
///
/// When the `LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR` flag is enabled, returns
/// a single plugin finding whose metadata carries a FORGED host-reserved
/// `anchor_entity_id` naming an entity that does not exist. The host MUST
/// strip this key at the plugin boundary (`validate_plugin_finding`) so it
/// cannot override the trusted file anchor; otherwise the finding's
/// `entity_id` FK insert hard-fails the whole analyze run.
///
/// Returns an empty vector when the flag is not enabled, so the probe is inert
/// in normal runs.
fn forged_anchor_findings() -> Vec<AnalyzeFileFinding> {
    if env_flag("LOOMWEAVE_FIXTURE_FINDING_FORGED_ANCHOR") {
        // The only metadata entry is the reserved key with a bogus entity id.
        let forged_metadata = BTreeMap::from([(
            "anchor_entity_id".to_owned(),
            Value::String("fixture:gadget:forged.ghost.nonexistent".to_owned()),
        )]);
        let probe = AnalyzeFileFinding {
            subcode: "LMWV-FIXTURE-FORGED-ANCHOR".to_owned(),
            severity: Some("warning".to_owned()),
            message: "fixture emitted a finding with a forged anchor_entity_id".to_owned(),
            metadata: forged_metadata,
        };
        vec![probe]
    } else {
        Vec::new()
    }
}

/// Parse `gadget <name>` lines out of the analysed file and build one
/// `fixture:gadget:<name>` raw entity per line, carrying a one-line
/// `source_range` so the host derives a content hash for it. An unreadable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
},
"findings": [
{
"fingerprint": "9a291cac4a30b2cd8353f89eb428e184b01cb3919563ebeffd672745bf9cc665",
"fingerprint": "242d8565123394582c282d7356cd18a1ddcbe2a4dca9d51bce9d5afaec70230a",
"line_start": 14,
"path": "svc.py",
"rule_id": "PY-WL-101"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ const GOLDEN: &str = include_str!("fixtures/wardline-taint-fact-wire.golden.json
/// Tamper proof: perturbing one hex char of this const (or one byte of the
/// fixture) makes `golden_bytes_match_layer1_pin` fail with a
/// `left != right` mismatch — the pin is load-bearing, not decorative.
const GOLDEN_BLAKE3: &str = "5ecabddd14bfb6a1c245c62bfa7b34e2cb4a5c9209c0f7da0250e7293f91ca6a";
const GOLDEN_BLAKE3: &str = "ca7a1cd80c87ee20857aa7e9da221784ca068015bd423031bee822640ab43162";

/// The plugin under which Wardline's Python-frontend qualnames resolve. The
/// golden is a Python scan (`svc.py`), so its qualnames live under
Expand Down