Skip to content

Commit c2f864a

Browse files
fix(llm): prevent UUID leak in knowledge titles and observation update targeting
Replace UUID-based target_id with 1-based target_number in compression prompts so LLMs reference candidates by index instead of echoing raw UUIDs. Add strip_uuid_from_title() helper in core with tests. Apply UUID stripping to knowledge extraction titles. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 41c3830 commit c2f864a

File tree

5 files changed

+118
-27
lines changed

5 files changed

+118
-27
lines changed

crates/core/src/lib.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,27 @@ pub use observation::*;
3131
pub use project_filter::*;
3232
pub use session::*;
3333

34+
/// Strips UUID patterns from text (e.g., `"sshd needs absolute path b3b61de2-..."` → `"sshd needs absolute path"`).
35+
///
36+
/// Handles both full UUIDs (`8-4-4-4-12` hex) and truncated ones (e.g., `b3b61de2-...`).
37+
#[must_use]
38+
pub fn strip_uuid_from_title(title: &str) -> String {
39+
use std::sync::LazyLock;
40+
41+
#[expect(
42+
clippy::unwrap_used,
43+
reason = "static regex pattern is compile-time validated"
44+
)]
45+
static UUID_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
46+
regex::Regex::new(
47+
r"\s*[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4,}(?:-[0-9a-fA-F]*)*\.{0,3}\s*",
48+
)
49+
.unwrap()
50+
});
51+
52+
UUID_RE.replace_all(title, " ").trim().to_string()
53+
}
54+
3455
/// Truncates a string to the given maximum length at a char boundary.
3556
#[must_use]
3657
pub fn truncate(s: &str, max_len: usize) -> &str {
@@ -44,3 +65,55 @@ pub fn truncate(s: &str, max_len: usize) -> &str {
4465
s.get(..end).unwrap_or("")
4566
}
4667
}
68+
69+
#[cfg(test)]
70+
mod tests {
71+
use super::*;
72+
73+
#[test]
74+
fn strip_uuid_full() {
75+
assert_eq!(
76+
strip_uuid_from_title("sshd needs absolute path b3b61de2-1234-5678-9abc-def012345678"),
77+
"sshd needs absolute path"
78+
);
79+
}
80+
81+
#[test]
82+
fn strip_uuid_truncated() {
83+
assert_eq!(
84+
strip_uuid_from_title("sshd needs absolute path b3b61de2-1234-5678..."),
85+
"sshd needs absolute path"
86+
);
87+
}
88+
89+
#[test]
90+
fn strip_uuid_mid_title() {
91+
assert_eq!(
92+
strip_uuid_from_title("Observation b3b61de2-1234-5678-9abc-def012345678 is important"),
93+
"Observation is important"
94+
);
95+
}
96+
97+
#[test]
98+
fn strip_uuid_no_uuid() {
99+
assert_eq!(
100+
strip_uuid_from_title("normal title without uuid"),
101+
"normal title without uuid"
102+
);
103+
}
104+
105+
#[test]
106+
fn strip_uuid_empty() {
107+
assert_eq!(strip_uuid_from_title(""), "");
108+
}
109+
110+
#[test]
111+
fn strip_uuid_multiple() {
112+
assert_eq!(
113+
strip_uuid_from_title(
114+
"a b3b61de2-1234-5678-9abc-def012345678 and c4c72ef3-5678-9abc-def0-123456789012"
115+
),
116+
"a and"
117+
);
118+
}
119+
}

crates/llm/src/ai_types.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,12 @@ pub struct ObservationJson {
112112
/// Context-aware compression action: "create", "update", or "skip"
113113
#[serde(default = "default_action")]
114114
pub action: String,
115-
/// UUID of existing observation to update (only for action="update")
115+
/// UUID of existing observation to update (only for action="update") — legacy field
116116
#[serde(default)]
117117
pub target_id: Option<String>,
118+
/// 1-based index of existing observation to update (only for action="update")
119+
#[serde(default)]
120+
pub target_number: Option<u32>,
118121
/// Reason for skipping (only for action="skip")
119122
#[serde(default)]
120123
pub skip_reason: Option<String>,

crates/llm/src/compression_prompt.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,8 @@ pub(crate) fn build_compression_prompt(
4141
format!(" facts=[{}]", obs.facts.join(", "))
4242
};
4343
entries.push_str(&format!(
44-
"[{}] id=\"{}\" type={} title=\"{}\" | {}{}\n",
44+
"[{}] type={} title=\"{}\" | {}{}\n",
4545
i.saturating_add(1),
46-
obs.id,
4746
obs.observation_type.as_str(),
4847
obs.title,
4948
narrative_preview,
@@ -57,7 +56,7 @@ EXISTING OBSERVATIONS (potentially related):
5756
{entries}
5857
DECISION (MANDATORY — choose exactly one):
5958
- If this is genuinely NEW knowledge not covered by any existing observation → action: "create"
60-
- If this REFINES or ADDS TO an existing observation above → action: "update", target_id: "<id of the observation to update>"
59+
- If this REFINES or ADDS TO an existing observation above → action: "update", target_number: <number in brackets of the observation to update>
6160
- If this adds ZERO new information beyond what already exists → action: "skip""#
6261
)
6362
};
@@ -86,7 +85,7 @@ DECISION (MANDATORY — choose exactly one):
8685
format!(
8786
r#"Return JSON:
8887
- action: one of "create", "update", "skip"
89-
- target_id: id of existing observation to update (required if action is "update")
88+
- target_number: number in brackets of existing observation to update (required if action is "update")
9089
- skip_reason: why this should be skipped (required if action is "skip")
9190
- noise_level: one of [{noise_levels}]
9291
- noise_reason: why this is/isn't worth remembering (max 100 chars)

crates/llm/src/knowledge.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,15 @@ Return JSON: {{"extract": false, "reason": "..."}}"#,
9191
LlmError::MissingField(format!("unknown knowledge_type: {}", knowledge_type_str))
9292
})?;
9393

94-
Ok(Some(KnowledgeInput::new(
95-
knowledge_type,
96-
extraction
94+
let title = opencode_mem_core::strip_uuid_from_title(
95+
&extraction
9796
.title
9897
.unwrap_or_else(|| observation.title.clone()),
98+
);
99+
100+
Ok(Some(KnowledgeInput::new(
101+
knowledge_type,
102+
title,
99103
extraction
100104
.description
101105
.unwrap_or_else(|| observation.narrative.clone().unwrap_or_default()),

crates/llm/src/observation.rs

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ fn parse_observation_response(
3030
id: &str,
3131
session_id: &str,
3232
project: Option<&str>,
33-
candidate_ids: &std::collections::HashSet<&str>,
33+
candidates: &[opencode_mem_core::Observation],
3434
) -> Result<CompressionResult, LlmError> {
3535
let stripped = opencode_mem_core::strip_markdown_json(response);
3636
let obs_json: ObservationJson =
@@ -109,22 +109,37 @@ fn parse_observation_response(
109109
.created_at(Utc::now())
110110
.build();
111111

112-
// Determine action: update requires valid target_id in candidate set
112+
// Determine action: update requires valid target resolved from candidates
113113
if action == "update" {
114-
if let Some(ref target_id) = obs_json.target_id {
115-
if candidate_ids.contains(&target_id.as_str()) {
116-
return Ok(CompressionResult::Update {
117-
target_id: target_id.clone(),
118-
observation,
119-
});
120-
}
121-
tracing::warn!(
122-
target_id = %target_id,
123-
"LLM returned update with target_id not in candidate set — treating as create"
124-
);
125-
} else {
126-
tracing::warn!("LLM returned action=update without target_id — treating as create");
114+
// Resolve target: prefer target_number (index into candidates), fall back to target_id (legacy)
115+
let resolved_target_id = obs_json
116+
.target_number
117+
.and_then(|n| {
118+
let idx = n.checked_sub(1)? as usize;
119+
candidates.get(idx).map(|o| o.id.to_string())
120+
})
121+
.or_else(|| {
122+
obs_json.target_id.as_ref().and_then(|tid| {
123+
if candidates.iter().any(|o| o.id.as_ref() == tid.as_str()) {
124+
Some(tid.clone())
125+
} else {
126+
None
127+
}
128+
})
129+
});
130+
131+
if let Some(target_id) = resolved_target_id {
132+
return Ok(CompressionResult::Update {
133+
target_id,
134+
observation,
135+
});
127136
}
137+
tracing::warn!(
138+
target_number = ?obs_json.target_number,
139+
target_id = ?obs_json.target_id,
140+
candidates_len = candidates.len(),
141+
"LLM returned update with unresolvable target — treating as create"
142+
);
128143
}
129144

130145
if action != "create" {
@@ -167,16 +182,13 @@ impl LlmClient {
167182
max_tokens: None,
168183
};
169184

170-
let candidate_ids: std::collections::HashSet<&str> =
171-
candidates.iter().map(|o| o.id.as_ref()).collect();
172-
173185
let response = self.chat_completion(&request).await?;
174186
parse_observation_response(
175187
&response,
176188
id,
177189
input.session_id.as_ref(),
178190
project,
179-
&candidate_ids,
191+
candidates,
180192
)
181193
}
182194

0 commit comments

Comments
 (0)