Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Gradata/src/gradata/hooks/adapters/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ def hook_command(brain_dir: Path) -> str:
)


def pre_compact_command(brain_dir: Path) -> str:
    """Build the shell command line that runs the PreCompact hook module.

    The brain directory is handed to the hook through a ``BRAIN_DIR=``
    environment prefix; both the directory and the interpreter path are
    shell-quoted so paths containing spaces survive.

    Args:
        brain_dir: Brain directory the hook should operate on.

    Returns:
        A single command string of the form
        ``BRAIN_DIR=<dir> <python> -m gradata.hooks.pre_compact``.
    """
    env_prefix = f"BRAIN_DIR={shlex.quote(str(brain_dir))} "
    invocation = f"{shlex.quote(sys.executable)} -m gradata.hooks.pre_compact"
    return env_prefix + invocation


def mcp_command(brain_dir: Path) -> list[str]:
    """Return the argv list that launches the Gradata MCP server.

    Args:
        brain_dir: Brain directory passed to the server via ``--brain-dir``.

    Returns:
        ``[<python>, "-m", "gradata.mcp_server", "--brain-dir", <dir>]``.
    """
    argv = [sys.executable, "-m", "gradata.mcp_server"]
    argv += ["--brain-dir", str(brain_dir)]
    return argv

Expand Down
79 changes: 48 additions & 31 deletions Gradata/src/gradata/hooks/adapters/claude_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
failure,
hook_command,
hook_signature,
pre_compact_command,
read_json,
write_json,
)
Expand Down Expand Up @@ -58,24 +59,41 @@ def install(brain_dir: Path, agent_config_path: Path) -> InstallResult:
data = read_json(agent_config_path)
hooks = data.setdefault("hooks", {})
pre_tool = hooks.setdefault("PreToolUse", [])
if any(sig in str(item) for item in pre_tool):
pre_compact = hooks.setdefault("PreCompact", [])
has_pre_tool = any(sig in str(item) for item in pre_tool)
has_pre_compact = any(sig in str(item) for item in pre_compact)
if has_pre_tool and has_pre_compact:
return InstallResult(
AGENT, agent_config_path, "already_present", "hook already present"
)
pre_tool.append(
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": hook_command(brain_dir),
"id": sig,
}
],
}
)
if not has_pre_tool:
pre_tool.append(
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": hook_command(brain_dir),
"id": sig,
}
],
}
)
if not has_pre_compact:
pre_compact.append(
{
"matcher": "manual|auto",
"hooks": [
{
"type": "command",
"command": pre_compact_command(brain_dir),
"id": sig,
}
],
}
)
write_json(agent_config_path, data)
return InstallResult(AGENT, agent_config_path, "added", "installed PreToolUse hook")
return InstallResult(AGENT, agent_config_path, "added", "installed Claude Code hooks")
except Exception as exc:
return failure(AGENT, agent_config_path, exc)

Expand All @@ -98,27 +116,26 @@ def uninstall(brain_dir: Path, agent_config_path: Path) -> InstallResult:
hooks = data.get("hooks")
if not isinstance(hooks, dict):
return InstallResult(AGENT, agent_config_path, "already_present", "no hooks block")
pre_tool = hooks.get("PreToolUse")
if not isinstance(pre_tool, list):
return InstallResult(AGENT, agent_config_path, "already_present", "no PreToolUse")

removed = 0
kept: list = []
for entry in pre_tool:
entry_str = str(entry)
if sig in entry_str:
# Either the entry's `hooks[].id` carries our sig, or the
# whole entry was ours. Drop it.
removed += 1
for lifecycle in ("PreToolUse", "PreCompact"):
entries = hooks.get(lifecycle)
if not isinstance(entries, list):
continue
kept.append(entry)
kept: list = []
for entry in entries:
entry_str = str(entry)
if sig in entry_str:
# Either the entry's `hooks[].id` carries our sig, or the
# whole entry was ours. Drop it.
removed += 1
continue
kept.append(entry)
if kept:
hooks[lifecycle] = kept
else:
hooks.pop(lifecycle, None)
if removed == 0:
return InstallResult(AGENT, agent_config_path, "already_present", "hook not present")

if kept:
hooks["PreToolUse"] = kept
else:
hooks.pop("PreToolUse", None)
if not hooks:
data.pop("hooks", None)
write_json(agent_config_path, data)
Expand Down
147 changes: 102 additions & 45 deletions Gradata/src/gradata/hooks/pre_compact.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,125 @@
"""PreCompact hook: save brain state snapshot before context compaction."""
"""Claude Code PreCompact hook: snapshot bounded Gradata context before compaction."""

from __future__ import annotations

import hashlib
import json
import os
import tempfile
from datetime import UTC, datetime
import re
import time
from pathlib import Path
from typing import Any

from gradata._atomic import atomic_write_text
from gradata.hooks._base import resolve_brain_dir, run_hook
from gradata.hooks._profiles import Profile

HOOK_META = {
"event": "PreCompact",
"matcher": "manual|auto",
"profile": Profile.STANDARD,
"profile": Profile.MINIMAL,
"timeout": 5000,
}
_MAX_TEXT_BYTES = 64_000
_MAX_JSON_BYTES = 128_000


def main(data: dict) -> dict | None:
def _safe_filename(value: object) -> str:
    """Turn an arbitrary identifier into a safe file-name stem.

    Runs of characters outside ``[A-Za-z0-9_.-]`` collapse to a single ``-``
    and leading/trailing ``.``/``-`` are stripped, so the result never
    contains a path separator and never starts or ends with a dot.

    Args:
        value: Identifier to sanitize; falsy or blank values get a
            timestamp-based ``precompact-<ms>`` name instead.

    Returns:
        A non-empty stem of at most 120 characters. If sanitizing leaves
        nothing usable, the first 16 hex digits of the SHA-256 of the raw
        text are returned.
    """
    edge_chars = ".-"
    raw = str(value or "").strip()
    if not raw:
        raw = f"precompact-{int(time.time() * 1000)}"
    safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip(edge_chars)
    if not safe:
        safe = hashlib.sha256(raw.encode("utf-8", errors="replace")).hexdigest()[:16]
    # Truncation can expose a new trailing '.' or '-' (e.g. "aaa….b" cut just
    # after the dot), which would yield a stem ending in '.' and a final file
    # name containing "..". Strip the edge again; the first character is never
    # an edge character here, so the result stays non-empty.
    return safe[:120].rstrip(edge_chars)


def _session_id(data: dict[str, Any]) -> str:
    """Pick a session identifier out of the hook payload.

    The keys ``session_id``, ``sessionId``, ``conversation_id`` and
    ``conversationId`` are tried in that order; the first one holding a
    non-blank string wins and is returned stripped.

    Args:
        data: Hook payload.

    Returns:
        The stripped identifier, or the first 16 hex digits of the SHA-256
        of the payload's sorted JSON encoding when no key qualifies.
    """
    candidates = (
        data.get(name)
        for name in ("session_id", "sessionId", "conversation_id", "conversationId")
    )
    for candidate in candidates:
        if not isinstance(candidate, str):
            continue
        trimmed = candidate.strip()
        if trimmed:
            return trimmed
    # No usable id: derive a stable one from the payload contents.
    canonical = json.dumps(data, sort_keys=True, default=str)
    digest = hashlib.sha256(canonical.encode("utf-8", errors="replace"))
    return digest.hexdigest()[:16]


def _read_bounded(path: Path, *, limit: int = _MAX_TEXT_BYTES) -> str | None:
try:
brain_dir_str = resolve_brain_dir()
if not brain_dir_str:
if not path.is_file():
return None
brain_dir = Path(brain_dir_str)

compact_type = data.get("type", "unknown") if data else "unknown"

snapshot = {
"timestamp": datetime.now(UTC).isoformat(),
"compact_type": compact_type,
"brain_dir": str(brain_dir),
}

# Include lesson count if available
lessons_path = brain_dir / "lessons.md"
if lessons_path.is_file():
text = lessons_path.read_text(encoding="utf-8")
snapshot["lesson_count"] = len(
[
line
for line in text.splitlines()
if (stripped := line.strip()) and not stripped.startswith("#")
]
)

if hasattr(os, "getuid"):
uid = os.getuid()
else:
try:
uid = os.getlogin()
except OSError:
uid = f"pid{os.getpid()}"
user_tmp = Path(tempfile.gettempdir()) / f"gradata-{uid}"
user_tmp.mkdir(parents=True, exist_ok=True)
dir_hash = hashlib.md5(str(brain_dir).encode()).hexdigest()[:8]
snapshot_path = user_tmp / f"compact-snapshot-{dir_hash}.json"
snapshot_path.write_text(json.dumps(snapshot, indent=2), encoding="utf-8")

return {"result": "State saved before compaction"}
except Exception:
data = path.read_bytes()[:limit]
return data.decode("utf-8", errors="replace")
Comment on lines +45 to +50
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Use truly bounded reads in _read_bounded.

Line 49 uses path.read_bytes()[:limit], which loads the full file before truncating. That defeats bounded-read behavior and can spike memory on large files.

Suggested fix
 def _read_bounded(path: Path, *, limit: int = _MAX_TEXT_BYTES) -> str | None:
     try:
         if not path.is_file():
             return None
-        data = path.read_bytes()[:limit]
+        with path.open("rb") as fh:
+            data = fh.read(limit)
         return data.decode("utf-8", errors="replace")
     except OSError:
         return None
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Gradata/src/gradata/hooks/pre_compact.py` around lines 45 - 50, The function
_read_bounded currently calls path.read_bytes()[:limit] which reads the entire
file into memory then truncates; change it to perform a truly bounded read by
opening the file in binary mode and calling file.read(limit) (e.g., with
path.open("rb") as f: data = f.read(limit)) and then decode with
errors="replace"; keep the existing is_file check and return None for non-files
and return the decoded string (or None) as before.

except OSError:
return None


def _snapshot_path(brain_dir: Path, session_id: str) -> Path:
    """Return the snapshot file location for *session_id* under *brain_dir*.

    The file lives in ``<brain_dir>/.precompact-snapshots`` and is named
    after the sanitized session id with a ``.json`` suffix.
    """
    snapshot_dir = brain_dir / ".precompact-snapshots"
    stem = _safe_filename(session_id)
    return snapshot_dir / f"{stem}.json"


def _compact_payload(data: dict[str, Any]) -> dict[str, Any]:
    """Return the subset of the hook payload that is stored in the snapshot.

    Only an allow-list of keys is copied; keys absent from *data* are
    omitted rather than filled with ``None``.

    Args:
        data: Full hook payload.

    Returns:
        A new dict holding the allow-listed keys present in *data*, in
        allow-list order.
    """
    allowed = (
        "hook_event_name",
        "session_id",
        "sessionId",
        "transcript_path",
        "cwd",
        "trigger",
        "custom_instructions",
        "model",
    )
    subset: dict[str, Any] = {}
    for field in allowed:
        if field in data:
            subset[field] = data[field]
    return subset


def _build_snapshot(brain_dir: Path, data: dict[str, Any]) -> dict[str, Any]:
    """Assemble the snapshot document written before compaction.

    Args:
        brain_dir: Brain directory whose context files are sampled.
        data: Hook payload.

    Returns:
        A dict with the schema version, creation time, selected payload
        fields, the allow-listed payload copy, the context read from
        ``brain_prompt.md`` and ``.last_injection.json`` (when available),
        and the byte limits used for those reads.
    """
    sid = _session_id(data)
    context: dict[str, Any] = {}

    prompt_text = _read_bounded(brain_dir / "brain_prompt.md")
    if prompt_text is not None:
        context["brain_prompt_md"] = prompt_text

    injection_text = _read_bounded(brain_dir / ".last_injection.json", limit=_MAX_JSON_BYTES)
    if injection_text is not None:
        try:
            parsed = json.loads(injection_text)
        except json.JSONDecodeError:
            # Keep the undecodable text rather than dropping it.
            context["last_injection_raw"] = injection_text
        else:
            context["last_injection"] = parsed

    limits = {
        "max_text_bytes": _MAX_TEXT_BYTES,
        "max_json_bytes": _MAX_JSON_BYTES,
        "transcript_content_captured": False,
    }
    snapshot: dict[str, Any] = {
        "schema_version": 1,
        "created_at": time.time(),
        "event": "PreCompact",
        "session_id": sid,
        "trigger": data.get("trigger"),
        "cwd": data.get("cwd"),
        "transcript_path": data.get("transcript_path"),
        "custom_instructions": data.get("custom_instructions"),
        "brain_dir": str(brain_dir),
        "payload": _compact_payload(data),
        "relevant_context": context,
        "limits": limits,
    }
    return snapshot


def _write_snapshot(path: Path, snapshot: dict[str, Any]) -> None:
    """Serialize *snapshot* as indented, key-sorted JSON and write it to *path*.

    The parent directory is created if needed; the text (with a trailing
    newline) is written through ``atomic_write_text``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(snapshot, indent=2, sort_keys=True)
    atomic_write_text(path, serialized + "\n")


def main(data: dict[str, Any]) -> None:
    """Hook entry point: write a PreCompact snapshot for the current session.

    Does nothing when no brain directory can be resolved or when the
    resolved path is not an existing directory.

    Args:
        data: Hook payload.

    Returns:
        Always ``None``.
    """
    resolved = resolve_brain_dir()
    if not resolved:
        return None
    brain_dir = Path(resolved)
    # is_dir() rather than exists(): a BRAIN_DIR that points at a regular
    # file would pass exists() and then fail when the snapshot directory is
    # created beneath it. Treat it like a missing brain and no-op instead.
    if not brain_dir.is_dir():
        return None
    session_id = _session_id(data)
    _write_snapshot(_snapshot_path(brain_dir, session_id), _build_snapshot(brain_dir, data))
    return None


if __name__ == "__main__":
Expand Down
24 changes: 24 additions & 0 deletions Gradata/tests/test_hook_adapters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
import os
import tomllib
from pathlib import Path
Expand Down Expand Up @@ -63,3 +64,26 @@ def test_adapter_install_does_not_touch_real_user_config(tmp_path: Path) -> None
assert result.action == "added"
after = real_config.read_text(encoding="utf-8") if real_config.exists() else None
assert after == before


def test_claude_code_install_writes_pre_compact_entry(tmp_path: Path) -> None:
    """Installing twice leaves exactly one PreCompact entry pointing at the hook module."""
    brain_dir = tmp_path / "brain"
    brain_dir.mkdir()
    config_path = tmp_path / ".claude" / "settings.json"
    adapter = get_adapter("claude-code")

    first_action = adapter.install(brain_dir, config_path).action
    second_action = adapter.install(brain_dir, config_path).action

    assert first_action == "added"
    # The second install must detect the existing hooks and not duplicate them.
    assert second_action == "already_present"

    settings = json.loads(config_path.read_text(encoding="utf-8"))
    pre_compact = settings["hooks"]["PreCompact"]
    commands = []
    for entry in pre_compact:
        for hook in entry.get("hooks", []):
            commands.append(hook.get("command", ""))

    assert len(pre_compact) == 1
    assert any("BRAIN_DIR=" in command for command in commands)
    assert any("gradata.hooks.pre_compact" in command for command in commands)
72 changes: 72 additions & 0 deletions Gradata/tests/test_pre_compact_hook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from __future__ import annotations

import json
from pathlib import Path

from gradata.hooks import pre_compact
from gradata.hooks._base import run_hook


def test_pre_compact_writes_snapshot(tmp_path: Path, monkeypatch) -> None:
    """A full payload produces a snapshot file carrying the payload fields and brain prompt."""
    brain = tmp_path / "brain"
    brain.mkdir()
    (brain / "brain_prompt.md").write_text("remember this rule", encoding="utf-8")
    monkeypatch.setenv("BRAIN_DIR", str(brain))

    payload = {
        "hook_event_name": "PreCompact",
        "session_id": "abc123",
        "trigger": "manual",
        "cwd": "/repo",
        "transcript_path": "/tmp/transcript.jsonl",
        "custom_instructions": "be concise",
    }
    assert pre_compact.main(payload) is None

    snapshot_path = brain / ".precompact-snapshots" / "abc123.json"
    assert snapshot_path.exists()
    snapshot = json.loads(snapshot_path.read_text(encoding="utf-8"))

    expected_fields = {
        "event": "PreCompact",
        "session_id": "abc123",
        "trigger": "manual",
        "cwd": "/repo",
        "transcript_path": "/tmp/transcript.jsonl",
        "custom_instructions": "be concise",
    }
    for field, expected in expected_fields.items():
        assert snapshot[field] == expected
    assert snapshot["relevant_context"]["brain_prompt_md"] == "remember this rule"
    assert snapshot["limits"]["transcript_content_captured"] is False


def test_pre_compact_sanitizes_session_id(tmp_path: Path, monkeypatch) -> None:
    """A traversal-style session id still lands inside the snapshot directory."""
    brain = tmp_path / "brain"
    brain.mkdir()
    monkeypatch.setenv("BRAIN_DIR", str(brain))

    pre_compact.main({"session_id": "../../escape/session"})

    snapshot_dir = brain / ".precompact-snapshots"
    snapshots = list(snapshot_dir.glob("*.json"))
    assert len(snapshots) == 1
    written = snapshots[0]
    assert written.parent == snapshot_dir
    assert ".." not in written.name
    assert "/" not in written.name


def test_pre_compact_missing_brain_noops(tmp_path: Path, monkeypatch) -> None:
    """Pointing BRAIN_DIR at a non-existent path makes the hook return None."""
    missing_brain = tmp_path / "missing"
    monkeypatch.setenv("BRAIN_DIR", str(missing_brain))

    outcome = pre_compact.main({"session_id": "abc123"})
    assert outcome is None


def test_pre_compact_callable_via_run_hook(tmp_path: Path, monkeypatch) -> None:
    """Driving the hook through ``run_hook`` with raw JSON input writes the snapshot."""
    brain = tmp_path / "brain"
    brain.mkdir()
    monkeypatch.setenv("BRAIN_DIR", str(brain))

    raw_payload = json.dumps({"session_id": "via-run-hook", "hook_event_name": "PreCompact"})
    run_hook(pre_compact.main, pre_compact.HOOK_META, raw_input=raw_payload)

    expected_file = brain / ".precompact-snapshots" / "via-run-hook.json"
    assert expected_file.exists()
Loading