Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 184 additions & 1 deletion code_review_graph/tools/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@

from __future__ import annotations

import os
import re
import subprocess
import threading
import time
from pathlib import Path
from typing import Any

from ..graph import GraphStore
from ..incremental import find_project_root, get_db_path
from ..incremental import detect_vcs, find_project_root, get_db_path

_SAFE_GIT_REF = re.compile(r"^[A-Za-z0-9_.~^/@{}\\-]+$")
_SAFE_SVN_REV = re.compile(r"^r?\d+(:r?\d+|:HEAD|:BASE|:COMMITTED)?$", re.IGNORECASE)


def _error_response(
Expand Down Expand Up @@ -58,6 +66,181 @@ def _error_response(
}


_CHANGED_FILES_CACHE_LOCK = threading.Lock()
_CHANGED_FILES_CACHE: dict[tuple[str, str], dict[str, Any]] = {}
_CHANGED_FILES_CACHE_TTL = float(os.environ.get("CRG_CHANGED_FILES_CACHE_TTL", "3.0"))
_FAST_DIFF_TIMEOUT = float(os.environ.get("CRG_FAST_DIFF_TIMEOUT", "5.0"))


def _run_git(root: Path, args: list[str], timeout_s: float) -> subprocess.CompletedProcess:
return subprocess.run(
["git", *args],
capture_output=True,
text=True,
encoding="utf-8",
errors="replace",
cwd=str(root),
timeout=timeout_s,
stdin=subprocess.DEVNULL,
check=False,
)


def _run_svn(root: Path, args: list[str], timeout_s: float) -> subprocess.CompletedProcess:
return subprocess.run(
["svn", *args],
capture_output=True,
text=True,
encoding="utf-8",
errors="replace",
cwd=str(root),
timeout=timeout_s,
stdin=subprocess.DEVNULL,
check=False,
)


def _normalize_changed_files(files: list[str]) -> list[str]:
seen: set[str] = set()
out: list[str] = []
for f in files:
rel = f.strip().replace("\\", "/")
if not rel or rel in seen:
continue
seen.add(rel)
out.append(rel)
return out


def _parse_porcelain_paths(stdout: str) -> list[str]:
files: list[str] = []
for line in stdout.splitlines():
if not line.strip() or len(line) < 4:
continue
path_part = line[3:].strip()
# Handle rename lines: "old/path -> new/path"
if " -> " in path_part:
path_part = path_part.split(" -> ", 1)[1].strip()
files.append(path_part)
return files


def resolve_changed_files(
root: Path,
changed_files: list[str] | None,
base: str,
) -> tuple[list[str], dict[str, Any]]:
"""Resolve changed files with fast auto-detect and short-lived cache.

Returns:
(files, meta)
meta includes:
- source: explicit | cache | auto
- auto_detect_timed_out: bool
- timeout_seconds: float
- cache_hit: bool
"""
if changed_files is not None:
return _normalize_changed_files(changed_files), {
"source": "explicit",
"auto_detect_timed_out": False,
"timeout_seconds": _FAST_DIFF_TIMEOUT,
"cache_hit": False,
}

cache_key = (str(root.resolve()), base)
now = time.monotonic()
with _CHANGED_FILES_CACHE_LOCK:
cached = _CHANGED_FILES_CACHE.get(cache_key)
if cached and (now - float(cached["ts"])) <= _CHANGED_FILES_CACHE_TTL:
return list(cached["files"]), {
"source": "cache",
"auto_detect_timed_out": False,
"timeout_seconds": _FAST_DIFF_TIMEOUT,
"cache_hit": True,
}

vcs = detect_vcs(root)
if vcs == "none":
return [], {
"source": "auto",
"auto_detect_timed_out": False,
"timeout_seconds": _FAST_DIFF_TIMEOUT,
"cache_hit": False,
}

timed_out = False
files: list[str] = []

if vcs == "svn":
try:
rev_range = base if _SAFE_SVN_REV.match(base) else None
if rev_range:
diff = _run_svn(
root,
["diff", "--summarize", "--non-interactive", "-r", rev_range],
_FAST_DIFF_TIMEOUT,
)
if diff.returncode == 0:
for line in diff.stdout.splitlines():
if len(line) >= 2 and line[0] in ("M", "A", "D"):
files.append(line[1:].strip())
else:
status = _run_svn(
root,
["status", "--non-interactive"],
_FAST_DIFF_TIMEOUT,
)
if status.returncode == 0:
for line in status.stdout.splitlines():
if len(line) < 2:
continue
status_char = line[0]
if status_char in ("M", "A", "D", "R", "C"):
path = line[8:].strip() if len(line) > 8 else line[1:].strip()
files.append(path)
except (FileNotFoundError, subprocess.TimeoutExpired):
timed_out = True
else: # git
if not _SAFE_GIT_REF.match(base):
raise ValueError(f"Invalid git ref: {base}")

try:
diff = _run_git(root, ["diff", "--name-only", base, "--"], _FAST_DIFF_TIMEOUT)
if diff.returncode == 0:
files.extend(diff.stdout.splitlines())
else:
cached_only = _run_git(
root,
["diff", "--name-only", "--cached"],
_FAST_DIFF_TIMEOUT,
)
if cached_only.returncode == 0:
files.extend(cached_only.stdout.splitlines())

status = _run_git(
root,
["status", "--porcelain"],
_FAST_DIFF_TIMEOUT,
)
if status.returncode == 0:
files.extend(_parse_porcelain_paths(status.stdout))
except (FileNotFoundError, subprocess.TimeoutExpired):
timed_out = True

normalized = _normalize_changed_files(files)
if not timed_out:
with _CHANGED_FILES_CACHE_LOCK:
_CHANGED_FILES_CACHE[cache_key] = {"ts": now, "files": normalized}

return normalized, {
"source": "auto",
"auto_detect_timed_out": timed_out,
"timeout_seconds": _FAST_DIFF_TIMEOUT,
"cache_hit": False,
}


def _validate_repo_root(path: "Path | str") -> Path:
"""Validate that a path is a plausible project root.

Expand Down
70 changes: 24 additions & 46 deletions code_review_graph/tools/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,13 @@

import logging
import sqlite3
import subprocess
from pathlib import Path
from typing import Any

from ._common import _get_store, compact_response
from ._common import _get_store, compact_response, resolve_changed_files

logger = logging.getLogger(__name__)


def _has_git_changes(root: Path, base: str) -> bool:
"""Quick check for uncommitted or diffed changes."""
try:
result = subprocess.run(
["git", "diff", "--name-only", base, "--"],
capture_output=True, stdin=subprocess.DEVNULL, text=True,
cwd=str(root), timeout=10,
)
if result.returncode == 0 and result.stdout.strip():
return True
# Also check staged/unstaged
result2 = subprocess.run(
["git", "status", "--porcelain"],
capture_output=True, stdin=subprocess.DEVNULL, text=True,
cwd=str(root), timeout=10,
)
return bool(result2.stdout.strip())
except (FileNotFoundError, subprocess.TimeoutExpired):
return False


def get_minimal_context(
task: str = "",
changed_files: list[str] | None = None,
Expand Down Expand Up @@ -62,33 +39,29 @@ def get_minimal_context(
risk_score = 0.0
top_affected: list[str] = []
test_gap_count = 0
if changed_files or _has_git_changes(root, base):
files, detect_meta = resolve_changed_files(root, changed_files, base)
if files:
try:
from ..changes import analyze_changes
from ..incremental import get_changed_files as _get_changed

files = changed_files
if not files:
files = _get_changed(root, base)
if files:
abs_files = [str(root / f) for f in files]
analysis = analyze_changes(
store, abs_files, repo_root=str(root), base=base,
)
risk_score = analysis.get("risk_score", 0.0)
risk = (
"high" if risk_score > 0.7
else "medium" if risk_score > 0.4
else "low"
)
top_affected = [
f.get("name", "")
for f in analysis.get("changed_functions", [])[:5]
]
test_gap_count = len(analysis.get("test_gaps", []))
abs_files = [str(root / f) for f in files]
analysis = analyze_changes(
store, abs_files, repo_root=str(root), base=base,
)
risk_score = analysis.get("risk_score", 0.0)
risk = (
"high" if risk_score > 0.7
else "medium" if risk_score > 0.4
else "low"
)
top_affected = [
f.get("name", "")
for f in analysis.get("changed_functions", [])[:5]
]
test_gap_count = len(analysis.get("test_gaps", []))
except (
ImportError, OSError, ValueError,
sqlite3.Error, subprocess.SubprocessError,
sqlite3.Error,
):
logger.debug("Risk analysis failed in get_minimal_context", exc_info=True)

Expand Down Expand Up @@ -137,6 +110,11 @@ def get_minimal_context(
]
if risk != "unknown":
summary_parts.append(f"Risk: {risk} ({risk_score:.2f}).")
if detect_meta.get("auto_detect_timed_out"):
summary_parts.append(
"Changed-file auto-detection timed out. "
"Pass changed_files for faster review tools."
)
if test_gap_count:
summary_parts.append(f"{test_gap_count} test gaps.")

Expand Down
25 changes: 19 additions & 6 deletions code_review_graph/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from ..embeddings import EmbeddingStore
from ..graph import _sanitize_name, edge_to_dict, node_to_dict
from ..hints import generate_hints, get_session
from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged
from ..incremental import get_db_path
from ..search import hybrid_search
from ._common import _BUILTIN_CALL_NAMES, _get_store, _resolve_graph_file_paths
from ._common import _BUILTIN_CALL_NAMES, _get_store, _resolve_graph_file_paths, resolve_changed_files

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -57,12 +57,25 @@ def get_impact_radius(
"""
store, root = _get_store(repo_root)
try:
if changed_files is None:
changed_files = get_changed_files(root, base)
if not changed_files:
changed_files = get_staged_and_unstaged(root)
changed_files, detect_meta = resolve_changed_files(root, changed_files, base)

if not changed_files:
if detect_meta.get("auto_detect_timed_out"):
timeout_s = detect_meta.get("timeout_seconds", 0)
return {
"status": "ok",
"summary": (
"Auto-detection of changed files timed out "
f"after {timeout_s:.1f}s. Pass changed_files explicitly "
"to compute impact radius quickly."
),
"changed_nodes": [],
"impacted_nodes": [],
"impacted_files": [],
"truncated": False,
"total_impacted": 0,
"requires_changed_files": True,
}
return {
"status": "ok",
"summary": "No changed files detected.",
Expand Down
Loading
Loading