diff --git a/AGENTS.md b/AGENTS.md index a77c7f2..27735e8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -86,4 +86,4 @@ Update docs in the same PR when changing: - Security: `docs/SECURITY.md` - Reliability: `docs/RELIABILITY.md` - Autonomous E2E workflow: `docs/runbooks/autonomous-agent-e2e.md` - +- PR evaluation harness: `docs/runbooks/pr-eval-harness.md` diff --git a/README.md b/README.md index 5f4be7c..55f6941 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,35 @@ A file-first knowledge base with Git-backed approval workflows. Edit notes local - [ARCHITECTURE.md](ARCHITECTURE.md) — Domain boundaries and flows - [AGENTS.md](AGENTS.md) — Entry point for AI agents - [docs/](docs/) — Design docs, runbooks, security, reliability +- [docs/runbooks/pr-eval-harness.md](docs/runbooks/pr-eval-harness.md) — Review another branch from an isolated `main`-based worktree + +## Review Another PR Locally + +Use the local harness when you want to evaluate someone else's branch without +switching your active checkout: + +```bash +python3 scripts/eval_pr.py +``` + +Common examples: + +```bash +python3 scripts/eval_pr.py origin/some-branch +python3 scripts/eval_pr.py my-local-branch --keep-temp +python3 scripts/eval_pr.py HEAD --tests-only +``` + +The harness: + +- creates a temp worktree rooted from local `main` +- resolves symbolic refs like `HEAD` in your current checkout before populating that worktree +- runs stable targeted tests for changed areas +- can launch `kb-server` and `vault-sync` in `tmux` +- exercises the current mainline write/sync workflow end to end + +See [docs/runbooks/pr-eval-harness.md](docs/runbooks/pr-eval-harness.md) for +prerequisites, flags, and failure inspection. ## Docs Checks Before PR diff --git a/docs/exec-plans/active/README.md b/docs/exec-plans/active/README.md index ce13997..4c65bf3 100644 --- a/docs/exec-plans/active/README.md +++ b/docs/exec-plans/active/README.md @@ -1,10 +1,9 @@ --- owner: platform status: draft -last_verified: 2026-03-21 +last_verified: 2026-03-16 source_of_truth: - ../completed/README.md - - ../../PLANS.md related_code: - ../../../scripts/docs_lint.py related_tests: @@ -14,7 +13,7 @@ review_cycle_days: 30 # Active Plans -Place in-progress execution plans here. +Move in-progress execution plans here while work is underway. +- Add one file per active plan. - Move completed plans to `../completed/`. -- Keep active plan filenames stable so `docs/PLANS.md` and agent navigation remain valid. diff --git a/docs/generated/api-surface.md b/docs/generated/api-surface.md index 0e7664f..fb651b4 100644 --- a/docs/generated/api-surface.md +++ b/docs/generated/api-surface.md @@ -1,7 +1,7 @@ --- owner: platform status: generated -last_verified: 2026-03-07 +last_verified: 2026-03-22 source_of_truth: - ../../kb-server/app/api/routes/health.py - ../../kb-server/app/api/routes/notes.py @@ -15,7 +15,7 @@ review_cycle_days: 7 # API Surface (Generated) -Generated on `2026-03-07` from route handlers. +Generated on `2026-03-22` from route handlers. | Method | Path | | --- | --- | diff --git a/docs/generated/env-catalog.md b/docs/generated/env-catalog.md index e09f48d..b1a26b7 100644 --- a/docs/generated/env-catalog.md +++ b/docs/generated/env-catalog.md @@ -1,7 +1,7 @@ --- owner: platform status: generated -last_verified: 2026-03-07 +last_verified: 2026-03-22 source_of_truth: - ../../kb-server/.env.example - ../../kb-server/app/core/config.py @@ -16,7 +16,7 @@ review_cycle_days: 7 # Environment Catalog (Generated) -Generated on `2026-03-07` from settings and env sources. +Generated on `2026-03-22` from settings and env sources. ## kb-server `.env.example` diff --git a/docs/index.md b/docs/index.md index f49b6d8..ea00ca9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -49,4 +49,5 @@ review_cycle_days: 14 - `runbooks/incident-response.md` - `runbooks/backup-restore.md` - `runbooks/autonomous-agent-e2e.md` - +- `runbooks/local-role-auth-e2e.md` +- `runbooks/pr-eval-harness.md` diff --git a/docs/runbooks/pr-eval-harness.md b/docs/runbooks/pr-eval-harness.md new file mode 100644 index 0000000..2abd02c --- /dev/null +++ b/docs/runbooks/pr-eval-harness.md @@ -0,0 +1,137 @@ +--- +owner: platform +status: draft +last_verified: 2026-03-16 +source_of_truth: + - ../../scripts/eval_pr.py + - ../../kb-server/app/api/routes/notes.py + - ../../vault-sync/vault_sync/api_client.py +related_code: + - ../../kb-server/app/services/git_batcher.py + - ../../vault-sync/vault_sync/sync.py +related_tests: + - ../../kb-server/tests/test_current_view.py + - ../../kb-server/tests/test_source_and_delete.py + - ../../vault-sync/tests/test_api_client.py + - ../../vault-sync/tests/test_sync.py +review_cycle_days: 14 +--- + +# PR Evaluation Harness + +Use `scripts/eval_pr.py` to evaluate a target ref from an isolated worktree rooted +from local `main`. + +The harness is meant for reviewing other people's branches without touching your +active checkout. It provisions a temp worktree, temp vault, temp bare remote, +temp SQLite database, and a tmux-backed local stack. + +## Prerequisites + +- local `main` exists and is up to date enough for comparison +- repo-local virtualenvs already exist: + - `kb-server/.venv` + - `vault-sync/.venv` +- `git` and `tmux` are installed + +## Default flow + +```bash +cd /path/to/flight-deck +python3 scripts/eval_pr.py +``` + +Example refs: + +```bash +python3 scripts/eval_pr.py origin/some-branch +python3 scripts/eval_pr.py my-local-branch +python3 scripts/eval_pr.py HEAD +``` + +Symbolic refs such as `HEAD` are resolved in your current checkout before the temp +worktree is created, so the harness evaluates the commit you asked for rather than +the temp worktree's initial `main` HEAD. + +By default the harness: + +- creates a temp git worktree from `main` +- resolves `` to a commit in your current checkout, then checks out that commit inside the temp worktree +- runs targeted `kb-server` and `vault-sync` tests when those areas changed +- starts `kb-server` in `tmux` +- starts `vault-sync` in `tmux` +- validates current main behavior: + - API key enforcement using `KB_API_KEY` + - `source=human` writes commit directly to `main` + - default API writes batch into `kb-api/YYYY-MM-DD` + - `view=current` includes pending branch content + - `vault-sync` pulls `view=current` and pushes local edits with `source=human` + +The default `kb-server` test subset intentionally avoids `tests/test_notes_api.py` +because that file is currently failing on `main`; auth behavior is covered by the +harness's own HTTP smoke checks instead. + +## Useful flags + +```bash +python3 scripts/eval_pr.py --keep-temp +python3 scripts/eval_pr.py --tests-only +python3 scripts/eval_pr.py --e2e-only +python3 scripts/eval_pr.py --no-sync +python3 scripts/eval_pr.py --port 8021 +python3 scripts/eval_pr.py --tmux-session-name fd-review +``` + +## Recommended review flow + +For a normal review pass: + +```bash +git fetch origin +python3 scripts/eval_pr.py origin/ +``` + +For a quicker check when you only want test feedback: + +```bash +python3 scripts/eval_pr.py origin/ --tests-only +``` + +For debugging a runtime issue and keeping the temp environment around: + +```bash +python3 scripts/eval_pr.py origin/ --keep-temp +``` + +## Artifacts and inspection + +The script prints: + +- temp root +- temp worktree path +- changed files relative to `main` +- interpreter paths +- logs directory +- tmux session name + +On failure it keeps the temp root and any harness-created worktree/tmux session so +you can inspect: + +- env files +- runtime logs +- worktree contents +- tmux panes + +If the requested tmux session name already exists, the harness stops before +starting services and leaves that pre-existing session untouched. + +Useful commands after a failed run: + +```bash +tmux attach -t +tmux capture-pane -pt :0.0 +tmux capture-pane -pt :0.1 +``` + +`--keep-temp` uses the same preservation behavior after a successful run. Remove the +temp directory and any preserved worktree/tmux session yourself after inspection. diff --git a/scripts/eval_pr.py b/scripts/eval_pr.py new file mode 100644 index 0000000..1aa141d --- /dev/null +++ b/scripts/eval_pr.py @@ -0,0 +1,687 @@ +#!/usr/bin/env python3 +"""Evaluate a target git ref in an isolated main-based worktree.""" + +from __future__ import annotations + +import argparse +import json +import os +import shlex +import shutil +import subprocess +import sys +import tempfile +import textwrap +import time +import urllib.error +import urllib.request +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +DEFAULT_API_KEY = "fd-eval-key" +DEFAULT_PORT = 8011 + + +class HarnessError(RuntimeError): + """Raised when the evaluation harness cannot proceed.""" + + +@dataclass +class EvalPaths: + temp_root: Path + worktree_root: Path + vault_root: Path + sync_root: Path + remote_root: Path + db_path: Path + logs_root: Path + kb_env_file: Path + sync_env_file: Path + + +@dataclass +class EvalContext: + args: argparse.Namespace + repo_root: Path + paths: EvalPaths + kb_python: Path + vault_python: Path + requested_target_ref: str + resolved_target_ref: str + base_ref: str + changed_files: list[str] + created_worktree: bool = False + created_tmux_session: bool = False + + +def log(message: str) -> None: + print(f"[eval_pr] {message}", flush=True) + + +def stage(message: str) -> None: + log(f"==> {message}") + + +def run_command( + cmd: list[str], + *, + cwd: Path | None = None, + env: dict[str, str] | None = None, + log_path: Path | None = None, + check: bool = True, +) -> subprocess.CompletedProcess[str]: + if log_path is not None: + log_path.parent.mkdir(parents=True, exist_ok=True) + rendered = " ".join(shlex.quote(part) for part in cmd) + if cwd is not None: + log(f"$ {rendered} (cwd={cwd})") + else: + log(f"$ {rendered}") + + process = subprocess.Popen( + cmd, + cwd=str(cwd) if cwd else None, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + + output_chunks: list[str] = [] + assert process.stdout is not None + if log_path is None: + sink = open(os.devnull, "w", encoding="utf-8") + else: + sink = log_path.open("w", encoding="utf-8") + + with sink: + for line in process.stdout: + sys.stdout.write(line) + sink.write(line) + output_chunks.append(line) + returncode = process.wait() + output = "".join(output_chunks) + result = subprocess.CompletedProcess(cmd, returncode, output, "") + if check and returncode != 0: + raise HarnessError(f"command failed ({returncode}): {rendered}") + return result + + +def run_git(repo_root: Path, *args: str, check: bool = True) -> str: + result = subprocess.run( + ["git", *args], + cwd=repo_root, + capture_output=True, + text=True, + check=check, + ) + return result.stdout.strip() + + +def ensure_binaries(repo_root: Path) -> tuple[Path, Path]: + for binary in ("git", "tmux"): + if shutil.which(binary) is None: + raise HarnessError(f"required binary not found on PATH: {binary}") + + kb_python = repo_root / "kb-server" / ".venv" / "bin" / "python" + vault_python = repo_root / "vault-sync" / ".venv" / "bin" / "python" + missing = [str(path) for path in (kb_python, vault_python) if not path.exists()] + if missing: + raise HarnessError( + "missing repo-local interpreters; expected:\n" + + "\n".join(f"- {item}" for item in missing) + ) + return kb_python, vault_python + + +def ensure_ref_exists(repo_root: Path, ref: str) -> None: + try: + run_git(repo_root, "rev-parse", "--verify", f"{ref}^{{commit}}") + except subprocess.CalledProcessError as exc: + raise HarnessError(f"git ref not found: {ref}") from exc + + +def resolve_ref_to_commit(repo_root: Path, ref: str) -> str: + try: + return run_git(repo_root, "rev-parse", "--verify", f"{ref}^{{commit}}") + except subprocess.CalledProcessError as exc: + raise HarnessError(f"git ref not found: {ref}") from exc + + +def create_paths() -> EvalPaths: + temp_root = Path(tempfile.mkdtemp(prefix="fd-pr-eval-")) + return EvalPaths( + temp_root=temp_root, + worktree_root=temp_root / "repo", + vault_root=temp_root / "vault", + sync_root=temp_root / "sync", + remote_root=temp_root / "remote.git", + db_path=temp_root / "kb-server.db", + logs_root=temp_root / "logs", + kb_env_file=temp_root / "kb-server.env", + sync_env_file=temp_root / "vault-sync.env", + ) + + +def add_worktree(ctx: EvalContext) -> None: + stage("Creating isolated worktree") + run_command( + ["git", "worktree", "add", "--detach", str(ctx.paths.worktree_root), ctx.base_ref], + cwd=ctx.repo_root, + log_path=ctx.paths.logs_root / "git-worktree-add.log", + ) + ctx.created_worktree = True + run_command( + ["git", "checkout", "--detach", ctx.resolved_target_ref], + cwd=ctx.paths.worktree_root, + log_path=ctx.paths.logs_root / "git-checkout-target.log", + ) + + +def remove_worktree(repo_root: Path, worktree_root: Path) -> None: + if not worktree_root.exists(): + return + subprocess.run( + ["git", "worktree", "remove", "--force", str(worktree_root)], + cwd=repo_root, + check=False, + capture_output=True, + text=True, + ) + + +def collect_changed_files(worktree_root: Path, base_ref: str) -> list[str]: + result = subprocess.run( + ["git", "diff", "--name-only", f"{base_ref}...HEAD"], + cwd=worktree_root, + capture_output=True, + text=True, + check=True, + ) + return [line.strip() for line in result.stdout.splitlines() if line.strip()] + + +def should_run_kb_tests(changed_files: list[str]) -> bool: + if not changed_files: + return True + return any(path.startswith(("kb-server/", "scripts/")) for path in changed_files) + + +def should_run_vault_tests(changed_files: list[str]) -> bool: + if not changed_files: + return True + return any(path.startswith(("vault-sync/", "scripts/")) for path in changed_files) + + +def should_run_e2e(changed_files: list[str]) -> bool: + if not changed_files: + return True + return any(path.startswith(("kb-server/", "vault-sync/", "scripts/")) for path in changed_files) + + +def init_isolated_repos(paths: EvalPaths) -> None: + stage("Creating isolated vault and remote") + paths.vault_root.mkdir(parents=True, exist_ok=True) + paths.sync_root.mkdir(parents=True, exist_ok=True) + paths.logs_root.mkdir(parents=True, exist_ok=True) + + run_command(["git", "init", "--bare", str(paths.remote_root)], log_path=paths.logs_root / "git-init-remote.log") + run_command(["git", "init", "-b", "main", str(paths.vault_root)], log_path=paths.logs_root / "git-init-vault.log") + run_command(["git", "config", "user.email", "e2e@test.local"], cwd=paths.vault_root, log_path=paths.logs_root / "git-config-email.log") + run_command(["git", "config", "user.name", "e2e-test"], cwd=paths.vault_root, log_path=paths.logs_root / "git-config-name.log") + + notes_dir = paths.vault_root / "notes" + notes_dir.mkdir(parents=True, exist_ok=True) + (notes_dir / "seed.md").write_text("# Seed\n", encoding="utf-8") + + run_command(["git", "add", "."], cwd=paths.vault_root, log_path=paths.logs_root / "git-seed-add.log") + run_command(["git", "commit", "-m", "seed"], cwd=paths.vault_root, log_path=paths.logs_root / "git-seed-commit.log") + run_command(["git", "remote", "add", "origin", str(paths.remote_root)], cwd=paths.vault_root, log_path=paths.logs_root / "git-add-remote.log") + run_command(["git", "push", "-u", "origin", "main"], cwd=paths.vault_root, log_path=paths.logs_root / "git-push-main.log") + + +def write_env_file(path: Path, values: dict[str, str]) -> None: + lines = [f"{key}={values[key]}" for key in sorted(values)] + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def build_envs(ctx: EvalContext) -> tuple[dict[str, str], dict[str, str]]: + kb_env = os.environ.copy() + kb_env.update( + { + "VAULT_PATH": str(ctx.paths.vault_root), + "DATABASE_URL": f"sqlite:///{ctx.paths.db_path}", + "KB_API_KEY": DEFAULT_API_KEY, + "GIT_REMOTE": "origin", + "GIT_BRANCH": "main", + "GIT_PUSH_ENABLED": "true", + "AUTOSAVE_DEBOUNCE_SECONDS": "2", + "GIT_PULL_INTERVAL_SECONDS": "30", + "GIT_BATCH_DEBOUNCE_SECONDS": "2", + "GIT_BATCH_BRANCH_PREFIX": "kb-api", + "GITHUB_TOKEN": "", + "GITHUB_REPO": "", + "QUARTZ_BUILD_COMMAND": "", + "QUARTZ_WEBHOOK_URL": "", + "API_HOST": "127.0.0.1", + "API_PORT": str(ctx.args.port), + } + ) + sync_env = os.environ.copy() + sync_env.update( + { + "KB_SERVER_URL": f"http://127.0.0.1:{ctx.args.port}", + "KB_API_KEY": DEFAULT_API_KEY, + "SYNC_DIR": str(ctx.paths.sync_root), + "SYNC_DEBOUNCE_SECONDS": "1", + "SYNC_PULL_INTERVAL_SECONDS": "4", + } + ) + write_env_file( + ctx.paths.kb_env_file, + {key: kb_env[key] for key in ( + "API_HOST", + "API_PORT", + "AUTOSAVE_DEBOUNCE_SECONDS", + "DATABASE_URL", + "GIT_BATCH_BRANCH_PREFIX", + "GIT_BATCH_DEBOUNCE_SECONDS", + "GIT_BRANCH", + "GIT_PULL_INTERVAL_SECONDS", + "GIT_PUSH_ENABLED", + "GIT_REMOTE", + "GITHUB_REPO", + "GITHUB_TOKEN", + "KB_API_KEY", + "QUARTZ_BUILD_COMMAND", + "QUARTZ_WEBHOOK_URL", + "VAULT_PATH", + )}, + ) + write_env_file( + ctx.paths.sync_env_file, + {key: sync_env[key] for key in ( + "KB_API_KEY", + "KB_SERVER_URL", + "SYNC_DEBOUNCE_SECONDS", + "SYNC_DIR", + "SYNC_PULL_INTERVAL_SECONDS", + )}, + ) + return kb_env, sync_env + + +def run_tests(ctx: EvalContext) -> None: + if ctx.args.e2e_only: + log("Skipping targeted tests due to --e2e-only") + return + + if should_run_kb_tests(ctx.changed_files): + stage("Running kb-server targeted tests") + run_command( + [ + str(ctx.kb_python), + "-m", + "pytest", + "tests/test_current_view.py", + "tests/test_source_and_delete.py", + "-q", + ], + cwd=ctx.paths.worktree_root / "kb-server", + log_path=ctx.paths.logs_root / "kb-server-tests.log", + ) + else: + log("Skipping kb-server targeted tests; no matching changes against main") + + if should_run_vault_tests(ctx.changed_files): + stage("Running vault-sync targeted tests") + run_command( + [ + str(ctx.vault_python), + "-m", + "pytest", + "tests/test_api_client.py", + "tests/test_sync.py", + "-q", + ], + cwd=ctx.paths.worktree_root / "vault-sync", + log_path=ctx.paths.logs_root / "vault-sync-tests.log", + ) + else: + log("Skipping vault-sync targeted tests; no matching changes against main") + + +def run_migrations(ctx: EvalContext, kb_env: dict[str, str]) -> None: + stage("Running kb-server migrations") + run_command( + [str(ctx.kb_python), "-m", "alembic", "upgrade", "head"], + cwd=ctx.paths.worktree_root / "kb-server", + env=kb_env, + log_path=ctx.paths.logs_root / "kb-migrate.log", + ) + + +def tmux_has_session(name: str) -> bool: + result = subprocess.run( + ["tmux", "has-session", "-t", name], + capture_output=True, + text=True, + check=False, + ) + return result.returncode == 0 + + +def shell_from_env_file(workdir: Path, env_file: Path, command: list[str]) -> str: + quoted_command = " ".join(shlex.quote(part) for part in command) + return ( + f"cd {shlex.quote(str(workdir))} && " + f"set -a && source {shlex.quote(str(env_file))} && set +a && " + f"exec {quoted_command}" + ) + + +def start_server_tmux(ctx: EvalContext) -> None: + session_name = ctx.args.tmux_session_name + if tmux_has_session(session_name): + raise HarnessError(f"tmux session already exists: {session_name}") + + stage(f"Starting tmux session {session_name}") + server_cmd = shell_from_env_file( + ctx.paths.worktree_root / "kb-server", + ctx.paths.kb_env_file, + [str(ctx.kb_python), "-m", "uvicorn", "app.main:app", "--host", "127.0.0.1", "--port", str(ctx.args.port)], + ) + run_command(["tmux", "new-session", "-d", "-s", session_name, server_cmd], log_path=ctx.paths.logs_root / "tmux-server.log") + ctx.created_tmux_session = True + run_command( + [ + "tmux", + "pipe-pane", + "-o", + "-t", + f"{session_name}:0.0", + f"cat >> {shlex.quote(str(ctx.paths.logs_root / 'server-pane.log'))}", + ], + log_path=ctx.paths.logs_root / "tmux-server-pipe.log", + ) + + +def start_sync_tmux(ctx: EvalContext) -> None: + session_name = ctx.args.tmux_session_name + sync_cmd = shell_from_env_file( + ctx.paths.worktree_root / "vault-sync", + ctx.paths.sync_env_file, + [str(ctx.vault_python), "-m", "vault_sync.cli", "-v"], + ) + run_command(["tmux", "split-window", "-h", "-t", session_name, sync_cmd], log_path=ctx.paths.logs_root / "tmux-sync.log") + run_command( + [ + "tmux", + "pipe-pane", + "-o", + "-t", + f"{session_name}:0.1", + f"cat >> {shlex.quote(str(ctx.paths.logs_root / 'sync-pane.log'))}", + ], + log_path=ctx.paths.logs_root / "tmux-sync-pipe.log", + ) + + +def kill_tmux_session(name: str) -> None: + if not tmux_has_session(name): + return + subprocess.run(["tmux", "kill-session", "-t", name], check=False, capture_output=True, text=True) + + +def request( + method: str, + url: str, + *, + headers: dict[str, str] | None = None, + body: dict[str, object] | None = None, +) -> tuple[int, str]: + data = None + req_headers = dict(headers or {}) + if body is not None: + data = json.dumps(body).encode("utf-8") + req_headers.setdefault("Content-Type", "application/json") + req = urllib.request.Request(url=url, method=method, headers=req_headers, data=data) + try: + with urllib.request.urlopen(req, timeout=5) as resp: + return resp.status, resp.read().decode("utf-8") + except urllib.error.HTTPError as exc: + return exc.code, exc.read().decode("utf-8") + except urllib.error.URLError: + return 0, "" + + +def wait_for_http(url: str, headers: dict[str, str], expected_status: int, timeout_seconds: float = 30.0) -> None: + deadline = time.time() + timeout_seconds + last_status = None + while time.time() < deadline: + status, _ = request("GET", url, headers=headers) + last_status = status + if status == expected_status: + return + time.sleep(0.5) + raise HarnessError(f"timed out waiting for {url} to return {expected_status}; last_status={last_status}") + + +def git_output(vault_root: Path, *args: str) -> str: + result = subprocess.run( + ["git", *args], + cwd=vault_root, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + + +def expect_status(status: int, expected: int, context: str) -> None: + if status != expected: + raise HarnessError(f"{context}: expected HTTP {expected}, got {status}") + + +def run_e2e(ctx: EvalContext, kb_env: dict[str, str]) -> None: + if ctx.args.tests_only: + log("Skipping tmux-backed smoke flow due to --tests-only") + return + if not should_run_e2e(ctx.changed_files): + log("Skipping tmux-backed smoke flow; no matching code changes against main") + return + + run_migrations(ctx, kb_env) + start_server_tmux(ctx) + + base_url = f"http://127.0.0.1:{ctx.args.port}" + headers = {"X-API-Key": DEFAULT_API_KEY} + wait_for_http(f"{base_url}/health", headers, 200) + + stage("Checking API key enforcement") + status, _ = request("GET", f"{base_url}/health") + expect_status(status, 401, "unauthorized health check") + status, _ = request("GET", f"{base_url}/health", headers=headers) + expect_status(status, 200, "authorized health check") + + stage("Checking source=human direct-to-main write") + status, _ = request( + "PUT", + f"{base_url}/notes/notes/human-e2e.md?source=human", + headers=headers, + body={"content": "# Human\nmain write\n"}, + ) + expect_status(status, 200, "human write") + subject = git_output(ctx.paths.vault_root, "log", "main", "-1", "--format=%s") + if subject != "human: update notes/human-e2e.md": + raise HarnessError(f"unexpected main commit after human write: {subject}") + + stage("Checking default API batching to kb-api/*") + status, _ = request( + "PUT", + f"{base_url}/notes/notes/api-e2e.md", + headers=headers, + body={"content": "# API\npending branch write\n"}, + ) + expect_status(status, 200, "api write") + time.sleep(4) + + status, _ = request("GET", f"{base_url}/notes/notes/api-e2e.md?view=main", headers=headers) + expect_status(status, 404, "view=main after api batch write") + status, _ = request("GET", f"{base_url}/notes/notes/api-e2e.md?view=current", headers=headers) + expect_status(status, 200, "view=current after api batch write") + + branch_name = f"kb-api/{datetime.now(timezone.utc).strftime('%Y-%m-%d')}" + branches = git_output(ctx.paths.vault_root, "branch", "--list", branch_name) + if branch_name not in branches: + raise HarnessError(f"expected local batch branch missing: {branch_name}") + subject = git_output(ctx.paths.vault_root, "log", branch_name, "-1", "--format=%s") + if "kb-api: update notes/api-e2e.md" not in subject: + raise HarnessError(f"unexpected batch commit subject: {subject}") + remote_heads = git_output(ctx.paths.vault_root, "ls-remote", "--heads", "origin", branch_name) + if branch_name not in remote_heads: + raise HarnessError(f"expected remote batch branch missing: {branch_name}") + + if ctx.args.no_sync: + log("Skipping vault-sync smoke due to --no-sync") + return + + stage("Checking vault-sync current-view pull and push flow") + start_sync_tmux(ctx) + deadline = time.time() + 20 + seed_path = ctx.paths.sync_root / "notes" / "seed.md" + pending_path = ctx.paths.sync_root / "notes" / "api-e2e.md" + while time.time() < deadline: + if seed_path.exists() and pending_path.exists(): + break + time.sleep(0.5) + if not seed_path.exists() or not pending_path.exists(): + raise HarnessError("vault-sync did not complete initial pull of seed/current-view notes") + + sync_file = ctx.paths.sync_root / "notes" / "from-sync.md" + sync_file.parent.mkdir(parents=True, exist_ok=True) + sync_file.write_text("# From Sync\nclient write\n", encoding="utf-8") + time.sleep(3) + + subject = git_output(ctx.paths.vault_root, "log", "main", "-1", "--format=%s") + if subject != "human: update notes/from-sync.md": + raise HarnessError(f"unexpected main commit after vault-sync push: {subject}") + synced = git_output(ctx.paths.vault_root, "show", "main:notes/from-sync.md") + if synced != "# From Sync\nclient write": + raise HarnessError("vault-sync pushed unexpected file content") + + +def cleanup(ctx: EvalContext, success: bool) -> None: + keep_temp = ctx.args.keep_temp or not success + if keep_temp: + log(f"Preserved temp root: {ctx.paths.temp_root}") + if ctx.created_worktree: + log(f"Preserved worktree: {ctx.paths.worktree_root}") + if ctx.created_tmux_session: + log(f"Preserved tmux session: {ctx.args.tmux_session_name}") + return + if ctx.created_tmux_session: + kill_tmux_session(ctx.args.tmux_session_name) + if ctx.created_worktree: + remove_worktree(ctx.repo_root, ctx.paths.worktree_root) + shutil.rmtree(ctx.paths.temp_root, ignore_errors=True) + log("Removed temp worktree and runtime state") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Evaluate a target ref in an isolated main-based worktree.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent( + """\ + Examples: + python scripts/eval_pr.py origin/some-branch + python scripts/eval_pr.py feature/foo --keep-temp + python scripts/eval_pr.py HEAD --tests-only + """ + ), + ) + parser.add_argument("target_ref", help="Git ref to evaluate in the temp worktree.") + parser.add_argument("--base-ref", default="main", help="Base branch for the temp worktree (default: main).") + parser.add_argument("--keep-temp", action="store_true", help="Keep temp worktree, env files, and logs after success.") + parser.add_argument("--no-sync", action="store_true", help="Skip the vault-sync pane and sync smoke checks.") + parser.add_argument("--port", type=int, default=DEFAULT_PORT, help=f"API port to use inside tmux (default: {DEFAULT_PORT}).") + parser.add_argument( + "--tmux-session-name", + default=f"fd-pr-eval-{os.getpid()}", + help="tmux session name for the isolated stack.", + ) + parser.add_argument("--tests-only", action="store_true", help="Run targeted tests only; skip the tmux smoke flow.") + parser.add_argument("--e2e-only", action="store_true", help="Run the tmux smoke flow only; skip targeted tests.") + return parser + + +def print_summary(ctx: EvalContext) -> None: + changed_preview = ", ".join(ctx.changed_files[:6]) if ctx.changed_files else "(no diff vs base)" + if len(ctx.changed_files) > 6: + changed_preview += ", ..." + summary = textwrap.dedent( + f"""\ + Evaluation summary + base_ref: {ctx.base_ref} + target_ref: {ctx.requested_target_ref} + target_sha: {ctx.resolved_target_ref} + worktree: {ctx.paths.worktree_root} + temp_root: {ctx.paths.temp_root} + kb_python: {ctx.kb_python} + vault_python: {ctx.vault_python} + changed: {changed_preview} + logs: {ctx.paths.logs_root} + tmux: {ctx.args.tmux_session_name} + """ + ).rstrip() + print(summary) + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + if args.tests_only and args.e2e_only: + parser.error("--tests-only and --e2e-only cannot be combined") + + kb_python, vault_python = ensure_binaries(REPO_ROOT) + ensure_ref_exists(REPO_ROOT, args.base_ref) + resolved_target_ref = resolve_ref_to_commit(REPO_ROOT, args.target_ref) + + ctx = EvalContext( + args=args, + repo_root=REPO_ROOT, + paths=create_paths(), + kb_python=kb_python, + vault_python=vault_python, + requested_target_ref=args.target_ref, + resolved_target_ref=resolved_target_ref, + base_ref=args.base_ref, + changed_files=[], + ) + + success = False + try: + add_worktree(ctx) + ctx.changed_files = collect_changed_files(ctx.paths.worktree_root, ctx.base_ref) + print_summary(ctx) + init_isolated_repos(ctx.paths) + kb_env, _ = build_envs(ctx) + run_tests(ctx) + run_e2e(ctx, kb_env) + success = True + log("Evaluation completed successfully") + return 0 + except HarnessError as exc: + log(f"FAILED: {exc}") + log(f"Inspect logs under: {ctx.paths.logs_root}") + return 1 + finally: + cleanup(ctx, success) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/generate_context_artifacts.py b/scripts/generate_context_artifacts.py index 6d02d12..67c8f3d 100755 --- a/scripts/generate_context_artifacts.py +++ b/scripts/generate_context_artifacts.py @@ -15,6 +15,10 @@ def _read(path: Path) -> str: return path.read_text(encoding="utf-8") +def _utc_today_iso() -> str: + return dt.datetime.now(dt.timezone.utc).date().isoformat() + + def _parse_env_example(path: Path) -> list[tuple[str, str]]: items: list[tuple[str, str]] = [] for line in _read(path).splitlines(): @@ -58,7 +62,7 @@ def _write_api_surface() -> None: notes_routes = _parse_routes(REPO_ROOT / "kb-server" / "app" / "api" / "routes" / "notes.py") publish_routes = _parse_routes(REPO_ROOT / "kb-server" / "app" / "api" / "routes" / "publish.py") all_routes = health_routes + notes_routes + publish_routes - date = dt.date.today().isoformat() + date = _utc_today_iso() content = [ "---", @@ -91,7 +95,7 @@ def _write_api_surface() -> None: def _write_env_catalog() -> None: - date = dt.date.today().isoformat() + date = _utc_today_iso() kb_env = _parse_env_example(REPO_ROOT / "kb-server" / ".env.example") kb_defaults = _parse_settings_defaults(REPO_ROOT / "kb-server" / "app" / "core" / "config.py") vs_defaults = _parse_settings_defaults(REPO_ROOT / "vault-sync" / "vault_sync" / "config.py") @@ -165,4 +169,3 @@ def main() -> int: if __name__ == "__main__": raise SystemExit(main()) - diff --git a/tests/test_eval_pr.py b/tests/test_eval_pr.py new file mode 100644 index 0000000..90234ab --- /dev/null +++ b/tests/test_eval_pr.py @@ -0,0 +1,121 @@ +import argparse +import importlib.util +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path +from unittest import mock + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "scripts" / "eval_pr.py" +SPEC = importlib.util.spec_from_file_location("eval_pr", MODULE_PATH) +assert SPEC is not None +assert SPEC.loader is not None +eval_pr = importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = eval_pr +SPEC.loader.exec_module(eval_pr) + + +class EvalPrTests(unittest.TestCase): + def make_context(self, temp_root: Path, *, keep_temp: bool = False) -> object: + paths = eval_pr.EvalPaths( + temp_root=temp_root, + worktree_root=temp_root / "repo", + vault_root=temp_root / "vault", + sync_root=temp_root / "sync", + remote_root=temp_root / "remote.git", + db_path=temp_root / "kb.sqlite3", + logs_root=temp_root / "logs", + kb_env_file=temp_root / "kb.env", + sync_env_file=temp_root / "sync.env", + ) + return eval_pr.EvalContext( + args=argparse.Namespace( + keep_temp=keep_temp, + tmux_session_name="fd-test-session", + ), + repo_root=temp_root, + paths=paths, + kb_python=Path("/tmp/kb-python"), + vault_python=Path("/tmp/vault-python"), + requested_target_ref="HEAD", + resolved_target_ref="abc123", + base_ref="main", + changed_files=[], + ) + + def test_resolve_ref_to_commit_resolves_symbolic_head(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + repo_root = Path(tmpdir) + subprocess.run(["git", "init"], cwd=repo_root, check=True, capture_output=True, text=True) + subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=repo_root, check=True) + subprocess.run(["git", "config", "user.name", "Test User"], cwd=repo_root, check=True) + (repo_root / "note.txt").write_text("hello\n", encoding="utf-8") + subprocess.run(["git", "add", "note.txt"], cwd=repo_root, check=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=repo_root, check=True, capture_output=True, text=True) + + resolved = eval_pr.resolve_ref_to_commit(repo_root, "HEAD") + expected = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD^{commit}"], + cwd=repo_root, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + + self.assertEqual(resolved, expected) + + def test_add_worktree_checks_out_resolved_target_sha(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + ctx = self.make_context(Path(tmpdir)) + commands = [] + + def fake_run_command(cmd, **kwargs): + commands.append((cmd, kwargs)) + return None + + with mock.patch.object(eval_pr, "run_command", side_effect=fake_run_command): + eval_pr.add_worktree(ctx) + + self.assertTrue(ctx.created_worktree) + self.assertEqual(commands[0][0], ["git", "worktree", "add", "--detach", str(ctx.paths.worktree_root), "main"]) + self.assertEqual(commands[1][0], ["git", "checkout", "--detach", "abc123"]) + + def test_cleanup_preserves_owned_resources_on_failure(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + ctx = self.make_context(Path(tmpdir)) + ctx.created_worktree = True + ctx.created_tmux_session = True + + with ( + mock.patch.object(eval_pr, "kill_tmux_session") as kill_tmux_session, + mock.patch.object(eval_pr, "remove_worktree") as remove_worktree, + mock.patch.object(eval_pr.shutil, "rmtree") as rmtree, + ): + eval_pr.cleanup(ctx, success=False) + + kill_tmux_session.assert_not_called() + remove_worktree.assert_not_called() + rmtree.assert_not_called() + + def test_cleanup_removes_only_owned_resources_on_success(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + ctx = self.make_context(Path(tmpdir)) + ctx.created_worktree = True + ctx.created_tmux_session = False + + with ( + mock.patch.object(eval_pr, "kill_tmux_session") as kill_tmux_session, + mock.patch.object(eval_pr, "remove_worktree") as remove_worktree, + mock.patch.object(eval_pr.shutil, "rmtree") as rmtree, + ): + eval_pr.cleanup(ctx, success=True) + + kill_tmux_session.assert_not_called() + remove_worktree.assert_called_once_with(ctx.repo_root, ctx.paths.worktree_root) + rmtree.assert_called_once_with(ctx.paths.temp_root, ignore_errors=True) + + +if __name__ == "__main__": + unittest.main()