Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,12 @@ aci scan --target . --profile full --scope-mode full-repo --report-scope-class r
aci emit-sarif --report report.json > aci.sarif # SARIF 2.1.0 for code scanning
aci emit-annotations --report report.json # GitHub Actions annotations
aci emit-github-summary --report report.json # GitHub markdown summary
aci emit-baseline --report report.json --output ops.toml # accept current findings as a baseline
aci validate-report --report report.json # check against the report contract
```

Report-view filters are available on `scan`, `emit-sarif`, `emit-annotations`,
and `emit-github-summary`:
`emit-github-summary`, and `emit-baseline`:

```bash
--report-scope-class runtime-source
Expand Down
27 changes: 20 additions & 7 deletions docs/QUICKSTART.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,30 @@ aci scan --target . --profile full --severity-threshold critical
aci scan --target . --profile full --fail-on-new-findings
```

## 6. Accept or defer findings (operations file)
## 6. Adopt on an existing codebase (baseline)

Create an operations TOML and pass `--operations-file ops.toml` to baseline,
suppress, or waive findings without editing code:
To start tracking only **new** issues on a project that already has findings,
generate a baseline from a scan and pass it back on later runs — no hand-edited
TOML:

```toml
[baseline]
entries = [{ ci_id = "CI-03", target_file = "legacy/util.py", line = 12 }]
```bash
aci scan --target . --profile full --output report.json # 1. scan once
aci emit-baseline --report report.json --output ops.toml # 2. accept today's findings
aci scan --target . --profile full --operations-file ops.toml --fail-on-new-findings
# 3. from now on, only NEW findings block
```

Each baseline entry is keyed by the finding's **fingerprint**, which is stable
across unrelated line shifts, so edits elsewhere in a file do not resurrect a
baselined finding. When you fix a finding, the next scan reports its entry as
`resolved` (a candidate to remove from the baseline).

You can also hand-author an operations file to **waive** or **suppress** specific
findings without editing code:

```toml
[waiver]
entries = [{ waiver_id = "W1", ci_id = "CI-21", target_file = "app/io.py", line = 42 }]
entries = [{ waiver_id = "W1", fingerprint = "<finding-fingerprint>", owner = "alice", reason = "tracked in JIRA-123", review_condition = "before GA" }]
```

## 7. Hosted CI integration
Expand Down
106 changes: 106 additions & 0 deletions shared/python/aci_baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Generate an operations-file baseline from a scan report.

Adopting ACI on an existing codebase means accepting today's findings as
pre-existing so that future scans surface only NEW ones. That baseline used to
be hand-authored TOML; this module derives it from a report instead, turning the
central adoption step into one command (`aci emit-baseline`).

The emitted TOML round-trips through aci_operations.load_operations_state. Each
entry's identity is the fingerprint (stable across unrelated line shifts), so no
line number is written -- encoding identity by line was the root of three earlier
defects. Output is sorted for a stable, reviewable diff when the baseline is
regenerated.
"""
from __future__ import annotations

from typing import cast

# TOML basic-string escapes (TOML v1.0.0 §String). Everything else, including
# printable non-ASCII, is emitted as-is in UTF-8.
_TOML_SIMPLE_ESCAPES = {
"\\": "\\\\",
'"': '\\"',
"\b": "\\b",
"\t": "\\t",
"\n": "\\n",
"\f": "\\f",
"\r": "\\r",
}


def _toml_escape(value: str) -> str:
out: list[str] = []
for ch in value:
simple = _TOML_SIMPLE_ESCAPES.get(ch)
if simple is not None:
out.append(simple)
elif ch < "\x20" or ch == "\x7f":
out.append(f"\\u{ord(ch):04X}")
else:
out.append(ch)
return "".join(out)


def _entry_fields(finding: dict[str, object]) -> list[tuple[str, str]]:
"""The stable-identity fields for one baseline entry, in emit order.

fingerprint anchors the match; ci_id and target_file keep the TOML readable
and let resolved-baseline detection bound itself to scanned files. line is
deliberately omitted.
"""
fields: list[tuple[str, str]] = []
for key in ("fingerprint", "ci_id", "target_file"):
raw = finding.get(key)
if isinstance(raw, str) and raw:
fields.append((key, raw))
return fields


def build_baseline_operations(report: dict[str, object]) -> str:
    """Return operations TOML whose [baseline] accepts every finding in *report*.

    Args:
        report: A report mapping. Its ``"findings"`` value is used when it is a
            list; any other value (or a missing key) is treated as no findings.

    Returns:
        TOML text ending in a newline: a fixed comment header, a ``[baseline]``
        table with one inline-table entry per usable finding, and -- whenever
        at least one finding was skipped -- a trailing ``# note:`` comment
        giving the skipped count.

    A finding is skipped when it is not a dict or when it carries none of the
    identity fields. The skipped note is written even when no entry remains,
    so a report whose findings were all unusable is not mistaken for a clean
    one.
    """
    raw_findings = report.get("findings")
    findings = raw_findings if isinstance(raw_findings, list) else []

    entries: list[list[tuple[str, str]]] = []
    skipped = 0
    for finding in findings:
        fields = _entry_fields(cast(dict[str, object], finding)) if isinstance(finding, dict) else []
        if fields:
            entries.append(fields)
        else:
            skipped += 1

    # Deterministic order: by target_file, then ci_id, then fingerprint, so a
    # regenerated baseline diffs cleanly against the previous one.
    def _sort_key(fields: list[tuple[str, str]]) -> tuple[str, str, str]:
        as_map = dict(fields)
        return (as_map.get("target_file", ""), as_map.get("ci_id", ""), as_map.get("fingerprint", ""))

    entries.sort(key=_sort_key)

    lines = [
        "# ACI baseline -- generated from a scan report by `aci emit-baseline`.",
        "# Each entry accepts a finding as pre-existing; future scans report only",
        "# NEW findings. Identity is the fingerprint (stable across line shifts), so",
        "# no line numbers are stored here. Remove an entry once its finding is",
        "# fixed -- ACI then reports it as resolved on the next scan.",
        "[baseline]",
    ]
    if entries:
        lines.append("entries = [")
        lines.extend(
            " { " + ", ".join(f'{key} = "{_toml_escape(value)}"' for key, value in fields) + " },"
            for fields in entries
        )
        lines.append("]")
    else:
        lines.append("entries = []")

    # Emitted on both paths: previously the empty-baseline path returned early
    # and silently dropped this note.
    if skipped:
        lines.append(f"# note: {skipped} finding(s) had no usable identity and were not baselined.")
    return "\n".join(lines) + "\n"
29 changes: 29 additions & 0 deletions shared/python/aci_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
SCOPE_MODE_SOURCE_ONLY,
)
from .aci_annotations import build_github_annotations
from .aci_baseline import build_baseline_operations
from .aci_sarif import build_sarif_report
from .aci_sarif import validate_sarif_report
from .aci_package_assets import read_text_asset
Expand Down Expand Up @@ -60,6 +61,7 @@
SCOPE_MODE_SOURCE_ONLY,
)
from aci_annotations import build_github_annotations
from aci_baseline import build_baseline_operations # type: ignore[no-redef]
from aci_sarif import build_sarif_report
from aci_sarif import validate_sarif_report
from aci_package_assets import read_text_asset
Expand Down Expand Up @@ -229,6 +231,19 @@ def _build_parser() -> argparse.ArgumentParser:
github_summary_cmd.add_argument("--report", type=Path, required=True, help="ACI report JSON file to summarize")
_add_report_view_args(github_summary_cmd)

baseline_cmd = sub.add_parser(
"emit-baseline",
help="Generate an operations-file baseline (TOML) from a report JSON, accepting today's findings as pre-existing",
)
baseline_cmd.add_argument("--report", type=Path, required=True, help="ACI report JSON file to baseline")
baseline_cmd.add_argument(
"--output",
type=Path,
default=None,
help="Write the baseline TOML to this path instead of stdout",
)
_add_report_view_args(baseline_cmd)

catalog = sub.add_parser(
"show-analyzer-catalog",
help="Print the bounded external-analyzer catalog known to the common shelf",
Expand Down Expand Up @@ -436,6 +451,20 @@ def _handle_report_command(args: argparse.Namespace) -> int | None:
raise ValueError(f"Report file is not a JSON object: {args.report}")
print(build_github_summary_markdown(_project_report_view_from_args(data, args)), end="")
return EXIT_OK
if args.command == "emit-baseline":
data = _read_json_file(args.report)
if not isinstance(data, dict):
raise ValueError(f"Report file is not a JSON object: {args.report}")
toml_text = build_baseline_operations(_project_report_view_from_args(data, args))
output_path = getattr(args, "output", None)
if output_path is not None:
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(toml_text, encoding="utf-8")
entry_count = toml_text.count("{ ")
print(f"ACI baseline written to {output_path} ({entry_count} entr{'y' if entry_count == 1 else 'ies'})")
else:
print(toml_text, end="")
return EXIT_OK
return None


Expand Down
57 changes: 57 additions & 0 deletions shared/tests/test_aci_report_surface_contracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from types import SimpleNamespace

from aci.aci_cli import _handle_report_command
from aci.aci_baseline import build_baseline_operations
from aci.aci_github_summary import build_github_summary_markdown
from aci.aci_operations import load_operations_state


def _sample_report() -> dict[str, object]:
Expand Down Expand Up @@ -263,3 +265,58 @@ def test_github_summary_carries_the_detection_disclosure_at_the_point_of_use() -
no_disclosure = {k: v for k, v in clean_report.items() if k != "detection_disclosure"}
rendered = build_github_summary_markdown(no_disclosure)
assert "Scope note:" not in rendered


def test_emit_baseline_round_trips_through_the_operations_loader(tmp_path: Path, capsys) -> None:
    """The CLI's stdout baseline must load cleanly and name the report's findings."""
    # A generated baseline only has value if the real loader accepts it and its
    # entries identify exactly the report's findings by stable fingerprint.
    args = SimpleNamespace(
        command="emit-baseline",
        report=_write_report(tmp_path),
        output=None,
        report_scope_class=[],
        report_owner_lane=[],
    )
    assert _handle_report_command(args) == 0

    # With output=None the TOML goes to stdout; persist it so the loader can read it.
    baseline_file = tmp_path / "ops.toml"
    baseline_file.write_text(capsys.readouterr().out, encoding="utf-8")
    loaded = load_operations_state(baseline_file)

    fingerprints = {entry.fingerprint for entry in loaded.baseline_entries}
    assert fingerprints == {"fp-1", "fp-2", "fp-3"}
    # Identity is the fingerprint, never the line: no entry carries a line number.
    assert not any(entry.line is not None for entry in loaded.baseline_entries)


def test_emit_baseline_escapes_special_characters_and_round_trips(tmp_path: Path) -> None:
    """A path with quotes, a backslash and non-ASCII text survives emit + load."""
    # File names and ids can carry TOML metacharacters; the emitter has to
    # produce TOML the loader still parses and decodes to the original string.
    awkward_name = 'weird "name"\\dir/â.py'
    findings = [{"fingerprint": "fp-x", "ci_id": "CI-21", "target_file": awkward_name}]

    baseline_file = tmp_path / "ops.toml"
    baseline_file.write_text(build_baseline_operations({"findings": findings}), encoding="utf-8")

    loaded = load_operations_state(baseline_file)  # raises if the escaping is wrong
    assert loaded.baseline_entries[0].target_file == awkward_name


def test_emit_baseline_empty_report_is_a_valid_empty_baseline(tmp_path: Path) -> None:
    """A report with no findings yields a loadable baseline with zero entries."""
    baseline_file = tmp_path / "ops.toml"
    baseline_file.write_text(build_baseline_operations({"findings": []}), encoding="utf-8")

    loaded = load_operations_state(baseline_file)
    assert loaded.baseline_entries == ()


def test_emit_baseline_is_deterministic() -> None:
    """Repeated generation is byte-identical and ordered by target_file."""
    # Findings are deliberately supplied out of order (b.py before a.py).
    findings = [
        {"fingerprint": "fp-b", "ci_id": "CI-21", "target_file": "b.py"},
        {"fingerprint": "fp-a", "ci_id": "CI-03", "target_file": "a.py"},
    ]
    report = {"findings": findings}

    first = build_baseline_operations(report)
    second = build_baseline_operations(report)
    assert first == second
    # sorted by target_file, so a.py precedes b.py regardless of input order
    position_a = first.index("a.py")
    position_b = first.index("b.py")
    assert position_a < position_b
Loading