hidai25
diff --git a/README.md b/README.md
Lines changed: 57 additions & 0 deletions
diff --git a/evalview/cli.py b/evalview/cli.py
Lines changed: 38 additions & 2 deletions
@@ -130,6 +130,61 @@ Database config is optional – EvalView only uses it if you enable it in config
 
 ---
 
+## Behavior Coverage (not line coverage)
+
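+Line coverage says little about an LLM agent: every line of code can run while the agent still picks the wrong tool or gives a wrong answer. Instead, EvalView focuses on **behavior coverage**: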
+
+| Dimension | What it measures |
+|-----------|------------------|
+| **Tasks covered** | Which real-world scenarios have tests? |
+| **Tools exercised** | Are all your agent's tools being tested? |
+| **Paths hit** | Are multi-step workflows tested end-to-end? |
+| **Eval dimensions** | Are you checking correctness, output quality, safety, cost, and latency? |
+
+**The loop:** weird prod session → turn it into a regression test → it shows up in your coverage.
+
+```bash
+# Compact summary for screenshots / sharing
+evalview run --summary
+```
+
+```
+━━━ EvalView Summary ━━━
+Suite: analytics_agent
+Tests: 7 passed, 2 failed
+
+Failures:
+  ✗ cohort: large result set     cost +240%
+  ✗ doc QA: long context         missing tool: chunking
+
+Deltas vs last run:
+  Tokens:  +188%  ↑
+  Latency: +95ms  ↑
+  Cost:    +$0.12 ↑
+
+⚠️  Regressions detected
+```
+
+```bash
+# Behavior coverage report
+evalview run --coverage
+```
+
+```
+━━━ Behavior Coverage ━━━
+Suite: analytics_agent
+
+Tasks:      9/9 scenarios (100%)
+Tools:      6/8 exercised (75%)
+            missing: chunking, summarize
+Paths:      3/3 multi-step workflows (100%)
+Dimensions: correctness ✓, output ✓, cost ✗, latency ✓, safety ✓
+
+Overall:    92% behavior coverage
+```
+
+---
+
 ## What it does (in practice)
 
 - **Write test cases in YAML** – Define inputs, required tools, and scoring thresholds
@@ -294,6 +349,8 @@ Options:
   --max-retries N      Retry flaky tests N times (default: 0)
   --watch              Re-run tests on file changes
   --html-report PATH   Generate interactive HTML report
+  --summary            Compact, screenshot-friendly output
+  --coverage           Show behavior coverage report
 ```
 
 ### `evalview expand`
 
@@ -1144,6 +1144,16 @@ async def _init_wizard_async(dir: str):
     type=click.Path(),
     help="Generate HTML report to specified path",
 )
+@click.option(
+    "--summary",
+    is_flag=True,
+    help="Compact, screenshot-friendly output (great for sharing)",
+)
+@click.option(
+    "--coverage",
+    is_flag=True,
+    help="Show behavior coverage report (tasks, tools, paths, eval dimensions)",
+)
 def run(
     path: Optional[str],
     pattern: str,
@@ -1160,6 +1170,8 @@ def run(
     retry_delay: float,
     watch: bool,
     html_report: str,
+    summary: bool,
+    coverage: bool,
 ):
     """Run test cases against the agent.
 
@@ -1168,7 +1180,7 @@ def run(
     """
     asyncio.run(_run_async(
         path, pattern, test, filter, output, verbose, track, compare_baseline, debug,
-        sequential, max_workers, max_retries, retry_delay, watch, html_report
+        sequential, max_workers, max_retries, retry_delay, watch, html_report, summary, coverage
     ))
 
 
@@ -1188,6 +1200,8 @@ async def _run_async(
     retry_delay: float = 1.0,
     watch: bool = False,
     html_report: str = None,
+    summary: bool = False,
+    coverage: bool = False,
 ):
     """Async implementation of run command."""
     import fnmatch
@@ -1901,7 +1915,29 @@ async def update_display():
     # Print summary
     console.print()
     reporter = ConsoleReporter()
-    reporter.print_summary(results)
+    if summary:
+        # Compact, screenshot-friendly output
+        # Get suite name from path
+        suite_name = None
+        if path:
+            suite_name = Path(path).name if Path(path).is_dir() else Path(path).stem
+
+        # Load previous results for delta comparison
+        previous_results = None
+        output_dir = Path(output)
+        if output_dir.exists():
+            previous_results = JSONReporter.get_latest_results(output_dir)
+
+        reporter.print_compact_summary(results, suite_name=suite_name, previous_results=previous_results)
+    else:
+        reporter.print_summary(results)
+
+    # Print behavior coverage report if enabled
+    if coverage:
+        suite_name = None
+        if path:
+            suite_name = Path(path).name if Path(path).is_dir() else Path(path).stem
+        reporter.print_coverage_report(test_cases, results, suite_name=suite_name)
 
     # Print regression analysis if enabled
     if compare_baseline and regression_reports: