[Backend Tester] Add CSV report generation

GregoryComer · GregoryComer · commit 42f383f3c84b · 2025-07-23T16:31:32.000-07:00
ghstack-source-id: 17e6147 ghstack-comment-id: 3105325555 Pull-Request: #12741
diff --git a/backends/test/suite/context.py b/backends/test/suite/context.py
@@ -1,8 +1,9 @@
 # Test run context management. This is used to determine the test context for reporting
 # purposes.
 class TestContext:
-    def __init__(self, test_name: str, flow_name: str, params: dict | None):
+    def __init__(self, test_name: str, test_base_name: str, flow_name: str, params: dict | None):
         self.test_name = test_name
+        self.test_base_name = test_base_name
         self.flow_name = flow_name
         self.params = params
 
diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py
@@ -42,19 +42,19 @@ def _create_test(
     dtype: torch.dtype,
     use_dynamic_shapes: bool,
 ):
+    dtype_name = str(dtype)[6:]  # strip "torch."
+    test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
+    if use_dynamic_shapes:
+        test_name += "_dynamic_shape"
+
     def wrapped_test(self):
         params = {
             "dtype": dtype,
             "use_dynamic_shapes": use_dynamic_shapes,
         }
-        with TestContext(test_name, flow.name, params):
+        with TestContext(test_name, test_func.__name__, flow.name, params):
             test_func(self, flow, dtype, use_dynamic_shapes)
 
-    dtype_name = str(dtype)[6:]  # strip "torch."
-    test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
-    if use_dynamic_shapes:
-        test_name += "_dynamic_shape"
-
     wrapped_test._name = test_func.__name__  # type: ignore
     wrapped_test._flow = flow  # type: ignore
 
@@ -118,6 +118,7 @@ def run_model_test(
         inputs,
         flow,
         context.test_name,
+        context.test_base_name,
         context.params,
         dynamic_shapes=dynamic_shapes,
     )
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import copy
 import os
 import unittest
 
@@ -90,12 +91,13 @@ def _expand_test(cls, test_name: str):
 def _make_wrapped_test(
     test_func: Callable,
     test_name: str,
+    test_base_name: str,
     flow: TestFlow,
     params: dict | None = None,
 ):
     def wrapped_test(self):
-        with TestContext(test_name, flow.name, params):
-            test_kwargs = params or {}
+        with TestContext(test_name, test_base_name, flow.name, params):
+            test_kwargs = copy.copy(params) or {}
             test_kwargs["flow"] = flow
 
             test_func(self, **test_kwargs)
@@ -114,19 +116,20 @@ def _create_test_for_backend(
     test_type = getattr(test_func, "test_type", TestType.STANDARD)
 
     if test_type == TestType.STANDARD:
-        wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow)
         test_name = f"{test_func.__name__}_{flow.name}"
+        wrapped_test = _make_wrapped_test(test_func, test_name, test_func.__name__, flow)
         setattr(cls, test_name, wrapped_test)
     elif test_type == TestType.DTYPE:
         for dtype in DTYPES:
+            dtype_name = str(dtype)[6:]  # strip "torch."
+            test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}"
             wrapped_test = _make_wrapped_test(
                 test_func,
+                test_name,
                 test_func.__name__,
                 flow,
                 {"dtype": dtype},
             )
-            dtype_name = str(dtype)[6:]  # strip "torch."
-            test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}"
             setattr(cls, test_name, wrapped_test)
     else:
         raise NotImplementedError(f"Unknown test type {test_type}.")
@@ -144,6 +147,7 @@ def _test_op(self, model, inputs, flow: TestFlow):
             inputs,
             flow,
             context.test_name,
+            context.test_base_name,
             context.params,
         )
 
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
@@ -1,7 +1,11 @@
 from collections import Counter
 from dataclasses import dataclass
 from enum import IntEnum
+from functools import reduce
+from re import A
+from typing import TextIO
 
+import csv
 
 class TestResult(IntEnum):
     """Represents the result of a test case run, indicating success or a specific failure reason."""
@@ -75,13 +79,19 @@ class TestCaseSummary:
     """
     Contains summary results for the execution of a single test case.
     """
+    
+    backend: str
+    """ The name of the target backend. """
 
-    name: str
-    """ The qualified name of the test, not including the flow suffix. """
-
+    base_name: str
+    """ The base name of the test, not including flow or parameter suffixes. """
+    
     flow: str
     """ The backend-specific flow name. Corresponds to flows registered in backends/test/suite/__init__.py. """
 
+    name: str
+    """ The full name of test, including flow and parameter suffixes. """
+
     params: dict | None
     """ Test-specific parameters, such as dtype. """
 
@@ -162,3 +172,40 @@ def complete_test_session() -> RunSummary:
     _active_session = None
 
     return summary
+
+def generate_csv_report(summary: RunSummary, output: TextIO):
+    """ Write a run summary report to a file in CSV format. """
+
+    field_names = [
+        "Test ID",
+        "Test Case",
+        "Backend",
+        "Flow",
+        "Result",
+    ]
+    
+    # Tests can have custom parameters. We'll want to report them here, so we need
+    # a list of all unique parameter names.
+    param_names = reduce(
+        lambda a, b: a.union(b),
+        (set(s.params.keys()) for s in summary.test_case_summaries if s.params is not None),
+        set()
+    )
+    field_names += (s.capitalize() for s in param_names)
+
+    writer = csv.DictWriter(output, field_names)
+    writer.writeheader()
+    
+    for record in summary.test_case_summaries:
+        row = {
+            "Test ID": record.name,
+            "Test Case": record.base_name,
+            "Backend": record.backend,
+            "Flow": record.flow,
+            "Result": record.result.display_name(),
+        }
+        if record.params is not None:
+            row.update({
+                k.capitalize(): v for k, v in record.params.items()
+            })
+        writer.writerow(row)
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
@@ -13,6 +13,7 @@
 from executorch.backends.test.suite.reporting import (
     begin_test_session,
     complete_test_session,
+    generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,
@@ -31,6 +32,7 @@ def run_test(  # noqa: C901
     inputs: Any,
     flow: TestFlow,
     test_name: str,
+    test_base_name: str,
     params: dict | None,
     dynamic_shapes: Any | None = None,
 ) -> TestCaseSummary:
@@ -44,8 +46,10 @@ def build_result(
         result: TestResult, error: Exception | None = None
     ) -> TestCaseSummary:
         return TestCaseSummary(
-            name=test_name,
+            backend=flow.backend,
+            base_name=test_base_name,
             flow=flow.name,
+            name=test_name,
             params=params,
             result=result,
             error=error,
@@ -168,6 +172,9 @@ def parse_args():
     parser.add_argument(
         "-f", "--filter", nargs="?", help="A regular expression filter for test names."
     )
+    parser.add_argument(
+        "-r", "--report", nargs="?", help="A file to write the test report to, in CSV format."
+    )
     return parser.parse_args()
 
 
@@ -195,6 +202,11 @@ def runner_main():
 
     summary = complete_test_session()
     print_summary(summary)
+    
+    if args.report is not None:
+        with open(args.report, "w") as f:
+            print(f"Writing CSV report to {args.report}.")
+            generate_csv_report(summary, f)
 
 
 if __name__ == "__main__":
diff --git a/backends/test/suite/tests/README.md b/backends/test/suite/tests/README.md
@@ -0,0 +1,3 @@
+# Tests
+
+This directory contains meta-tests for the backend test suite. As the test suite contains a non-negligible amount of logic, these tests are useful to ensure that the test suite itself is working correctly.
diff --git a/backends/test/suite/tests/__init__.py b/backends/test/suite/tests/__init__.py
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
@@ -0,0 +1,101 @@
+import torch
+import unittest
+
+from csv import DictReader
+from ..reporting import TestResult, TestCaseSummary, RunSummary, TestSessionState, generate_csv_report
+from io import StringIO
+
+# Test data for simulated test results.
+TEST_CASE_SUMMARIES = [
+    TestCaseSummary(
+        backend="backend1",
+        base_name="test1",
+        flow="flow1",
+        name="test1_backend1_flow1",
+        params=None,
+        result=TestResult.SUCCESS,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend2",
+        base_name="test1",
+        flow="flow1",
+        name="test1_backend2_flow1",
+        params=None,
+        result=TestResult.LOWER_FAIL,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend1",
+        base_name="test2",
+        flow="flow1",
+        name="test2_backend1_flow1",
+        params={"dtype": torch.float32},
+        result=TestResult.SUCCESS_UNDELEGATED,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend2",
+        base_name="test2",
+        flow="flow1",
+        name="test2_backend2_flow1",
+        params={"use_dynamic_shapes": True},
+        result=TestResult.EXPORT_FAIL,
+        error=None,
+    ),
+]
+
+class Reporting(unittest.TestCase):
+    def test_csv_report_simple(self):
+        # Verify the format of a simple CSV run report.
+        session_state = TestSessionState()
+        session_state.test_case_summaries.extend(TEST_CASE_SUMMARIES)
+        run_summary = RunSummary.from_session(session_state)
+        
+        strio = StringIO()
+        generate_csv_report(run_summary, strio)
+        
+        # Attempt to deserialize and validate the CSV report.
+        report = DictReader(StringIO(strio.getvalue()))
+        records = list(report)
+        self.assertEqual(len(records), 4)
+
+        # Validate first record: test1, backend1, SUCCESS
+        self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1")
+        self.assertEqual(records[0]["Test Case"], "test1")
+        self.assertEqual(records[0]["Backend"], "backend1")
+        self.assertEqual(records[0]["Flow"], "flow1")
+        self.assertEqual(records[0]["Result"], "Success (Delegated)")
+        self.assertEqual(records[0]["Dtype"], "")
+        self.assertEqual(records[0]["Use_dynamic_shapes"], "")
+
+        # Validate second record: test1, backend2, LOWER_FAIL
+        self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
+        self.assertEqual(records[1]["Test Case"], "test1")
+        self.assertEqual(records[1]["Backend"], "backend2")
+        self.assertEqual(records[1]["Flow"], "flow1")
+        self.assertEqual(records[1]["Result"], "Fail (Lowering)")
+        self.assertEqual(records[1]["Dtype"], "")
+        self.assertEqual(records[1]["Use_dynamic_shapes"], "")
+
+        # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
+        self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
+        self.assertEqual(records[2]["Test Case"], "test2")
+        self.assertEqual(records[2]["Backend"], "backend1")
+        self.assertEqual(records[2]["Flow"], "flow1")
+        self.assertEqual(records[2]["Result"], "Success (Undelegated)")
+        self.assertEqual(records[2]["Dtype"], str(torch.float32))
+        self.assertEqual(records[2]["Use_dynamic_shapes"], "")
+
+        # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
+        self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
+        self.assertEqual(records[3]["Test Case"], "test2")
+        self.assertEqual(records[3]["Backend"], "backend2")
+        self.assertEqual(records[3]["Flow"], "flow1")
+        self.assertEqual(records[3]["Result"], "Fail (Export)")
+        self.assertEqual(records[3]["Dtype"], "")
+        self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
+
+
+        
+        

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Tests`
	`2`	`+`
	`3`	`+This directory contains meta-tests for the backend test suite. As the test suite contains a non-negligible amount of logic, these tests are useful to ensure that the test suite itself is working correctly.`