From f120e70e935996ce8a0b274f74625fbe0f1252e8 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Thu, 17 Jul 2025 17:26:58 -0700 Subject: [PATCH 01/38] Update [ghstack-poisoned] --- backends/test/suite/__init__.py | 56 ++++++++++---------- backends/test/suite/discovery.py | 63 +++++++++++++++++++++++ backends/test/suite/flow.py | 63 +++++++++++++++++++++++ backends/test/suite/operators/__init__.py | 11 ++++ backends/test/suite/reporting.py | 6 +-- backends/test/suite/runner.py | 40 ++++++++++++-- 6 files changed, 200 insertions(+), 39 deletions(-) create mode 100644 backends/test/suite/discovery.py create mode 100644 backends/test/suite/flow.py diff --git a/backends/test/suite/__init__.py b/backends/test/suite/__init__.py index bce62ce1d63..cf73a7bdd0c 100644 --- a/backends/test/suite/__init__.py +++ b/backends/test/suite/__init__.py @@ -12,11 +12,13 @@ import unittest from enum import Enum -from typing import Any, Callable, Tuple +from typing import Callable, Sequence, Sequence + +import executorch.backends.test.suite.flow import torch -from executorch.backends.test.harness import Tester from executorch.backends.test.suite.context import get_active_test_context, TestContext +from executorch.backends.test.suite.flow import TestFlow from executorch.backends.test.suite.reporting import log_test_summary from executorch.backends.test.suite.runner import run_test, runner_main @@ -44,22 +46,20 @@ def is_backend_enabled(backend): return backend in _ENABLED_BACKENDS -ALL_TEST_FLOWS = [] +_ALL_TEST_FLOWS: Sequence[TestFlow] | None = None -if is_backend_enabled("xnnpack"): - from executorch.backends.xnnpack.test.tester import Tester as XnnpackTester - XNNPACK_TEST_FLOW = ("xnnpack", XnnpackTester) - ALL_TEST_FLOWS.append(XNNPACK_TEST_FLOW) +def get_test_flows() -> Sequence[TestFlow]: + global _ALL_TEST_FLOWS -if is_backend_enabled("coreml"): - try: - from executorch.backends.apple.coreml.test.tester import CoreMLTester + if _ALL_TEST_FLOWS is None: + _ALL_TEST_FLOWS = 
[ + f + for f in executorch.backends.test.suite.flow.all_flows() + if is_backend_enabled(f.backend) + ] - COREML_TEST_FLOW = ("coreml", CoreMLTester) - ALL_TEST_FLOWS.append(COREML_TEST_FLOW) - except Exception: - print("Core ML AOT is not available.") + return _ALL_TEST_FLOWS DTYPES = [ @@ -115,53 +115,51 @@ def _create_tests(cls): # Expand a test into variants for each registered flow. def _expand_test(cls, test_name: str): test_func = getattr(cls, test_name) - for flow_name, tester_factory in ALL_TEST_FLOWS: - _create_test_for_backend(cls, test_func, flow_name, tester_factory) + for flow in get_test_flows(): + _create_test_for_backend(cls, test_func, flow) delattr(cls, test_name) def _make_wrapped_test( test_func: Callable, test_name: str, - test_flow: str, - tester_factory: Callable, + flow: TestFlow, params: dict | None = None, ): def wrapped_test(self): - with TestContext(test_name, test_flow, params): + with TestContext(test_name, flow.name, params): test_kwargs = params or {} - test_kwargs["tester_factory"] = tester_factory + test_kwargs["tester_factory"] = flow.tester_factory test_func(self, **test_kwargs) + setattr(wrapped_test, "_name", test_name) + setattr(wrapped_test, "_flow", flow) + return wrapped_test def _create_test_for_backend( cls, test_func: Callable, - flow_name: str, - tester_factory: Callable[[torch.nn.Module, Tuple[Any]], Tester], + flow: TestFlow, ): test_type = getattr(test_func, "test_type", TestType.STANDARD) if test_type == TestType.STANDARD: - wrapped_test = _make_wrapped_test( - test_func, test_func.__name__, flow_name, tester_factory - ) - test_name = f"{test_func.__name__}_{flow_name}" + wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow) + test_name = f"{test_func.__name__}_{flow.name}" setattr(cls, test_name, wrapped_test) elif test_type == TestType.DTYPE: for dtype in DTYPES: wrapped_test = _make_wrapped_test( test_func, test_func.__name__, - flow_name, - tester_factory, + flow, {"dtype": dtype}, ) dtype_name 
= str(dtype)[6:] # strip "torch." - test_name = f"{test_func.__name__}_{dtype_name}_{flow_name}" + test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" setattr(cls, test_name, wrapped_test) else: raise NotImplementedError(f"Unknown test type {test_type}.") diff --git a/backends/test/suite/discovery.py b/backends/test/suite/discovery.py new file mode 100644 index 00000000000..5abd194cbcd --- /dev/null +++ b/backends/test/suite/discovery.py @@ -0,0 +1,63 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +import os +import unittest + +from types import ModuleType + +from executorch.backends.test.suite.flow import TestFlow + +# +# This file contains logic related to test discovery and filtering. +# + + +def discover_tests( + root_module: ModuleType, backends: set[str] | None +) -> unittest.TestSuite: + # Collect all tests using the unittest discovery mechanism then filter down. + + # Find the file system path corresponding to the root module. + module_file = root_module.__file__ + if module_file is None: + raise RuntimeError(f"Module {root_module} has no __file__ attribute") + + loader = unittest.TestLoader() + module_dir = os.path.dirname(module_file) + suite = loader.discover(module_dir) + + return _filter_tests(suite, backends) + + +def _filter_tests( + suite: unittest.TestSuite, backends: set[str] | None +) -> unittest.TestSuite: + # Recursively traverse the test suite and add them to the filtered set. 
+ filtered_suite = unittest.TestSuite() + + for child in suite: + if isinstance(child, unittest.TestSuite): + filtered_suite.addTest(_filter_tests(child, backends)) + elif isinstance(child, unittest.TestCase): + if _is_test_enabled(child, backends): + filtered_suite.addTest(child) + else: + raise RuntimeError(f"Unexpected test type: {type(child)}") + + return filtered_suite + + +def _is_test_enabled(test_case: unittest.TestCase, backends: set[str] | None) -> bool: + test_method = getattr(test_case, test_case._testMethodName) + + if backends is not None: + flow: TestFlow = getattr(test_method, "_flow") + return flow.backend in backends + else: + return True diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py new file mode 100644 index 00000000000..4410d382401 --- /dev/null +++ b/backends/test/suite/flow.py @@ -0,0 +1,63 @@ +import logging + +from dataclasses import dataclass +from math import log +from typing import Callable, Sequence + +from executorch.backends.test.harness import Tester + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +@dataclass +class TestFlow: + """ + A lowering flow to test. This typically corresponds to a combination of a backend and + a lowering recipe. + """ + + name: str + """ The name of the lowering flow. """ + + backend: str + """ The name of the target backend. """ + + tester_factory: Callable[[], Tester] + """ A factory function that returns a Tester instance for this lowering flow. 
""" + + +def create_xnnpack_flow() -> TestFlow | None: + try: + from executorch.backends.xnnpack.test.tester import Tester as XnnpackTester + + return TestFlow( + name="xnnpack", + backend="xnnpack", + tester_factory=XnnpackTester, + ) + except Exception: + logger.info("Skipping XNNPACK flow registration due to import failure.") + return None + + +def create_coreml_flow() -> TestFlow | None: + try: + from executorch.backends.apple.coreml.test.tester import CoreMLTester + + return TestFlow( + name="coreml", + backend="coreml", + tester_factory=CoreMLTester, + ) + except Exception: + logger.info("Skipping Core ML flow registration due to import failure.") + return None + + +def all_flows() -> Sequence[TestFlow]: + flows = [ + create_xnnpack_flow(), + create_coreml_flow(), + ] + return [f for f in flows if f is not None] diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 6ac1a72bde6..0fb9ecd1dff 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -5,3 +5,14 @@ # LICENSE file in the root directory of this source tree. # pyre-unsafe + +import os + + +def load_tests(loader, suite, pattern): + package_dir = os.path.dirname(__file__) + discovered_suite = loader.discover( + start_dir=package_dir, pattern=pattern or "test_*.py" + ) + suite.addTests(discovered_suite) + return suite diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 948a6187b41..d7181300873 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,6 +1,6 @@ from collections import Counter from dataclasses import dataclass -from enum import IntEnum, nonmember +from enum import IntEnum class TestResult(IntEnum): @@ -33,19 +33,15 @@ class TestResult(IntEnum): UNKNOWN_FAIL = 8 """ The test failed in an unknown or unexpected manner. 
""" - @nonmember def is_success(self): return self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED} - @nonmember def is_non_backend_failure(self): return self in {TestResult.EAGER_FAIL, TestResult.EAGER_FAIL} - @nonmember def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() - @nonmember def display_name(self): if self == TestResult.SUCCESS: return "Success (Delegated)" diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 2a626a5e35f..34a860e8f0b 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -1,4 +1,5 @@ import argparse +import importlib import unittest from typing import Callable @@ -6,6 +7,7 @@ import torch from executorch.backends.test.harness import Tester +from executorch.backends.test.suite.discovery import discover_tests from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, @@ -15,6 +17,12 @@ ) +# A list of all runnable test suites and the corresponding python package. +NAMED_SUITES = { + "operators": "executorch.backends.test.suite.operators", +} + + def run_test( # noqa: C901 model: torch.nn.Module, inputs: any, @@ -130,20 +138,42 @@ def parse_args(): prog="ExecuTorch Backend Test Suite", description="Run ExecuTorch backend tests.", ) - parser.add_argument("test_path", nargs="?", help="Prefix filter for tests to run.") + parser.add_argument( + "suite", + nargs="*", + help="The test suite to run.", + choices=NAMED_SUITES.keys(), + default=["operators"], + ) + parser.add_argument( + "-b", "--backend", nargs="*", help="The backend or backends to test." 
+ ) return parser.parse_args() +def test(suite): + if isinstance(suite, unittest.TestSuite): + print(f"Suite: {suite}") + for t in suite: + test(t) + else: + print(f"Leaf: {type(suite)} {suite}") + print(f" {suite.__name__}") + print(f" {callable(suite)}") + + def runner_main(): args = parse_args() begin_test_session() - test_path = args.test_path or "executorch.backends.test.suite.operators" + if len(args.suite) > 1: + raise NotImplementedError("TODO Support multiple suites.") - loader = unittest.TestLoader() - suite = loader.loadTestsFromName(test_path) - unittest.TextTestRunner().run(suite) + test_path = NAMED_SUITES[args.suite[0]] + test_root = importlib.import_module(test_path) + suite = discover_tests(test_root, args.backend) + unittest.TextTestRunner(verbosity=2).run(suite) summary = complete_test_session() print_summary(summary) From 0fb85e693bfebb44c217d84df9eb9087066330d0 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Thu, 17 Jul 2025 17:40:29 -0700 Subject: [PATCH 02/38] Update [ghstack-poisoned] --- backends/test/suite/discovery.py | 40 +++++++++++++++++++++++--------- backends/test/suite/runner.py | 24 ++++++++++--------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/backends/test/suite/discovery.py b/backends/test/suite/discovery.py index 5abd194cbcd..929a426d430 100644 --- a/backends/test/suite/discovery.py +++ b/backends/test/suite/discovery.py @@ -9,7 +9,9 @@ import os import unittest +from dataclasses import dataclass from types import ModuleType +from typing import Pattern from executorch.backends.test.suite.flow import TestFlow @@ -18,8 +20,19 @@ # +@dataclass +class TestFilter: + """A set of filters for test discovery.""" + + backends: set[str] | None + """ The set of backends to include. If None, all backends are included. """ + + name_regex: Pattern[str] | None + """ A regular expression to filter test names. If None, all tests are included. 
""" + + def discover_tests( - root_module: ModuleType, backends: set[str] | None + root_module: ModuleType, test_filter: TestFilter ) -> unittest.TestSuite: # Collect all tests using the unittest discovery mechanism then filter down. @@ -32,20 +45,20 @@ def discover_tests( module_dir = os.path.dirname(module_file) suite = loader.discover(module_dir) - return _filter_tests(suite, backends) + return _filter_tests(suite, test_filter) def _filter_tests( - suite: unittest.TestSuite, backends: set[str] | None + suite: unittest.TestSuite, test_filter: TestFilter ) -> unittest.TestSuite: # Recursively traverse the test suite and add them to the filtered set. filtered_suite = unittest.TestSuite() for child in suite: if isinstance(child, unittest.TestSuite): - filtered_suite.addTest(_filter_tests(child, backends)) + filtered_suite.addTest(_filter_tests(child, test_filter)) elif isinstance(child, unittest.TestCase): - if _is_test_enabled(child, backends): + if _is_test_enabled(child, test_filter): filtered_suite.addTest(child) else: raise RuntimeError(f"Unexpected test type: {type(child)}") @@ -53,11 +66,16 @@ def _filter_tests( return filtered_suite -def _is_test_enabled(test_case: unittest.TestCase, backends: set[str] | None) -> bool: +def _is_test_enabled(test_case: unittest.TestCase, test_filter: TestFilter) -> bool: test_method = getattr(test_case, test_case._testMethodName) + flow: TestFlow = getattr(test_method, "_flow") + + if test_filter.backends is not None and flow.backend not in test_filter.backends: + return False + + if test_filter.name_regex is not None and not test_filter.name_regex.search( + test_case.id() + ): + return False - if backends is not None: - flow: TestFlow = getattr(test_method, "_flow") - return flow.backend in backends - else: - return True + return True diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 34a860e8f0b..36905d0dabc 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -1,5 
+1,6 @@ import argparse import importlib +import re import unittest from typing import Callable @@ -7,7 +8,7 @@ import torch from executorch.backends.test.harness import Tester -from executorch.backends.test.suite.discovery import discover_tests +from executorch.backends.test.suite.discovery import discover_tests, TestFilter from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, @@ -148,18 +149,17 @@ def parse_args(): parser.add_argument( "-b", "--backend", nargs="*", help="The backend or backends to test." ) + parser.add_argument( + "-f", "--filter", nargs="?", help="A regular expression filter for test names." + ) return parser.parse_args() -def test(suite): - if isinstance(suite, unittest.TestSuite): - print(f"Suite: {suite}") - for t in suite: - test(t) - else: - print(f"Leaf: {type(suite)} {suite}") - print(f" {suite.__name__}") - print(f" {callable(suite)}") +def build_test_filter(args: argparse.Namespace) -> TestFilter: + return TestFilter( + backends=set(args.backend) if args.backend is not None else None, + name_regex=re.compile(args.filter) if args.filter is not None else None, + ) def runner_main(): @@ -172,7 +172,9 @@ def runner_main(): test_path = NAMED_SUITES[args.suite[0]] test_root = importlib.import_module(test_path) - suite = discover_tests(test_root, args.backend) + test_filter = build_test_filter(args) + + suite = discover_tests(test_root, test_filter) unittest.TextTestRunner(verbosity=2).run(suite) summary = complete_test_session() From 4d8d844ebf040e5aff9b8d4a60cd5948a3b38157 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Fri, 18 Jul 2025 19:52:50 -0700 Subject: [PATCH 03/38] Update [ghstack-poisoned] --- backends/test/suite/discovery.py | 4 + backends/test/suite/models/__init__.py | 124 +++++++++++++++ .../test/suite/models/test_torchvision.py | 145 ++++++++++++++++++ backends/test/suite/runner.py | 12 +- 4 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 
backends/test/suite/models/__init__.py create mode 100644 backends/test/suite/models/test_torchvision.py diff --git a/backends/test/suite/discovery.py b/backends/test/suite/discovery.py index 929a426d430..ec77f5a90cd 100644 --- a/backends/test/suite/discovery.py +++ b/backends/test/suite/discovery.py @@ -68,6 +68,10 @@ def _filter_tests( def _is_test_enabled(test_case: unittest.TestCase, test_filter: TestFilter) -> bool: test_method = getattr(test_case, test_case._testMethodName) + + if not hasattr(test_method, "_flow"): + print(f"Test missing flow: {test_method}") + flow: TestFlow = getattr(test_method, "_flow") if test_filter.backends is not None and flow.backend not in test_filter.backends: diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py new file mode 100644 index 00000000000..496bcb6f194 --- /dev/null +++ b/backends/test/suite/models/__init__.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# pyre-unsafe + +from executorch.backends.test.harness import Tester +from executorch.backends.test.suite import get_test_flows +from executorch.backends.test.suite.context import get_active_test_context, TestContext +from executorch.backends.test.suite.flow import TestFlow +from executorch.backends.test.suite.reporting import log_test_summary +from executorch.backends.test.suite.runner import run_test +from typing import Any, Callable + +import itertools +import os +import torch +import unittest + + +DTYPES = [ + torch.float16, + torch.float32, + torch.float64, +] + + +def load_tests(loader, suite, pattern): + package_dir = os.path.dirname(__file__) + discovered_suite = loader.discover( + start_dir=package_dir, pattern=pattern or "test_*.py" + ) + suite.addTests(discovered_suite) + return suite + + +def _create_test( + cls, + test_func: Callable, + flow: TestFlow, + dtype: torch.dtype, + use_dynamic_shapes: bool, +): + def wrapped_test(self): + params = { + "dtype": dtype, + "use_dynamic_shapes": use_dynamic_shapes, + } + with TestContext(test_name, flow.name, params): + test_func(self, dtype, use_dynamic_shapes, flow.tester_factory) + + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" + if use_dynamic_shapes: + test_name += "_dynamic_shape" + + setattr(wrapped_test, "_name", test_func.__name__) + setattr(wrapped_test, "_flow", flow) + + setattr(cls, test_name, wrapped_test) + + +# Expand a test into variants for each registered flow. 
+def _expand_test(cls, test_name: str) -> None: + test_func = getattr(cls, test_name) + supports_dynamic_shapes = getattr(test_func, "supports_dynamic_shapes", True) + dynamic_shape_values = [True, False] if supports_dynamic_shapes else [False] + + for flow, dtype, use_dynamic_shapes in itertools.product(get_test_flows(), DTYPES, dynamic_shape_values): + _create_test(cls, test_func, flow, dtype, use_dynamic_shapes) + delattr(cls, test_name) + + +def model_test_cls(cls) -> Callable | None: + """ Decorator for model tests. Handles generating test variants for each test flow and configuration. """ + for key in dir(cls): + if key.startswith("test_"): + _expand_test(cls, key) + return cls + + +def model_test_params(supports_dynamic_shapes: bool) -> Callable: + """ Optional parameter decorator for model tests. Specifies test pararameters. Only valid with a class decorated by model_test_cls. """ + def inner_decorator(func: Callable) -> Callable: + setattr(func, "supports_dynamic_shapes", supports_dynamic_shapes) + return func + return inner_decorator + + +def run_model_test( + model: torch.nn.Module, + inputs: tuple[Any], + dtype: torch.dtype, + dynamic_shapes: Any | None, + tester_factory: Callable[[], Tester], +): + model = model.to(dtype) + context = get_active_test_context() + + # This should be set in the wrapped test. See _create_test above. + assert context is not None, "Missing test context." + + run_summary = run_test( + model, + inputs, + tester_factory, + context.test_name, + context.flow_name, + context.params, + dynamic_shapes=dynamic_shapes, + ) + + log_test_summary(run_summary) + + if not run_summary.result.is_success(): + if run_summary.result.is_backend_failure(): + raise RuntimeError("Test failure.") from run_summary.error + else: + # Non-backend failure indicates a bad test. Mark as skipped. + raise unittest.SkipTest( + f"Test failed for reasons other than backend failure. 
Error: {run_summary.error}" + ) diff --git a/backends/test/suite/models/test_torchvision.py b/backends/test/suite/models/test_torchvision.py new file mode 100644 index 00000000000..6e6a8f6b36e --- /dev/null +++ b/backends/test/suite/models/test_torchvision.py @@ -0,0 +1,145 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +import torch +import torchvision +import unittest + +from executorch.backends.test.suite.models import model_test_params, model_test_cls, run_model_test +from torch.export import Dim +from typing import Callable + +# +# This file contains model integration tests for supported torchvision models. +# + +@model_test_cls +class TorchVision(unittest.TestCase): + def _test_cv_model( + self, + model: torch.nn.Module, + dtype: torch.dtype, + use_dynamic_shapes: bool, + tester_factory: Callable, + ): + # Test a CV model that follows the standard conventions. 
+ inputs = ( + torch.randn(1, 3, 224, 224, dtype=dtype), + ) + + dynamic_shapes = ( + { + 2: Dim("height", min=1, max=16)*16, + 3: Dim("width", min=1, max=16)*16, + }, + ) if use_dynamic_shapes else None + + run_model_test(model, inputs, dtype, dynamic_shapes, tester_factory) + + + def test_alexnet(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.alexnet() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_convnext_small(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.convnext_small() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_densenet161(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.densenet161() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_efficientnet_b4(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.efficientnet_b4() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_efficientnet_v2_s(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.efficientnet_v2_s() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_googlenet(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.googlenet() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_inception_v3(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.inception_v3() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + @model_test_params(supports_dynamic_shapes=False) + def test_maxvit_t(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + 
model = torchvision.models.maxvit_t() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_mnasnet1_0(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.mnasnet1_0() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_mobilenet_v2(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.mobilenet_v2() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_mobilenet_v3_small(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.mobilenet_v3_small() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_regnet_y_1_6gf(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.regnet_y_1_6gf() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_resnet50(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.resnet50() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_resnext50_32x4d(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.resnext50_32x4d() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_shufflenet_v2_x1_0(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.shufflenet_v2_x1_0() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_squeezenet1_1(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.squeezenet1_1() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_swin_v2_t(self, dtype: torch.dtype, use_dynamic_shapes: bool, 
tester_factory: Callable): + model = torchvision.models.swin_v2_t() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_vgg11(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.vgg11() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + @model_test_params(supports_dynamic_shapes=False) + def test_vit_b_16(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.vit_b_16() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + + + def test_wide_resnet50_2(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchvision.models.wide_resnet50_2() + self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + \ No newline at end of file diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 36905d0dabc..09554521d41 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -3,11 +3,12 @@ import re import unittest -from typing import Callable +from typing import Any, Callable import torch from executorch.backends.test.harness import Tester +from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter from executorch.backends.test.suite.reporting import ( begin_test_session, @@ -20,17 +21,19 @@ # A list of all runnable test suites and the corresponding python package. NAMED_SUITES = { + "models": "executorch.backends.test.suite.models", "operators": "executorch.backends.test.suite.operators", } def run_test( # noqa: C901 model: torch.nn.Module, - inputs: any, + inputs: Any, tester_factory: Callable[[], Tester], test_name: str, flow_name: str, params: dict | None, + dynamic_shapes: Any | None = None, ) -> TestCaseSummary: """ Top-level test run function for a model, input set, and tester. 
Handles test execution @@ -61,7 +64,10 @@ def build_result( return build_result(TestResult.UNKNOWN_FAIL, e) try: - tester.export() + # TODO Use Tester dynamic_shapes parameter once input generation can properly handle derived dims. + tester.export( + tester._get_default_stage(StageType.EXPORT, dynamic_shapes=dynamic_shapes), + ) except Exception as e: return build_result(TestResult.EXPORT_FAIL, e) From dc12b40463afd520e2a9f5edc027b29c139d9a60 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Sun, 20 Jul 2025 18:36:51 -0700 Subject: [PATCH 04/38] Update [ghstack-poisoned] --- backends/test/suite/models/__init__.py | 12 ++- backends/test/suite/models/test_torchaudio.py | 81 +++++++++++++++++++ backends/test/suite/runner.py | 2 + 3 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 backends/test/suite/models/test_torchaudio.py diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 496bcb6f194..278423353ea 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -67,8 +67,9 @@ def _expand_test(cls, test_name: str) -> None: test_func = getattr(cls, test_name) supports_dynamic_shapes = getattr(test_func, "supports_dynamic_shapes", True) dynamic_shape_values = [True, False] if supports_dynamic_shapes else [False] + dtypes = getattr(test_func, "dtypes", DTYPES) - for flow, dtype, use_dynamic_shapes in itertools.product(get_test_flows(), DTYPES, dynamic_shape_values): + for flow, dtype, use_dynamic_shapes in itertools.product(get_test_flows(), dtypes, dynamic_shape_values): _create_test(cls, test_func, flow, dtype, use_dynamic_shapes) delattr(cls, test_name) @@ -81,10 +82,17 @@ def model_test_cls(cls) -> Callable | None: return cls -def model_test_params(supports_dynamic_shapes: bool) -> Callable: +def model_test_params( + supports_dynamic_shapes: bool = True, + dtypes: list[torch.dtype] | None = None, +) -> Callable: """ Optional parameter decorator for model 
tests. Specifies test pararameters. Only valid with a class decorated by model_test_cls. """ def inner_decorator(func: Callable) -> Callable: setattr(func, "supports_dynamic_shapes", supports_dynamic_shapes) + + if dtypes is not None: + setattr(func, "dtypes", dtypes) + return func return inner_decorator diff --git a/backends/test/suite/models/test_torchaudio.py b/backends/test/suite/models/test_torchaudio.py new file mode 100644 index 00000000000..620dbae07f0 --- /dev/null +++ b/backends/test/suite/models/test_torchaudio.py @@ -0,0 +1,81 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +import torch +import torchaudio +import unittest + +from executorch.backends.test.suite.models import model_test_params, model_test_cls, run_model_test +from torch.export import Dim +from typing import Callable, Tuple + +# +# This file contains model integration tests for supported torchaudio models. +# + +class PatchedConformer(torch.nn.Module): + """ + A lightly modified version of the top-level Conformer module, such that it can be exported. + Instead of taking lengths and computing the padding mask, it takes the padding mask directly. 
+ See https://github.com/pytorch/audio/blob/main/src/torchaudio/models/conformer.py#L215 + """ + + def __init__(self, conformer): + super().__init__() + self.conformer = conformer + + def forward(self, input: torch.Tensor, encoder_padding_mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + x = input.transpose(0, 1) + for layer in self.conformer.conformer_layers: + x = layer(x, encoder_padding_mask) + return x.transpose(0, 1) + +@model_test_cls +class TorchAudio(unittest.TestCase): + @model_test_params(dtypes=[torch.float32], supports_dynamic_shapes=False) + def test_conformer(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + inner_model = torchaudio.models.Conformer( + input_dim=80, + num_heads=4, + ffn_dim=128, + num_layers=4, + depthwise_conv_kernel_size=31, + ) + model = PatchedConformer(inner_model) + lengths = torch.randint(1, 400, (10,)) + + encoder_padding_mask = torchaudio.models.conformer._lengths_to_padding_mask(lengths) + inputs = ( + torch.rand(10, int(lengths.max()), 80), + encoder_padding_mask, + ) + + run_model_test(model, inputs, dtype, None, tester_factory) + + @model_test_params(dtypes=[torch.float32]) + def test_wav2letter(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchaudio.models.Wav2Letter() + inputs = (torch.randn(1, 1, 1024, dtype=dtype),) + dynamic_shapes = { + "x": { + 2: Dim("d", min=900, max=1024), + } + } if use_dynamic_shapes else None + run_model_test(model, inputs, dtype, dynamic_shapes, tester_factory) + + @unittest.skip("This model times out on all backends.") + def test_wavernn(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable): + model = torchaudio.models.WaveRNN(upsample_scales=[5,5,8], n_classes=512, hop_length=200).eval() + + # See https://docs.pytorch.org/audio/stable/generated/torchaudio.models.WaveRNN.html#forward + inputs = ( + torch.randn(1, 1, (64 - 5 + 1) * 200), # waveform + torch.randn(1, 1, 128, 64), # 
specgram + ) + + run_model_test(model, inputs, dtype, None, tester_factory) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 09554521d41..064ead2a9ba 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -51,6 +51,8 @@ def build_result( result=result, error=error, ) + + model.eval() # Ensure the model can run in eager mode. try: From ead0616cc34a5db81f1c8bc7e998cfad8c0b00dd Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Mon, 21 Jul 2025 17:43:47 -0700 Subject: [PATCH 05/38] Update [ghstack-poisoned] --- backends/test/suite/__init__.py | 22 +++++++++++----------- backends/test/suite/discovery.py | 2 +- backends/test/suite/flow.py | 7 +++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/backends/test/suite/__init__.py b/backends/test/suite/__init__.py index cf73a7bdd0c..86cb5a5716f 100644 --- a/backends/test/suite/__init__.py +++ b/backends/test/suite/__init__.py @@ -12,7 +12,7 @@ import unittest from enum import Enum -from typing import Callable, Sequence, Sequence +from typing import Callable import executorch.backends.test.suite.flow @@ -46,18 +46,18 @@ def is_backend_enabled(backend): return backend in _ENABLED_BACKENDS -_ALL_TEST_FLOWS: Sequence[TestFlow] | None = None +_ALL_TEST_FLOWS: dict[str, TestFlow] = {} -def get_test_flows() -> Sequence[TestFlow]: +def get_test_flows() -> dict[str, TestFlow]: global _ALL_TEST_FLOWS - if _ALL_TEST_FLOWS is None: - _ALL_TEST_FLOWS = [ - f - for f in executorch.backends.test.suite.flow.all_flows() + if not _ALL_TEST_FLOWS: + _ALL_TEST_FLOWS = { + name: f + for name, f in executorch.backends.test.suite.flow.all_flows().items() if is_backend_enabled(f.backend) - ] + } return _ALL_TEST_FLOWS @@ -115,7 +115,7 @@ def _create_tests(cls): # Expand a test into variants for each registered flow. 
def _expand_test(cls, test_name: str): test_func = getattr(cls, test_name) - for flow in get_test_flows(): + for flow in get_test_flows().values(): _create_test_for_backend(cls, test_func, flow) delattr(cls, test_name) @@ -133,8 +133,8 @@ def wrapped_test(self): test_func(self, **test_kwargs) - setattr(wrapped_test, "_name", test_name) - setattr(wrapped_test, "_flow", flow) + wrapped_test._name = test_name + wrapped_test._flow = flow return wrapped_test diff --git a/backends/test/suite/discovery.py b/backends/test/suite/discovery.py index 5abd194cbcd..e7af0d0923d 100644 --- a/backends/test/suite/discovery.py +++ b/backends/test/suite/discovery.py @@ -57,7 +57,7 @@ def _is_test_enabled(test_case: unittest.TestCase, backends: set[str] | None) -> test_method = getattr(test_case, test_case._testMethodName) if backends is not None: - flow: TestFlow = getattr(test_method, "_flow") + flow: TestFlow = test_method._flow return flow.backend in backends else: return True diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 4410d382401..bda85a76ffa 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,8 +1,7 @@ import logging from dataclasses import dataclass -from math import log -from typing import Callable, Sequence +from typing import Callable from executorch.backends.test.harness import Tester @@ -55,9 +54,9 @@ def create_coreml_flow() -> TestFlow | None: return None -def all_flows() -> Sequence[TestFlow]: +def all_flows() -> dict[str, TestFlow]: flows = [ create_xnnpack_flow(), create_coreml_flow(), ] - return [f for f in flows if f is not None] + return {f.name: f for f in flows if f is not None} From 9dfeb5a67baa3630743a5d7be2d938eea58a1c76 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 16:16:13 -0700 Subject: [PATCH 06/38] Update [ghstack-poisoned] --- backends/apple/coreml/test/tester.py | 62 +++++++++++-- backends/test/harness/stages/quantize.py | 3 +- backends/test/harness/tester.py | 6 
+- backends/test/suite/__init__.py | 7 +- backends/test/suite/flow.py | 58 ++++++------- backends/test/suite/flows/__init__.py | 7 ++ backends/test/suite/flows/coreml.py | 24 ++++++ backends/test/suite/flows/xnnpack.py | 36 ++++++++ backends/test/suite/models/__init__.py | 7 +- backends/test/suite/models/test_torchaudio.py | 13 +-- .../test/suite/models/test_torchvision.py | 86 +++++++++---------- backends/test/suite/operators/test_add.py | 23 +++-- backends/test/suite/operators/test_div.py | 27 +++--- backends/test/suite/operators/test_elu.py | 23 +++-- backends/test/suite/operators/test_gelu.py | 27 +++--- backends/test/suite/operators/test_glu.py | 23 +++-- .../test/suite/operators/test_hardsigmoid.py | 23 +++-- .../test/suite/operators/test_hardswish.py | 23 +++-- .../test/suite/operators/test_hardtanh.py | 27 +++--- .../test/suite/operators/test_leaky_relu.py | 27 +++--- .../test/suite/operators/test_logsigmoid.py | 19 ++-- backends/test/suite/operators/test_mul.py | 19 ++-- backends/test/suite/operators/test_prelu.py | 31 ++++--- backends/test/suite/operators/test_relu.py | 19 ++-- backends/test/suite/operators/test_sigmoid.py | 19 ++-- backends/test/suite/operators/test_silu.py | 23 +++-- backends/test/suite/operators/test_sub.py | 23 +++-- backends/test/suite/operators/test_tanh.py | 19 ++-- .../test/suite/operators/test_threshold.py | 39 ++++----- backends/test/suite/reporting.py | 17 ++-- backends/test/suite/runner.py | 21 +++-- 31 files changed, 443 insertions(+), 338 deletions(-) create mode 100644 backends/test/suite/flows/__init__.py create mode 100644 backends/test/suite/flows/coreml.py create mode 100644 backends/test/suite/flows/xnnpack.py diff --git a/backends/apple/coreml/test/tester.py b/backends/apple/coreml/test/tester.py index f4a5f51ecbd..eee4c4e5893 100644 --- a/backends/apple/coreml/test/tester.py +++ b/backends/apple/coreml/test/tester.py @@ -4,23 +4,64 @@ # This source code is licensed under the BSD-style license found in the # LICENSE 
file in the root directory of this source tree. -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Sequence, Tuple +import coremltools as ct import executorch import executorch.backends.test.harness.stages as BaseStages - +import functools import torch + +from executorch.backends.apple.coreml.compiler import CoreMLBackend from executorch.backends.apple.coreml.partition import CoreMLPartitioner +from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer from executorch.backends.test.harness import Tester as TesterBase from executorch.backends.test.harness.stages import StageType from executorch.exir import EdgeCompileConfig from executorch.exir.backend.partitioner import Partitioner +def _get_static_int8_qconfig(): + return ct.optimize.torch.quantization.LinearQuantizerConfig( + global_config=ct.optimize.torch.quantization.ModuleLinearQuantizerConfig( + quantization_scheme="symmetric", + activation_dtype=torch.quint8, + weight_dtype=torch.qint8, + weight_per_channel=True, + ) + ) + + +class Quantize(BaseStages.Quantize): + def __init__( + self, + quantizer: Optional[CoreMLQuantizer] = None, + quantization_config: Optional[Any] = None, + calibrate: bool = True, + calibration_samples: Optional[Sequence[Any]] = None, + is_qat: Optional[bool] = False, + ): + super().__init__( + quantizer=quantizer or CoreMLQuantizer(quantization_config or _get_static_int8_qconfig()), + calibrate=calibrate, + calibration_samples=calibration_samples, + is_qat=is_qat, + ) + + + class Partition(BaseStages.Partition): - def __init__(self, partitioner: Optional[Partitioner] = None): + def __init__( + self, + partitioner: Optional[Partitioner] = None, + minimum_deployment_target: Optional[Any] = ct.target.iOS15, + ): super().__init__( - partitioner=partitioner or CoreMLPartitioner, + partitioner=partitioner or CoreMLPartitioner( + compile_specs=CoreMLBackend.generate_compile_specs( + minimum_deployment_target=minimum_deployment_target + ) + ), ) 
@@ -29,9 +70,14 @@ def __init__( self, partitioners: Optional[List[Partitioner]] = None, edge_compile_config: Optional[EdgeCompileConfig] = None, + minimum_deployment_target: Optional[Any] = ct.target.iOS15, ): super().__init__( - default_partitioner_cls=CoreMLPartitioner, + default_partitioner_cls=lambda: CoreMLPartitioner( + compile_specs=CoreMLBackend.generate_compile_specs( + minimum_deployment_target=minimum_deployment_target + ) + ), partitioners=partitioners, edge_compile_config=edge_compile_config, ) @@ -43,13 +89,15 @@ def __init__( module: torch.nn.Module, example_inputs: Tuple[torch.Tensor], dynamic_shapes: Optional[Tuple[Any]] = None, + minimum_deployment_target: Optional[Any] = ct.target.iOS15, ): # Specialize for XNNPACK stage_classes = ( executorch.backends.test.harness.Tester.default_stage_classes() | { - StageType.PARTITION: Partition, - StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower, + StageType.QUANTIZE: Quantize, + StageType.PARTITION: functools.partial(Partition, minimum_deployment_target=minimum_deployment_target), + StageType.TO_EDGE_TRANSFORM_AND_LOWER: functools.partial(ToEdgeTransformAndLower, minimum_deployment_target=minimum_deployment_target), } ) diff --git a/backends/test/harness/stages/quantize.py b/backends/test/harness/stages/quantize.py index e03db058080..dd61d3acacb 100644 --- a/backends/test/harness/stages/quantize.py +++ b/backends/test/harness/stages/quantize.py @@ -31,7 +31,8 @@ def __init__( self.calibrate = calibrate self.calibration_samples = calibration_samples - self.quantizer.set_global(self.quantization_config) + if self.quantization_config is not None: + self.quantizer.set_global(self.quantization_config) self.converted_graph = None self.is_qat = is_qat diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index e418f795b35..06db1aae13d 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -1,6 +1,6 @@ import random from collections import 
Counter, OrderedDict -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import Any, Callable, Dict, List, Optional, Tuple import torch @@ -33,7 +33,7 @@ def __init__( self, module: torch.nn.Module, example_inputs: Tuple[torch.Tensor], - stage_classes: Dict[StageType, Type], + stage_classes: Dict[StageType, Callable], dynamic_shapes: Optional[Tuple[Any]] = None, ): module.eval() @@ -81,7 +81,7 @@ def __init__( self.stage_output = None @staticmethod - def default_stage_classes() -> Dict[StageType, Type]: + def default_stage_classes() -> Dict[StageType, Callable]: """ Returns a map of StageType to default Stage implementation. """ diff --git a/backends/test/suite/__init__.py b/backends/test/suite/__init__.py index 86cb5a5716f..7190da4e0fd 100644 --- a/backends/test/suite/__init__.py +++ b/backends/test/suite/__init__.py @@ -129,7 +129,7 @@ def _make_wrapped_test( def wrapped_test(self): with TestContext(test_name, flow.name, params): test_kwargs = params or {} - test_kwargs["tester_factory"] = flow.tester_factory + test_kwargs["flow"] = flow test_func(self, **test_kwargs) @@ -175,7 +175,7 @@ def load_tests(loader, suite, pattern): class OperatorTest(unittest.TestCase): - def _test_op(self, model, inputs, tester_factory): + def _test_op(self, model, inputs, flow: TestFlow): context = get_active_test_context() # This should be set in the wrapped test. See _make_wrapped_test above. 
@@ -184,9 +184,8 @@ def _test_op(self, model, inputs, tester_factory): run_summary = run_test( model, inputs, - tester_factory, + flow, context.test_name, - context.flow_name, context.params, ) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index bda85a76ffa..a9ddec22864 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,9 +1,10 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Callable from executorch.backends.test.harness import Tester +from executorch.backends.test.harness.stages import Quantize logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -21,42 +22,35 @@ class TestFlow: backend: str """ The name of the target backend. """ - - tester_factory: Callable[[], Tester] + + tester_factory: Callable[..., Tester] """ A factory function that returns a Tester instance for this lowering flow. """ + quantize: bool = field(default=False) + """ Whether to tester should run the quantize stage on the model. """ + + quantize_stage_factory: Callable[..., Quantize] | None = None + """ A factory function which instantiates a Quantize stage. Can be None to use the tester's default. 
""" -def create_xnnpack_flow() -> TestFlow | None: +def all_flows() -> dict[str, TestFlow]: + flows = [] + try: - from executorch.backends.xnnpack.test.tester import Tester as XnnpackTester - - return TestFlow( - name="xnnpack", - backend="xnnpack", - tester_factory=XnnpackTester, - ) - except Exception: - logger.info("Skipping XNNPACK flow registration due to import failure.") - return None - + from executorch.backends.test.suite.flows.xnnpack import XNNPACK_TEST_FLOW, XNNPACK_STATIC_INT8_TEST_FLOW + flows += [ + XNNPACK_TEST_FLOW, + XNNPACK_STATIC_INT8_TEST_FLOW, + ] + except Exception as e: + logger.info(f"Skipping XNNPACK flow registration: {e}") -def create_coreml_flow() -> TestFlow | None: try: - from executorch.backends.apple.coreml.test.tester import CoreMLTester + from executorch.backends.test.suite.flows.coreml import COREML_TEST_FLOW, COREML_STATIC_INT8_TEST_FLOW + flows += [ + COREML_TEST_FLOW, + COREML_STATIC_INT8_TEST_FLOW, + ] + except Exception as e: + logger.info(f"Skipping Core ML flow registration: {e}") - return TestFlow( - name="coreml", - backend="coreml", - tester_factory=CoreMLTester, - ) - except Exception: - logger.info("Skipping Core ML flow registration due to import failure.") - return None - - -def all_flows() -> dict[str, TestFlow]: - flows = [ - create_xnnpack_flow(), - create_coreml_flow(), - ] return {f.name: f for f in flows if f is not None} diff --git a/backends/test/suite/flows/__init__.py b/backends/test/suite/flows/__init__.py new file mode 100644 index 00000000000..6ac1a72bde6 --- /dev/null +++ b/backends/test/suite/flows/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# pyre-unsafe diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py new file mode 100644 index 00000000000..443457bd695 --- /dev/null +++ b/backends/test/suite/flows/coreml.py @@ -0,0 +1,24 @@ +import coremltools +import functools + +from executorch.backends.apple.coreml.test.tester import CoreMLTester +from executorch.backends.test.suite.flow import TestFlow +from typing import Any + +def _create_coreml_flow( + name: str, + quantize: bool = False, + minimum_deployment_target: Any = coremltools.target.iOS15 +) -> TestFlow: + return TestFlow( + name, + backend="coreml", + tester_factory=functools.partial(CoreMLTester, minimum_deployment_target=minimum_deployment_target), + quantize=quantize, + ) + +COREML_TEST_FLOW = _create_coreml_flow("coreml") +COREML_STATIC_INT8_TEST_FLOW = _create_coreml_flow( + "coreml_static_int8", + quantize=True, + minimum_deployment_target=coremltools.target.iOS17) diff --git a/backends/test/suite/flows/xnnpack.py b/backends/test/suite/flows/xnnpack.py new file mode 100644 index 00000000000..af079f83018 --- /dev/null +++ b/backends/test/suite/flows/xnnpack.py @@ -0,0 +1,36 @@ +from executorch.backends.test.harness.stages import Quantize +from executorch.backends.test.suite.flow import TestFlow +from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import get_symmetric_quantization_config +from executorch.backends.xnnpack.test.tester import ( + Quantize as XnnpackQuantize, + Tester as XnnpackTester +) +from typing import Callable + +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +def _create_xnnpack_flow_base(name: str, quantize_stage_factory: Callable[..., Quantize] | None = None) -> TestFlow: + return TestFlow( + name, + backend="xnnpack", + tester_factory=XnnpackTester, + quantize=True, + quantize_stage_factory=quantize_stage_factory, + ) + +def _create_xnnpack_flow() -> TestFlow: + return _create_xnnpack_flow_base("xnnpack") + +def 
_create_xnnpack_static_int8_flow() -> TestFlow: + def create_quantize_stage() -> Quantize: + qparams = get_symmetric_quantization_config(is_per_channel=True) + return XnnpackQuantize( + quantization_config=qparams, + ) + return _create_xnnpack_flow_base("xnnpack_static_int8", create_quantize_stage) + +XNNPACK_TEST_FLOW = _create_xnnpack_flow() +XNNPACK_STATIC_INT8_TEST_FLOW = _create_xnnpack_static_int8_flow() diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index cb89aa816fa..b33878995d7 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -49,7 +49,7 @@ def wrapped_test(self): "use_dynamic_shapes": use_dynamic_shapes, } with TestContext(test_name, flow.name, params): - test_func(self, dtype, use_dynamic_shapes, flow.tester_factory) + test_func(self, flow, dtype, use_dynamic_shapes) dtype_name = str(dtype)[6:] # strip "torch." test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" @@ -104,9 +104,9 @@ def inner_decorator(func: Callable) -> Callable: def run_model_test( model: torch.nn.Module, inputs: tuple[Any], + flow: TestFlow, dtype: torch.dtype, dynamic_shapes: Any | None, - tester_factory: Callable[[], Tester], ): model = model.to(dtype) context = get_active_test_context() @@ -117,9 +117,8 @@ def run_model_test( run_summary = run_test( model, inputs, - tester_factory, + flow, context.test_name, - context.flow_name, context.params, dynamic_shapes=dynamic_shapes, ) diff --git a/backends/test/suite/models/test_torchaudio.py b/backends/test/suite/models/test_torchaudio.py index ac1bc21a526..11ea71b558d 100644 --- a/backends/test/suite/models/test_torchaudio.py +++ b/backends/test/suite/models/test_torchaudio.py @@ -12,6 +12,7 @@ import torch import torchaudio +from executorch.backends.test.suite.flow import TestFlow from executorch.backends.test.suite.models import ( model_test_cls, model_test_params, @@ -48,7 +49,7 @@ def forward( class TorchAudio(unittest.TestCase): 
@model_test_params(dtypes=[torch.float32], supports_dynamic_shapes=False) def test_conformer( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): inner_model = torchaudio.models.Conformer( input_dim=80, @@ -68,11 +69,11 @@ def test_conformer( encoder_padding_mask, ) - run_model_test(model, inputs, dtype, None, tester_factory) + run_model_test(model, inputs, flow, dtype, None) @model_test_params(dtypes=[torch.float32]) def test_wav2letter( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchaudio.models.Wav2Letter() inputs = (torch.randn(1, 1, 1024, dtype=dtype),) @@ -85,11 +86,11 @@ def test_wav2letter( if use_dynamic_shapes else None ) - run_model_test(model, inputs, dtype, dynamic_shapes, tester_factory) + run_model_test(model, inputs, flow, dtype, dynamic_shapes) @unittest.skip("This model times out on all backends.") def test_wavernn( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool, ): model = torchaudio.models.WaveRNN( upsample_scales=[5, 5, 8], n_classes=512, hop_length=200 @@ -101,4 +102,4 @@ def test_wavernn( torch.randn(1, 1, 128, 64), # specgram ) - run_model_test(model, inputs, dtype, None, tester_factory) + run_model_test(model, inputs, flow, dtype, None) diff --git a/backends/test/suite/models/test_torchvision.py b/backends/test/suite/models/test_torchvision.py index faa4212e1c4..fed4d31130e 100644 --- a/backends/test/suite/models/test_torchvision.py +++ b/backends/test/suite/models/test_torchvision.py @@ -7,11 +7,11 @@ # pyre-unsafe import unittest -from typing import Callable import torch import torchvision +from executorch.backends.test.suite.flow import TestFlow from executorch.backends.test.suite.models import ( model_test_cls, 
model_test_params, @@ -29,9 +29,9 @@ class TorchVision(unittest.TestCase): def _test_cv_model( self, model: torch.nn.Module, + flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool, - tester_factory: Callable, ): # Test a CV model that follows the standard conventions. inputs = (torch.randn(1, 3, 224, 224, dtype=dtype),) @@ -47,126 +47,126 @@ def _test_cv_model( else None ) - run_model_test(model, inputs, dtype, dynamic_shapes, tester_factory) + run_model_test(model, inputs, flow, dtype, dynamic_shapes) def test_alexnet( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.alexnet() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_convnext_small( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.convnext_small() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_densenet161( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.densenet161() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_efficientnet_b4( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.efficientnet_b4() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_efficientnet_v2_s( - self, dtype: torch.dtype, use_dynamic_shapes: bool, 
tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.efficientnet_v2_s() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_googlenet( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.googlenet() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_inception_v3( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.inception_v3() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) @model_test_params(supports_dynamic_shapes=False) def test_maxvit_t( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.maxvit_t() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_mnasnet1_0( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.mnasnet1_0() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_mobilenet_v2( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.mobilenet_v2() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + 
self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_mobilenet_v3_small( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.mobilenet_v3_small() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_regnet_y_1_6gf( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.regnet_y_1_6gf() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_resnet50( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.resnet50() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_resnext50_32x4d( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.resnext50_32x4d() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_shufflenet_v2_x1_0( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.shufflenet_v2_x1_0() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_squeezenet1_1( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, 
use_dynamic_shapes: bool ): model = torchvision.models.squeezenet1_1() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_swin_v2_t( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.swin_v2_t() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_vgg11( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.vgg11() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) @model_test_params(supports_dynamic_shapes=False) def test_vit_b_16( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.vit_b_16() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) def test_wide_resnet50_2( - self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable + self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool ): model = torchvision.models.wide_resnet50_2() - self._test_cv_model(model, dtype, use_dynamic_shapes, tester_factory) + self._test_cv_model(model, flow, dtype, use_dynamic_shapes) diff --git a/backends/test/suite/operators/test_add.py b/backends/test/suite/operators/test_add.py index 970a4babbf0..2ff1644d672 100644 --- a/backends/test/suite/operators/test_add.py +++ b/backends/test/suite/operators/test_add.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, 
operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -31,52 +30,52 @@ def forward(self, x, y): @operator_test class Add(OperatorTest): @dtype_test - def test_add_dtype(self, dtype, tester_factory: Callable) -> None: + def test_add_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( Model(), ( (torch.rand(2, 10) * 100).to(dtype), (torch.rand(2, 10) * 100).to(dtype), ), - tester_factory, + flow, ) - def test_add_f32_bcast_first(self, tester_factory: Callable) -> None: + def test_add_f32_bcast_first(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 5, 1, 5), ), - tester_factory, + flow, ) - def test_add_f32_bcast_second(self, tester_factory: Callable) -> None: + def test_add_f32_bcast_second(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(4, 4, 2, 7), torch.randn(2, 7), ), - tester_factory, + flow, ) - def test_add_f32_bcast_unary(self, tester_factory: Callable) -> None: + def test_add_f32_bcast_unary(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 1, 5), ), - tester_factory, + flow, ) - def test_add_f32_alpha(self, tester_factory: Callable) -> None: + def test_add_f32_alpha(self, flow: TestFlow) -> None: self._test_op( ModelAlpha(alpha=2), ( torch.randn(1, 25), torch.randn(1, 25), ), - tester_factory, + flow, ) diff --git a/backends/test/suite/operators/test_div.py b/backends/test/suite/operators/test_div.py index 9e98775e855..1367a4bc8f7 100644 --- a/backends/test/suite/operators/test_div.py +++ b/backends/test/suite/operators/test_div.py @@ -7,11 +7,12 @@ # pyre-unsafe -from typing import Callable, Optional +from typing import Optional import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -31,7 +32,7 @@ def forward(self, x, y): @operator_test class 
Divide(OperatorTest): @dtype_test - def test_divide_dtype(self, dtype, tester_factory: Callable) -> None: + def test_divide_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( Model(), ( @@ -40,10 +41,10 @@ def test_divide_dtype(self, dtype, tester_factory: Callable) -> None: dtype ), # Adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) - def test_divide_f32_bcast_first(self, tester_factory: Callable) -> None: + def test_divide_f32_bcast_first(self, flow: TestFlow) -> None: self._test_op( Model(), ( @@ -51,10 +52,10 @@ def test_divide_f32_bcast_first(self, tester_factory: Callable) -> None: torch.randn(1, 5, 1, 5).abs() + 0.1, # Using abs and adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) - def test_divide_f32_bcast_second(self, tester_factory: Callable) -> None: + def test_divide_f32_bcast_second(self, flow: TestFlow) -> None: self._test_op( Model(), ( @@ -62,10 +63,10 @@ def test_divide_f32_bcast_second(self, tester_factory: Callable) -> None: torch.randn(2, 7).abs() + 0.1, # Using abs and adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) - def test_divide_f32_bcast_unary(self, tester_factory: Callable) -> None: + def test_divide_f32_bcast_unary(self, flow: TestFlow) -> None: self._test_op( Model(), ( @@ -73,10 +74,10 @@ def test_divide_f32_bcast_unary(self, tester_factory: Callable) -> None: torch.randn(1, 1, 5).abs() + 0.1, # Using abs and adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) - def test_divide_f32_trunc(self, tester_factory: Callable) -> None: + def test_divide_f32_trunc(self, flow: TestFlow) -> None: self._test_op( ModelWithRounding(rounding_mode="trunc"), ( @@ -84,10 +85,10 @@ def test_divide_f32_trunc(self, tester_factory: Callable) -> None: torch.randn(3, 4).abs() + 0.1, # Using abs and adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) - def test_divide_f32_floor(self, tester_factory: Callable) -> None: + def test_divide_f32_floor(self, flow: 
TestFlow) -> None: self._test_op( ModelWithRounding(rounding_mode="floor"), ( @@ -95,5 +96,5 @@ def test_divide_f32_floor(self, tester_factory: Callable) -> None: torch.randn(3, 4).abs() + 0.1, # Using abs and adding 0.1 to avoid division by zero ), - tester_factory, + flow, ) diff --git a/backends/test/suite/operators/test_elu.py b/backends/test/suite/operators/test_elu.py index 371a13aa26c..be4bb99bba0 100644 --- a/backends/test/suite/operators/test_elu.py +++ b/backends/test/suite/operators/test_elu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -27,17 +26,17 @@ def forward(self, x): @operator_test class TestELU(OperatorTest): @dtype_test - def test_elu_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10) * 100).to(dtype),), tester_factory) + def test_elu_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10) * 100).to(dtype),), flow) - def test_elu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_elu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_elu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_elu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_elu_f32_alpha(self, tester_factory: Callable) -> None: - self._test_op(Model(alpha=0.5), (torch.randn(3, 4, 5),), tester_factory) + def test_elu_f32_alpha(self, flow: TestFlow) -> None: + self._test_op(Model(alpha=0.5), (torch.randn(3, 4, 5),), flow) - def test_elu_f32_inplace(self, tester_factory: Callable) -> None: - 
self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_elu_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) diff --git a/backends/test/suite/operators/test_gelu.py b/backends/test/suite/operators/test_gelu.py index 639b2fbb9b1..4e77f92bc03 100644 --- a/backends/test/suite/operators/test_gelu.py +++ b/backends/test/suite/operators/test_gelu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,28 +25,28 @@ def forward(self, x): @operator_test class TestGELU(OperatorTest): @dtype_test - def test_gelu_dtype(self, dtype, tester_factory: Callable) -> None: + def test_gelu_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow ) - def test_gelu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_gelu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_gelu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_gelu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_gelu_f32_tanh_approximation(self, tester_factory: Callable) -> None: + def test_gelu_f32_tanh_approximation(self, flow: TestFlow) -> None: self._test_op( - Model(approximate="tanh"), (torch.randn(3, 4, 5),), tester_factory + Model(approximate="tanh"), (torch.randn(3, 4, 5),), flow ) - def test_gelu_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_gelu_f32_boundary_values(self, flow: TestFlow) -> 
None: # Test with specific values spanning negative and positive ranges x = torch.tensor([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) - def test_gelu_f32_tanh_boundary_values(self, tester_factory: Callable) -> None: + def test_gelu_f32_tanh_boundary_values(self, flow: TestFlow) -> None: # Test tanh approximation with specific values x = torch.tensor([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0]) - self._test_op(Model(approximate="tanh"), (x,), tester_factory) + self._test_op(Model(approximate="tanh"), (x,), flow) diff --git a/backends/test/suite/operators/test_glu.py b/backends/test/suite/operators/test_glu.py index 74f46bb9532..a20b2bf8543 100644 --- a/backends/test/suite/operators/test_glu.py +++ b/backends/test/suite/operators/test_glu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,26 +25,26 @@ def forward(self, x): @operator_test class TestGLU(OperatorTest): @dtype_test - def test_glu_dtype(self, dtype, tester_factory: Callable) -> None: + def test_glu_dtype(self, flow: TestFlow, dtype) -> None: # Input must have even number of elements in the specified dimension self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow ) - def test_glu_f32_dim_last(self, tester_factory: Callable) -> None: + def test_glu_f32_dim_last(self, flow: TestFlow) -> None: # Default dim is -1 (last dimension) - self._test_op(Model(), (torch.randn(3, 4, 6),), tester_factory) + self._test_op(Model(), (torch.randn(3, 4, 6),), flow) - def test_glu_f32_dim_first(self, tester_factory: Callable) -> None: + def test_glu_f32_dim_first(self, flow: TestFlow) -> None: # Test with dim=0 (first dimension) - self._test_op(Model(dim=0), 
(torch.randn(4, 3, 5),), tester_factory) + self._test_op(Model(dim=0), (torch.randn(4, 3, 5),), flow) - def test_glu_f32_dim_middle(self, tester_factory: Callable) -> None: + def test_glu_f32_dim_middle(self, flow: TestFlow) -> None: # Test with dim=1 (middle dimension) - self._test_op(Model(dim=1), (torch.randn(3, 8, 5),), tester_factory) + self._test_op(Model(dim=1), (torch.randn(3, 8, 5),), flow) - def test_glu_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_glu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges # Input must have even number of elements in the specified dimension x = torch.tensor([[-10.0, -5.0, -1.0, 0.0], [1.0, 5.0, 10.0, -2.0]]) - self._test_op(Model(dim=1), (x,), tester_factory) + self._test_op(Model(dim=1), (x,), flow) diff --git a/backends/test/suite/operators/test_hardsigmoid.py b/backends/test/suite/operators/test_hardsigmoid.py index f26877782db..7ad92819506 100644 --- a/backends/test/suite/operators/test_hardsigmoid.py +++ b/backends/test/suite/operators/test_hardsigmoid.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,19 +25,19 @@ def forward(self, x): @operator_test class TestHardsigmoid(OperatorTest): @dtype_test - def test_hardsigmoid_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10)).to(dtype),), tester_factory) + def test_hardsigmoid_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10)).to(dtype),), flow) - def test_hardsigmoid_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_hardsigmoid_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), 
flow) - def test_hardsigmoid_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_hardsigmoid_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_hardsigmoid_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_hardsigmoid_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_hardsigmoid_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_hardsigmoid_f32_boundary_values(self, flow: TestFlow) -> None: # Test with values that span the hardsigmoid's piecewise regions x = torch.tensor([-5.0, -3.0, -1.0, 0.0, 1.0, 3.0, 5.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_hardswish.py b/backends/test/suite/operators/test_hardswish.py index 0c2c6915760..e8d25266af5 100644 --- a/backends/test/suite/operators/test_hardswish.py +++ b/backends/test/suite/operators/test_hardswish.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,19 +25,19 @@ def forward(self, x): @operator_test class TestHardswish(OperatorTest): @dtype_test - def test_hardswish_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10)).to(dtype),), tester_factory) + def test_hardswish_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10)).to(dtype),), flow) - def test_hardswish_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_hardswish_f32_single_dim(self, 
flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_hardswish_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_hardswish_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_hardswish_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_hardswish_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_hardswish_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_hardswish_f32_boundary_values(self, flow: TestFlow) -> None: # Test with values that span the hardswish's piecewise regions x = torch.tensor([-5.0, -3.0, -1.0, 0.0, 1.0, 3.0, 5.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_hardtanh.py b/backends/test/suite/operators/test_hardtanh.py index f74c52e93db..8b6d7bc1e6e 100644 --- a/backends/test/suite/operators/test_hardtanh.py +++ b/backends/test/suite/operators/test_hardtanh.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -30,24 +29,24 @@ def forward(self, x): @operator_test class TestHardtanh(OperatorTest): @dtype_test - def test_hardtanh_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10) * 4 - 2).to(dtype),), tester_factory) + def test_hardtanh_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10) * 4 - 2).to(dtype),), flow) - def test_hardtanh_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), 
(torch.randn(20),), tester_factory) + def test_hardtanh_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_hardtanh_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_hardtanh_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_hardtanh_f32_custom_range(self, tester_factory: Callable) -> None: + def test_hardtanh_f32_custom_range(self, flow: TestFlow) -> None: self._test_op( - Model(min_val=-2.0, max_val=2.0), (torch.randn(3, 4, 5),), tester_factory + Model(min_val=-2.0, max_val=2.0), (torch.randn(3, 4, 5),), flow ) - def test_hardtanh_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_hardtanh_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_hardtanh_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_hardtanh_f32_boundary_values(self, flow: TestFlow) -> None: # Test with values that span the hardtanh's piecewise regions x = torch.tensor([-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_leaky_relu.py b/backends/test/suite/operators/test_leaky_relu.py index 01d30e9c682..ca60adde55f 100644 --- a/backends/test/suite/operators/test_leaky_relu.py +++ b/backends/test/suite/operators/test_leaky_relu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -29,24 +28,24 @@ def forward(self, x): @operator_test class TestLeakyReLU(OperatorTest): @dtype_test - def 
test_leaky_relu_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10) * 2 - 1).to(dtype),), tester_factory) + def test_leaky_relu_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10) * 2 - 1).to(dtype),), flow) - def test_leaky_relu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_leaky_relu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_leaky_relu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_leaky_relu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_leaky_relu_f32_custom_slope(self, tester_factory: Callable) -> None: + def test_leaky_relu_f32_custom_slope(self, flow: TestFlow) -> None: self._test_op( - Model(negative_slope=0.1), (torch.randn(3, 4, 5),), tester_factory + Model(negative_slope=0.1), (torch.randn(3, 4, 5),), flow ) - def test_leaky_relu_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_leaky_relu_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_leaky_relu_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_leaky_relu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific positive and negative values x = torch.tensor([-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_logsigmoid.py b/backends/test/suite/operators/test_logsigmoid.py index ff6a2df83ae..c8cf01217d5 100644 --- a/backends/test/suite/operators/test_logsigmoid.py +++ 
b/backends/test/suite/operators/test_logsigmoid.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -22,18 +21,18 @@ def forward(self, x): @operator_test class TestLogSigmoid(OperatorTest): @dtype_test - def test_logsigmoid_dtype(self, dtype, tester_factory: Callable) -> None: + def test_logsigmoid_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow ) - def test_logsigmoid_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_logsigmoid_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_logsigmoid_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_logsigmoid_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_logsigmoid_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_logsigmoid_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges x = torch.tensor([-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_mul.py b/backends/test/suite/operators/test_mul.py index 19d1c8e939d..5914b455762 100644 --- a/backends/test/suite/operators/test_mul.py +++ b/backends/test/suite/operators/test_mul.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from 
executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -22,42 +21,42 @@ def forward(self, x, y): @operator_test class Multiply(OperatorTest): @dtype_test - def test_multiply_dtype(self, dtype, tester_factory: Callable) -> None: + def test_multiply_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( Model(), ( (torch.rand(2, 10) * 100).to(dtype), (torch.rand(2, 10) * 100).to(dtype), ), - tester_factory, + flow, ) - def test_multiply_f32_bcast_first(self, tester_factory: Callable) -> None: + def test_multiply_f32_bcast_first(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 5, 1, 5), ), - tester_factory, + flow, ) - def test_multiply_f32_bcast_second(self, tester_factory: Callable) -> None: + def test_multiply_f32_bcast_second(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(4, 4, 2, 7), torch.randn(2, 7), ), - tester_factory, + flow, ) - def test_multiply_f32_bcast_unary(self, tester_factory: Callable) -> None: + def test_multiply_f32_bcast_unary(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 1, 5), ), - tester_factory, + flow, ) diff --git a/backends/test/suite/operators/test_prelu.py b/backends/test/suite/operators/test_prelu.py index a9aee50bc18..b98a88bbe04 100644 --- a/backends/test/suite/operators/test_prelu.py +++ b/backends/test/suite/operators/test_prelu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,33 +25,33 @@ def forward(self, x): @operator_test class TestPReLU(OperatorTest): @dtype_test - def test_prelu_dtype(self, dtype, tester_factory: Callable) -> None: + def test_prelu_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model().to(dtype), ((torch.rand(2, 10) * 2 - 1).to(dtype),), tester_factory + 
Model().to(dtype), ((torch.rand(2, 10) * 2 - 1).to(dtype),), flow ) - def test_prelu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_prelu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_prelu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_prelu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_prelu_f32_custom_init(self, tester_factory: Callable) -> None: - self._test_op(Model(init=0.1), (torch.randn(3, 4, 5),), tester_factory) + def test_prelu_f32_custom_init(self, flow: TestFlow) -> None: + self._test_op(Model(init=0.1), (torch.randn(3, 4, 5),), flow) - def test_prelu_f32_channel_shared(self, tester_factory: Callable) -> None: + def test_prelu_f32_channel_shared(self, flow: TestFlow) -> None: # Default num_parameters=1 means the parameter is shared across all channels self._test_op( - Model(num_parameters=1), (torch.randn(2, 3, 4, 5),), tester_factory + Model(num_parameters=1), (torch.randn(2, 3, 4, 5),), flow ) - def test_prelu_f32_per_channel_parameter(self, tester_factory: Callable) -> None: + def test_prelu_f32_per_channel_parameter(self, flow: TestFlow) -> None: # num_parameters=3 means each channel has its own parameter (for dim=1) self._test_op( - Model(num_parameters=3), (torch.randn(2, 3, 4, 5),), tester_factory + Model(num_parameters=3), (torch.randn(2, 3, 4, 5),), flow ) - def test_prelu_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_prelu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific positive and negative values x = torch.tensor([-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_relu.py 
b/backends/test/suite/operators/test_relu.py index ab6d93d6279..d90a7c6f04e 100644 --- a/backends/test/suite/operators/test_relu.py +++ b/backends/test/suite/operators/test_relu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,14 +25,14 @@ def forward(self, x): @operator_test class TestReLU(OperatorTest): @dtype_test - def test_relu_dtype(self, dtype, tester_factory: Callable) -> None: - self._test_op(Model(), ((torch.rand(2, 10) * 100).to(dtype),), tester_factory) + def test_relu_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.rand(2, 10) * 100).to(dtype),), flow) - def test_relu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_relu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_relu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_relu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_relu_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_relu_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) diff --git a/backends/test/suite/operators/test_sigmoid.py b/backends/test/suite/operators/test_sigmoid.py index 7e70b30ff19..cb9a090b6cc 100644 --- a/backends/test/suite/operators/test_sigmoid.py +++ b/backends/test/suite/operators/test_sigmoid.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest 
+from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -22,18 +21,18 @@ def forward(self, x): @operator_test class TestSigmoid(OperatorTest): @dtype_test - def test_sigmoid_dtype(self, dtype, tester_factory: Callable) -> None: + def test_sigmoid_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow ) - def test_sigmoid_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_sigmoid_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_sigmoid_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_sigmoid_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_sigmoid_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_sigmoid_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges x = torch.tensor([-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_silu.py b/backends/test/suite/operators/test_silu.py index a30b47a1c57..9d8afbaa716 100644 --- a/backends/test/suite/operators/test_silu.py +++ b/backends/test/suite/operators/test_silu.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -26,19 +25,19 @@ def forward(self, x): @operator_test class TestSiLU(OperatorTest): @dtype_test - def test_silu_dtype(self, dtype, tester_factory: Callable) -> 
None: - self._test_op(Model(), ((torch.randn(2, 10) * 100).to(dtype),), tester_factory) + def test_silu_dtype(self, flow: TestFlow, dtype) -> None: + self._test_op(Model(), ((torch.randn(2, 10) * 100).to(dtype),), flow) - def test_silu_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_silu_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_silu_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_silu_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_silu_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_silu_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_silu_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_silu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges x = torch.tensor([-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_sub.py b/backends/test/suite/operators/test_sub.py index 19884419637..30c0db5878c 100644 --- a/backends/test/suite/operators/test_sub.py +++ b/backends/test/suite/operators/test_sub.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -31,52 +30,52 @@ def forward(self, x, y): @operator_test class Subtract(OperatorTest): @dtype_test - def test_subtract_dtype(self, dtype, tester_factory: Callable) 
-> None: + def test_subtract_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( Model(), ( (torch.rand(2, 10) * 100).to(dtype), (torch.rand(2, 10) * 100).to(dtype), ), - tester_factory, + flow, ) - def test_subtract_f32_bcast_first(self, tester_factory: Callable) -> None: + def test_subtract_f32_bcast_first(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 5, 1, 5), ), - tester_factory, + flow, ) - def test_subtract_f32_bcast_second(self, tester_factory: Callable) -> None: + def test_subtract_f32_bcast_second(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(4, 4, 2, 7), torch.randn(2, 7), ), - tester_factory, + flow, ) - def test_subtract_f32_bcast_unary(self, tester_factory: Callable) -> None: + def test_subtract_f32_bcast_unary(self, flow: TestFlow) -> None: self._test_op( Model(), ( torch.randn(5), torch.randn(1, 1, 5), ), - tester_factory, + flow, ) - def test_subtract_f32_alpha(self, tester_factory: Callable) -> None: + def test_subtract_f32_alpha(self, flow: TestFlow) -> None: self._test_op( ModelAlpha(alpha=2), ( torch.randn(1, 25), torch.randn(1, 25), ), - tester_factory, + flow, ) diff --git a/backends/test/suite/operators/test_tanh.py b/backends/test/suite/operators/test_tanh.py index 1d7889a95da..a1c2b2bdafb 100644 --- a/backends/test/suite/operators/test_tanh.py +++ b/backends/test/suite/operators/test_tanh.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -22,18 +21,18 @@ def forward(self, x): @operator_test class TestTanh(OperatorTest): @dtype_test - def test_tanh_dtype(self, dtype, tester_factory: Callable) -> None: + def test_tanh_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 
10) * 10 - 5).to(dtype),), flow ) - def test_tanh_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_tanh_f32_single_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(20),), flow) - def test_tanh_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_tanh_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_tanh_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_tanh_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges x = torch.tensor([-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) diff --git a/backends/test/suite/operators/test_threshold.py b/backends/test/suite/operators/test_threshold.py index 97c84c58404..2b6922181b6 100644 --- a/backends/test/suite/operators/test_threshold.py +++ b/backends/test/suite/operators/test_threshold.py @@ -7,11 +7,10 @@ # pyre-unsafe -from typing import Callable - import torch from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.flow import TestFlow class Model(torch.nn.Module): @@ -30,42 +29,42 @@ def forward(self, x): @operator_test class TestThreshold(OperatorTest): @dtype_test - def test_threshold_dtype(self, dtype, tester_factory: Callable) -> None: + def test_threshold_dtype(self, flow: TestFlow, dtype) -> None: self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), tester_factory + Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow ) - def test_threshold_f32_single_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(20),), tester_factory) + def test_threshold_f32_single_dim(self, flow: TestFlow) -> None: + 
self._test_op(Model(), (torch.randn(20),), flow) - def test_threshold_f32_multi_dim(self, tester_factory: Callable) -> None: - self._test_op(Model(), (torch.randn(2, 3, 4, 5),), tester_factory) + def test_threshold_f32_multi_dim(self, flow: TestFlow) -> None: + self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) - def test_threshold_f32_custom_threshold(self, tester_factory: Callable) -> None: - self._test_op(Model(threshold=1.0), (torch.randn(3, 4, 5),), tester_factory) + def test_threshold_f32_custom_threshold(self, flow: TestFlow) -> None: + self._test_op(Model(threshold=1.0), (torch.randn(3, 4, 5),), flow) - def test_threshold_f32_custom_value(self, tester_factory: Callable) -> None: - self._test_op(Model(value=2.0), (torch.randn(3, 4, 5),), tester_factory) + def test_threshold_f32_custom_value(self, flow: TestFlow) -> None: + self._test_op(Model(value=2.0), (torch.randn(3, 4, 5),), flow) def test_threshold_f32_custom_threshold_value( - self, tester_factory: Callable + self, flow: TestFlow ) -> None: self._test_op( - Model(threshold=0.5, value=1.0), (torch.randn(3, 4, 5),), tester_factory + Model(threshold=0.5, value=1.0), (torch.randn(3, 4, 5),), flow ) - def test_threshold_f32_inplace(self, tester_factory: Callable) -> None: - self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), tester_factory) + def test_threshold_f32_inplace(self, flow: TestFlow) -> None: + self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) - def test_threshold_f32_boundary_values(self, tester_factory: Callable) -> None: + def test_threshold_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values around the threshold x = torch.tensor([-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0]) - self._test_op(Model(), (x,), tester_factory) + self._test_op(Model(), (x,), flow) - def test_threshold_f32_all_params(self, tester_factory: Callable) -> None: + def test_threshold_f32_all_params(self, flow: TestFlow) -> None: # Test with all parameters customized 
self._test_op( Model(threshold=0.5, value=3.0, inplace=True), (torch.randn(3, 4, 5),), - tester_factory, + flow, ) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index d7181300873..b5a4609447e 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -14,23 +14,26 @@ class TestResult(IntEnum): EAGER_FAIL = 2 """ The test failed due to the model failing to run in eager mode. """ + + QUANTIZE_FAIL = 3 + """ The test failed due to the quantization stage failing. """ - EXPORT_FAIL = 3 + EXPORT_FAIL = 4 """ The test failed due to the model failing to export. """ - LOWER_FAIL = 4 + LOWER_FAIL = 5 """ The test failed due to a failure in partitioning or lowering. """ - PTE_LOAD_FAIL = 5 + PTE_LOAD_FAIL = 6 """ The test failed due to the resulting PTE failing to load. """ - PTE_RUN_FAIL = 6 + PTE_RUN_FAIL = 7 """ The test failed due to the resulting PTE failing to run. """ - OUTPUT_MISMATCH_FAIL = 7 + OUTPUT_MISMATCH_FAIL = 8 """ The test failed due to a mismatch between runtime and reference outputs. """ - UNKNOWN_FAIL = 8 + UNKNOWN_FAIL = 9 """ The test failed in an unknown or unexpected manner. 
""" def is_success(self): @@ -49,6 +52,8 @@ def display_name(self): return "Success (Undelegated)" elif self == TestResult.EAGER_FAIL: return "Fail (Eager)" + elif self == TestResult.QUANTIZE_FAIL: + return "Fail (Quantize)" elif self == TestResult.EXPORT_FAIL: return "Fail (Export)" elif self == TestResult.LOWER_FAIL: diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index f6a515c39ac..5e019400131 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -10,6 +10,7 @@ from executorch.backends.test.harness import Tester from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter +from executorch.backends.test.suite.flow import TestFlow from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, @@ -29,9 +30,8 @@ def run_test( # noqa: C901 model: torch.nn.Module, inputs: Any, - tester_factory: Callable[[], Tester], + flow: TestFlow, test_name: str, - flow_name: str, params: dict | None, dynamic_shapes: Any | None = None, ) -> TestCaseSummary: @@ -46,7 +46,7 @@ def build_result( ) -> TestCaseSummary: return TestCaseSummary( name=test_name, - flow=flow_name, + flow=flow.name, params=params, result=result, error=error, @@ -54,8 +54,6 @@ def build_result( model.eval() - model.eval() - # Ensure the model can run in eager mode. try: model(*inputs) @@ -63,10 +61,16 @@ def build_result( return build_result(TestResult.EAGER_FAIL, e) try: - tester = tester_factory(model, inputs) + tester = flow.tester_factory(model, inputs) except Exception as e: return build_result(TestResult.UNKNOWN_FAIL, e) - + + if flow.quantize: + try: + tester.quantize(flow.quantize_stage_factory() if flow.quantize_stage_factory else None) + except Exception as e: + return build_result(TestResult.QUANTIZE_FAIL, e) + try: # TODO Use Tester dynamic_shapes parameter once input generation can properly handle derived dims. 
tester.export( @@ -128,6 +132,9 @@ def print_summary(summary: RunSummary): print() print("[Failure]") + print( + f"{summary.aggregated_results.get(TestResult.QUANTIZE_FAIL, 0):>5} Quantization Fail" + ) print( f"{summary.aggregated_results.get(TestResult.LOWER_FAIL, 0):>5} Lowering Fail" ) From ff5c4a58b3b077eda3dccaa0e233d9c5c96fcde7 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 16:47:27 -0700 Subject: [PATCH 07/38] Update [ghstack-poisoned] --- backends/test/suite/__init__.py | 139 +---------------- backends/test/suite/discovery.py | 9 +- backends/test/suite/operators/__init__.py | 140 ++++++++++++++++++ backends/test/suite/operators/test_add.py | 2 +- backends/test/suite/operators/test_div.py | 2 +- backends/test/suite/operators/test_elu.py | 2 +- backends/test/suite/operators/test_gelu.py | 2 +- backends/test/suite/operators/test_glu.py | 2 +- .../test/suite/operators/test_hardsigmoid.py | 2 +- .../test/suite/operators/test_hardswish.py | 2 +- .../test/suite/operators/test_hardtanh.py | 2 +- .../test/suite/operators/test_leaky_relu.py | 2 +- .../test/suite/operators/test_logsigmoid.py | 2 +- backends/test/suite/operators/test_mul.py | 2 +- backends/test/suite/operators/test_prelu.py | 2 +- backends/test/suite/operators/test_relu.py | 2 +- backends/test/suite/operators/test_sigmoid.py | 2 +- backends/test/suite/operators/test_silu.py | 2 +- backends/test/suite/operators/test_sub.py | 2 +- backends/test/suite/operators/test_tanh.py | 2 +- .../test/suite/operators/test_threshold.py | 2 +- 21 files changed, 167 insertions(+), 157 deletions(-) diff --git a/backends/test/suite/__init__.py b/backends/test/suite/__init__.py index 7190da4e0fd..43d4e16818f 100644 --- a/backends/test/suite/__init__.py +++ b/backends/test/suite/__init__.py @@ -9,18 +9,11 @@ import logging import os -import unittest - -from enum import Enum -from typing import Callable import executorch.backends.test.suite.flow -import torch -from 
executorch.backends.test.suite.context import get_active_test_context, TestContext from executorch.backends.test.suite.flow import TestFlow -from executorch.backends.test.suite.reporting import log_test_summary -from executorch.backends.test.suite.runner import run_test, runner_main +from executorch.backends.test.suite.runner import runner_main logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -62,109 +55,6 @@ def get_test_flows() -> dict[str, TestFlow]: return _ALL_TEST_FLOWS -DTYPES = [ - # torch.int8, - # torch.uint8, - # torch.int16, - # torch.uint16, - # torch.int32, - # torch.uint32, - # torch.int64, - # torch.uint64, - # torch.float16, - torch.float32, - # torch.float64, -] - -FLOAT_DTYPES = [ - torch.float16, - torch.float32, - torch.float64, -] - - -# The type of test function. This controls the test generation and expected signature. -# Standard tests are run, as is. Dtype tests get a variant generated for each dtype and -# take an additional dtype parameter. -class TestType(Enum): - STANDARD = 1 - DTYPE = 2 - - -# Function annotation for dtype tests. This instructs the test framework to run the test -# for each supported dtype and to pass dtype as a test parameter. -def dtype_test(func): - func.test_type = TestType.DTYPE - return func - - -# Class annotation for operator tests. This triggers the test framework to register -# the tests. -def operator_test(cls): - _create_tests(cls) - return cls - - -# Generate test cases for each backend flow. -def _create_tests(cls): - for key in dir(cls): - if key.startswith("test_"): - _expand_test(cls, key) - - -# Expand a test into variants for each registered flow. 
-def _expand_test(cls, test_name: str): - test_func = getattr(cls, test_name) - for flow in get_test_flows().values(): - _create_test_for_backend(cls, test_func, flow) - delattr(cls, test_name) - - -def _make_wrapped_test( - test_func: Callable, - test_name: str, - flow: TestFlow, - params: dict | None = None, -): - def wrapped_test(self): - with TestContext(test_name, flow.name, params): - test_kwargs = params or {} - test_kwargs["flow"] = flow - - test_func(self, **test_kwargs) - - wrapped_test._name = test_name - wrapped_test._flow = flow - - return wrapped_test - - -def _create_test_for_backend( - cls, - test_func: Callable, - flow: TestFlow, -): - test_type = getattr(test_func, "test_type", TestType.STANDARD) - - if test_type == TestType.STANDARD: - wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow) - test_name = f"{test_func.__name__}_{flow.name}" - setattr(cls, test_name, wrapped_test) - elif test_type == TestType.DTYPE: - for dtype in DTYPES: - wrapped_test = _make_wrapped_test( - test_func, - test_func.__name__, - flow, - {"dtype": dtype}, - ) - dtype_name = str(dtype)[6:] # strip "torch." - test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" - setattr(cls, test_name, wrapped_test) - else: - raise NotImplementedError(f"Unknown test type {test_type}.") - - def load_tests(loader, suite, pattern): package_dir = os.path.dirname(__file__) discovered_suite = loader.discover( @@ -174,32 +64,5 @@ def load_tests(loader, suite, pattern): return suite -class OperatorTest(unittest.TestCase): - def _test_op(self, model, inputs, flow: TestFlow): - context = get_active_test_context() - - # This should be set in the wrapped test. See _make_wrapped_test above. - assert context is not None, "Missing test context." 
- - run_summary = run_test( - model, - inputs, - flow, - context.test_name, - context.params, - ) - - log_test_summary(run_summary) - - if not run_summary.result.is_success(): - if run_summary.result.is_backend_failure(): - raise RuntimeError("Test failure.") from run_summary.error - else: - # Non-backend failure indicates a bad test. Mark as skipped. - raise unittest.SkipTest( - f"Test failed for reasons other than backend failure. Error: {run_summary.error}" - ) - - if __name__ == "__main__": runner_main() diff --git a/backends/test/suite/discovery.py b/backends/test/suite/discovery.py index 7ccc52ba4e7..f3ba26af69b 100644 --- a/backends/test/suite/discovery.py +++ b/backends/test/suite/discovery.py @@ -68,9 +68,16 @@ def _filter_tests( def _is_test_enabled(test_case: unittest.TestCase, test_filter: TestFilter) -> bool: test_method = getattr(test_case, test_case._testMethodName) + + # Handle import / discovery failures - leave them enabled to report nicely at the + # top level. There might be a better way to do this. Internally, unittest seems to + # replace it with a stub method to report the failure. 
+ if "testFailure" in str(test_method): + print(f"Warning: Test {test_case._testMethodName} failed to import.") + return True if not hasattr(test_method, "_flow"): - print(f"Test missing flow: {test_method}") + raise RuntimeError(f"Test missing flow: {test_case._testMethodName} {test_method}") flow: TestFlow = test_method._flow diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 0fb9ecd1dff..25f56fb05bc 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -7,7 +7,17 @@ # pyre-unsafe import os +import unittest +from enum import Enum +from typing import Callable + +import torch +from executorch.backends.test.suite import get_test_flows +from executorch.backends.test.suite.context import get_active_test_context, TestContext +from executorch.backends.test.suite.flow import TestFlow +from executorch.backends.test.suite.reporting import log_test_summary +from executorch.backends.test.suite.runner import run_test def load_tests(loader, suite, pattern): package_dir = os.path.dirname(__file__) @@ -16,3 +26,133 @@ def load_tests(loader, suite, pattern): ) suite.addTests(discovered_suite) return suite + + +DTYPES = [ + # torch.int8, + # torch.uint8, + # torch.int16, + # torch.uint16, + # torch.int32, + # torch.uint32, + # torch.int64, + # torch.uint64, + # torch.float16, + torch.float32, + # torch.float64, +] + +FLOAT_DTYPES = [ + torch.float16, + torch.float32, + torch.float64, +] + + +# The type of test function. This controls the test generation and expected signature. +# Standard tests are run, as is. Dtype tests get a variant generated for each dtype and +# take an additional dtype parameter. +class TestType(Enum): + STANDARD = 1 + DTYPE = 2 + + +# Function annotation for dtype tests. This instructs the test framework to run the test +# for each supported dtype and to pass dtype as a test parameter. 
+def dtype_test(func): + func.test_type = TestType.DTYPE + return func + + +# Class annotation for operator tests. This triggers the test framework to register +# the tests. +def operator_test(cls): + _create_tests(cls) + return cls + + +# Generate test cases for each backend flow. +def _create_tests(cls): + for key in dir(cls): + if key.startswith("test_"): + _expand_test(cls, key) + + +# Expand a test into variants for each registered flow. +def _expand_test(cls, test_name: str): + test_func = getattr(cls, test_name) + for flow in get_test_flows().values(): + _create_test_for_backend(cls, test_func, flow) + delattr(cls, test_name) + + +def _make_wrapped_test( + test_func: Callable, + test_name: str, + flow: TestFlow, + params: dict | None = None, +): + def wrapped_test(self): + with TestContext(test_name, flow.name, params): + test_kwargs = params or {} + test_kwargs["flow"] = flow + + test_func(self, **test_kwargs) + + wrapped_test._name = test_name + wrapped_test._flow = flow + + return wrapped_test + + +def _create_test_for_backend( + cls, + test_func: Callable, + flow: TestFlow, +): + test_type = getattr(test_func, "test_type", TestType.STANDARD) + + if test_type == TestType.STANDARD: + wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow) + test_name = f"{test_func.__name__}_{flow.name}" + setattr(cls, test_name, wrapped_test) + elif test_type == TestType.DTYPE: + for dtype in DTYPES: + wrapped_test = _make_wrapped_test( + test_func, + test_func.__name__, + flow, + {"dtype": dtype}, + ) + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" + setattr(cls, test_name, wrapped_test) + else: + raise NotImplementedError(f"Unknown test type {test_type}.") + + +class OperatorTest(unittest.TestCase): + def _test_op(self, model, inputs, flow: TestFlow): + context = get_active_test_context() + + # This should be set in the wrapped test. See _make_wrapped_test above. 
+ assert context is not None, "Missing test context." + + run_summary = run_test( + model, + inputs, + flow, + context.test_name, + context.params, + ) + + log_test_summary(run_summary) + + if not run_summary.result.is_success(): + if run_summary.result.is_backend_failure(): + raise RuntimeError("Test failure.") from run_summary.error + else: + # Non-backend failure indicates a bad test. Mark as skipped. + raise unittest.SkipTest( + f"Test failed for reasons other than backend failure. Error: {run_summary.error}" + ) diff --git a/backends/test/suite/operators/test_add.py b/backends/test/suite/operators/test_add.py index 2ff1644d672..decdbdd585e 100644 --- a/backends/test/suite/operators/test_add.py +++ b/backends/test/suite/operators/test_add.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_div.py b/backends/test/suite/operators/test_div.py index 1367a4bc8f7..1a84aaacb7a 100644 --- a/backends/test/suite/operators/test_div.py +++ b/backends/test/suite/operators/test_div.py @@ -11,7 +11,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_elu.py b/backends/test/suite/operators/test_elu.py index be4bb99bba0..52f381994e8 100644 --- a/backends/test/suite/operators/test_elu.py +++ b/backends/test/suite/operators/test_elu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow 
import TestFlow diff --git a/backends/test/suite/operators/test_gelu.py b/backends/test/suite/operators/test_gelu.py index 4e77f92bc03..3132614aa25 100644 --- a/backends/test/suite/operators/test_gelu.py +++ b/backends/test/suite/operators/test_gelu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_glu.py b/backends/test/suite/operators/test_glu.py index a20b2bf8543..82510f659af 100644 --- a/backends/test/suite/operators/test_glu.py +++ b/backends/test/suite/operators/test_glu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_hardsigmoid.py b/backends/test/suite/operators/test_hardsigmoid.py index 7ad92819506..4104d8b3f56 100644 --- a/backends/test/suite/operators/test_hardsigmoid.py +++ b/backends/test/suite/operators/test_hardsigmoid.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_hardswish.py b/backends/test/suite/operators/test_hardswish.py index e8d25266af5..0e6fb3b004d 100644 --- a/backends/test/suite/operators/test_hardswish.py +++ b/backends/test/suite/operators/test_hardswish.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, 
OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_hardtanh.py b/backends/test/suite/operators/test_hardtanh.py index 8b6d7bc1e6e..c72045a3a49 100644 --- a/backends/test/suite/operators/test_hardtanh.py +++ b/backends/test/suite/operators/test_hardtanh.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_leaky_relu.py b/backends/test/suite/operators/test_leaky_relu.py index ca60adde55f..56c5fe463db 100644 --- a/backends/test/suite/operators/test_leaky_relu.py +++ b/backends/test/suite/operators/test_leaky_relu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_logsigmoid.py b/backends/test/suite/operators/test_logsigmoid.py index c8cf01217d5..5354e995149 100644 --- a/backends/test/suite/operators/test_logsigmoid.py +++ b/backends/test/suite/operators/test_logsigmoid.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_mul.py b/backends/test/suite/operators/test_mul.py index 5914b455762..bfda5b883a9 100644 --- a/backends/test/suite/operators/test_mul.py +++ b/backends/test/suite/operators/test_mul.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from 
executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_prelu.py b/backends/test/suite/operators/test_prelu.py index b98a88bbe04..75f4c1a63b7 100644 --- a/backends/test/suite/operators/test_prelu.py +++ b/backends/test/suite/operators/test_prelu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_relu.py b/backends/test/suite/operators/test_relu.py index d90a7c6f04e..796395eaaf6 100644 --- a/backends/test/suite/operators/test_relu.py +++ b/backends/test/suite/operators/test_relu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_sigmoid.py b/backends/test/suite/operators/test_sigmoid.py index cb9a090b6cc..6623533dda5 100644 --- a/backends/test/suite/operators/test_sigmoid.py +++ b/backends/test/suite/operators/test_sigmoid.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_silu.py b/backends/test/suite/operators/test_silu.py index 9d8afbaa716..331e835433c 100644 --- a/backends/test/suite/operators/test_silu.py +++ b/backends/test/suite/operators/test_silu.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, 
operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_sub.py b/backends/test/suite/operators/test_sub.py index 30c0db5878c..fad64e7f000 100644 --- a/backends/test/suite/operators/test_sub.py +++ b/backends/test/suite/operators/test_sub.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_tanh.py b/backends/test/suite/operators/test_tanh.py index a1c2b2bdafb..b911fcfd1a0 100644 --- a/backends/test/suite/operators/test_tanh.py +++ b/backends/test/suite/operators/test_tanh.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/operators/test_threshold.py b/backends/test/suite/operators/test_threshold.py index 2b6922181b6..6708fd69971 100644 --- a/backends/test/suite/operators/test_threshold.py +++ b/backends/test/suite/operators/test_threshold.py @@ -9,7 +9,7 @@ import torch -from executorch.backends.test.suite import dtype_test, operator_test, OperatorTest +from executorch.backends.test.suite.operators import dtype_test, operator_test, OperatorTest from executorch.backends.test.suite.flow import TestFlow From 1105e043f52dc46549004b4e59e7bcf1a6ad2bb3 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 16:59:27 -0700 Subject: [PATCH 08/38] Update [ghstack-poisoned] --- backends/test/suite/flow.py | 8 ++++ backends/test/suite/flows/vulkan.py | 19 +++++++++ 
backends/vulkan/test/TARGETS | 10 +++++ backends/vulkan/test/tester.py | 60 +++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 backends/test/suite/flows/vulkan.py create mode 100644 backends/vulkan/test/tester.py diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index a9ddec22864..b85de8d673d 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -53,4 +53,12 @@ def all_flows() -> dict[str, TestFlow]: except Exception as e: logger.info(f"Skipping Core ML flow registration: {e}") + try: + from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW + flows += [ + VULKAN_TEST_FLOW, + ] + except Exception as e: + logger.info(f"Skipping Vulkan flow registration: {e}") + return {f.name: f for f in flows if f is not None} diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py new file mode 100644 index 00000000000..c37704acb57 --- /dev/null +++ b/backends/test/suite/flows/vulkan.py @@ -0,0 +1,19 @@ +import coremltools +import functools + +from executorch.backends.vulkan.test.tester import VulkanTester +from executorch.backends.test.suite.flow import TestFlow +from typing import Any + +def _create_vulkan_flow( + name: str, + quantize: bool = False, +) -> TestFlow: + return TestFlow( + name, + backend="vulkan", + tester_factory=VulkanTester, + quantize=quantize, + ) + +VULKAN_TEST_FLOW = _create_vulkan_flow("vulkan") diff --git a/backends/vulkan/test/TARGETS b/backends/vulkan/test/TARGETS index 8f07040d586..7f535a0001b 100644 --- a/backends/vulkan/test/TARGETS +++ b/backends/vulkan/test/TARGETS @@ -1,4 +1,5 @@ load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") oncall("executorch") @@ -57,3 +58,12 @@ python_unittest( "//executorch/backends/vulkan:vulkan_preprocess", ], ) + +runtime.python_library( + name = "tester", + srcs = ["tester.py"], + deps = [ + 
"//executorch/backends/vulkan/partitioner:vulkan_partitioner", + "//executorch/backends/vulkan:vulkan_preprocess", + ] +) diff --git a/backends/vulkan/test/tester.py b/backends/vulkan/test/tester.py new file mode 100644 index 00000000000..0b7cf51cee9 --- /dev/null +++ b/backends/vulkan/test/tester.py @@ -0,0 +1,60 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, List, Optional, Tuple + +import executorch +import executorch.backends.test.harness.stages as BaseStages + +import torch +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner +from executorch.backends.test.harness import Tester as TesterBase +from executorch.backends.test.harness.stages import StageType +from executorch.exir import EdgeCompileConfig +from executorch.exir.backend.partitioner import Partitioner + + +class Partition(BaseStages.Partition): + def __init__(self, partitioner: Optional[Partitioner] = None): + super().__init__( + partitioner=partitioner or VulkanPartitioner(), + ) + + +class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower): + def __init__( + self, + partitioners: Optional[List[Partitioner]] = None, + edge_compile_config: Optional[EdgeCompileConfig] = None, + ): + super().__init__( + default_partitioner_cls=VulkanPartitioner, + partitioners=partitioners, + edge_compile_config=edge_compile_config or EdgeCompileConfig(_check_ir_validity=False), + ) + + +class VulkanTester(TesterBase): + def __init__( + self, + module: torch.nn.Module, + example_inputs: Tuple[torch.Tensor], + dynamic_shapes: Optional[Tuple[Any]] = None, + ): + stage_classes = ( + executorch.backends.test.harness.Tester.default_stage_classes() + | { + StageType.PARTITION: Partition, + StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower, + } + ) + + super().__init__( + 
module=module, + stage_classes=stage_classes, + example_inputs=example_inputs, + dynamic_shapes=dynamic_shapes, + ) From ea548b76fbb801984b370436dd7c18f7182c48e6 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 17:01:28 -0700 Subject: [PATCH 09/38] Update [ghstack-poisoned] --- backends/test/suite/flows/vulkan.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py index c37704acb57..7d9629688c1 100644 --- a/backends/test/suite/flows/vulkan.py +++ b/backends/test/suite/flows/vulkan.py @@ -1,9 +1,5 @@ -import coremltools -import functools - from executorch.backends.vulkan.test.tester import VulkanTester from executorch.backends.test.suite.flow import TestFlow -from typing import Any def _create_vulkan_flow( name: str, From 4108f540c8f3809e00493560b967fbe7e8cd544f Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 17:10:48 -0700 Subject: [PATCH 10/38] Update [ghstack-poisoned] --- backends/qualcomm/tests/TARGETS | 10 +++ backends/qualcomm/tests/tester.py | 87 +++++++++++++++++++++++++++ backends/test/suite/flow.py | 8 +++ backends/test/suite/flows/qualcomm.py | 15 +++++ 4 files changed, 120 insertions(+) create mode 100644 backends/qualcomm/tests/tester.py create mode 100644 backends/test/suite/flows/qualcomm.py diff --git a/backends/qualcomm/tests/TARGETS b/backends/qualcomm/tests/TARGETS index 8078ca611f8..cb6bfa21b25 100644 --- a/backends/qualcomm/tests/TARGETS +++ b/backends/qualcomm/tests/TARGETS @@ -37,3 +37,13 @@ python_library( "//executorch/backends/qualcomm/debugger:utils", ], ) + +python_library( + name = "tester", + srcs = [ + "tester.py", + ], + deps = [ + ":test_qnn_delegate" + ] +) diff --git a/backends/qualcomm/tests/tester.py b/backends/qualcomm/tests/tester.py new file mode 100644 index 00000000000..3c5272ecc2e --- /dev/null +++ b/backends/qualcomm/tests/tester.py @@ -0,0 +1,87 @@ +# Copyright (c) Meta Platforms, Inc. 
and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, List, Optional, Tuple + +import executorch +import executorch.backends.test.harness.stages as BaseStages + +import torch +from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager +from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner +from executorch.backends.qualcomm.utils.utils import ( + generate_qnn_executorch_compiler_spec, + generate_htp_compiler_spec, + get_soc_to_chipset_map, +) +from executorch.backends.test.harness import Tester as TesterBase +from executorch.backends.test.harness.stages import StageType +from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower +from executorch.exir.backend.partitioner import Partitioner +from torch.export import ExportedProgram + + +class Partition(BaseStages.Partition): + def __init__(self, partitioner: Optional[Partitioner] = None): + super().__init__( + partitioner=partitioner or QnnPartitioner, + ) + + +class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower): + def __init__( + self, + partitioners: Optional[List[Partitioner]] = None, + edge_compile_config: Optional[EdgeCompileConfig] = None, + soc_model: str = "SM8650" + ): + backend_options = generate_htp_compiler_spec(use_fp16=True) + self.chipset = get_soc_to_chipset_map()[soc_model] + self.compiler_specs = generate_qnn_executorch_compiler_spec( + soc_model=self.chipset, + backend_options=backend_options, + ) + + super().__init__( + partitioners=partitioners or [QnnPartitioner(self.compiler_specs)], + edge_compile_config=edge_compile_config or EdgeCompileConfig(_check_ir_validity=False), + default_partitioner_cls=QnnPartitioner, + ) + + def run(self, artifact: ExportedProgram, inputs=None) -> None: + ep = QnnPassManager().transform_for_export_pipeline(artifact) + transform_passes = 
QnnPassManager().get_to_edge_transform_passes(ep) + + self.edge_dialect_program = to_edge_transform_and_lower( + ep, + transform_passes=transform_passes, + partitioner=self.partitioners, + compile_config=self.edge_compile_conf, + ) + + +class QualcommTester(TesterBase): + def __init__( + self, + module: torch.nn.Module, + example_inputs: Tuple[torch.Tensor], + dynamic_shapes: Optional[Tuple[Any]] = None, + ): + # Specialize for Qualcomm + stage_classes = ( + executorch.backends.test.harness.Tester.default_stage_classes() + | { + StageType.PARTITION: Partition, + StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower, + } + ) + + super().__init__( + module=module, + stage_classes=stage_classes, + example_inputs=example_inputs, + dynamic_shapes=dynamic_shapes, + ) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index b85de8d673d..644b3c4579c 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -61,4 +61,12 @@ def all_flows() -> dict[str, TestFlow]: except Exception as e: logger.info(f"Skipping Vulkan flow registration: {e}") + try: + from executorch.backends.test.suite.flows.qualcomm import QUALCOMM_TEST_FLOW + flows += [ + QUALCOMM_TEST_FLOW, + ] + except Exception as e: + logger.info(f"Skipping Qualcomm flow registration: {e}") + return {f.name: f for f in flows if f is not None} diff --git a/backends/test/suite/flows/qualcomm.py b/backends/test/suite/flows/qualcomm.py new file mode 100644 index 00000000000..bd347b9f8f5 --- /dev/null +++ b/backends/test/suite/flows/qualcomm.py @@ -0,0 +1,15 @@ +from executorch.backends.qualcomm.tests.tester import QualcommTester +from executorch.backends.test.suite.flow import TestFlow + +def _create_qualcomm_flow( + name: str, + quantize: bool = False, +) -> TestFlow: + return TestFlow( + name, + backend="qualcomm", + tester_factory=QualcommTester, + quantize=quantize, + ) + +QUALCOMM_TEST_FLOW = _create_qualcomm_flow("qualcomm") From 7ef236bf9fc2ba7e9a1836a9aa7941c8c1d59448 
Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 18:28:18 -0700 Subject: [PATCH 11/38] Update [ghstack-poisoned] --- backends/test/suite/flows/xnnpack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/test/suite/flows/xnnpack.py b/backends/test/suite/flows/xnnpack.py index af079f83018..e9773738926 100644 --- a/backends/test/suite/flows/xnnpack.py +++ b/backends/test/suite/flows/xnnpack.py @@ -17,7 +17,7 @@ def _create_xnnpack_flow_base(name: str, quantize_stage_factory: Callable[..., Q name, backend="xnnpack", tester_factory=XnnpackTester, - quantize=True, + quantize=quantize_stage_factory is not None, quantize_stage_factory=quantize_stage_factory, ) From 19760fcfac378426d0acbe29da1c48ba375aee95 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Tue, 22 Jul 2025 18:28:23 -0700 Subject: [PATCH 12/38] Update [ghstack-poisoned] --- backends/test/suite/context.py | 3 +- backends/test/suite/models/__init__.py | 13 +-- backends/test/suite/operators/__init__.py | 14 ++- backends/test/suite/reporting.py | 53 +++++++++- backends/test/suite/runner.py | 15 ++- backends/test/suite/tests/README.md | 3 + backends/test/suite/tests/__init__.py | 0 backends/test/suite/tests/test_reporting.py | 101 ++++++++++++++++++++ 8 files changed, 185 insertions(+), 17 deletions(-) create mode 100644 backends/test/suite/tests/README.md create mode 100644 backends/test/suite/tests/__init__.py create mode 100644 backends/test/suite/tests/test_reporting.py diff --git a/backends/test/suite/context.py b/backends/test/suite/context.py index 5f12284ae21..ec426e1e8fc 100644 --- a/backends/test/suite/context.py +++ b/backends/test/suite/context.py @@ -1,8 +1,9 @@ # Test run context management. This is used to determine the test context for reporting # purposes. 
class TestContext: - def __init__(self, test_name: str, flow_name: str, params: dict | None): + def __init__(self, test_name: str, test_base_name: str, flow_name: str, params: dict | None): self.test_name = test_name + self.test_base_name = test_base_name self.flow_name = flow_name self.params = params diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index b33878995d7..a44305d3b5a 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -43,19 +43,19 @@ def _create_test( dtype: torch.dtype, use_dynamic_shapes: bool, ): + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" + if use_dynamic_shapes: + test_name += "_dynamic_shape" + def wrapped_test(self): params = { "dtype": dtype, "use_dynamic_shapes": use_dynamic_shapes, } - with TestContext(test_name, flow.name, params): + with TestContext(test_name, test_func.__name__, flow.name, params): test_func(self, flow, dtype, use_dynamic_shapes) - dtype_name = str(dtype)[6:] # strip "torch." 
- test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" - if use_dynamic_shapes: - test_name += "_dynamic_shape" - wrapped_test._name = test_func.__name__ # type: ignore wrapped_test._flow = flow # type: ignore @@ -119,6 +119,7 @@ def run_model_test( inputs, flow, context.test_name, + context.test_base_name, context.params, dynamic_shapes=dynamic_shapes, ) diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 25f56fb05bc..54827bbca43 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -6,6 +6,7 @@ # pyre-unsafe +import copy import os import unittest @@ -89,12 +90,13 @@ def _expand_test(cls, test_name: str): def _make_wrapped_test( test_func: Callable, test_name: str, + test_base_name: str, flow: TestFlow, params: dict | None = None, ): def wrapped_test(self): - with TestContext(test_name, flow.name, params): - test_kwargs = params or {} + with TestContext(test_name, test_base_name, flow.name, params): + test_kwargs = copy.copy(params) or {} test_kwargs["flow"] = flow test_func(self, **test_kwargs) @@ -113,19 +115,20 @@ def _create_test_for_backend( test_type = getattr(test_func, "test_type", TestType.STANDARD) if test_type == TestType.STANDARD: - wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow) test_name = f"{test_func.__name__}_{flow.name}" + wrapped_test = _make_wrapped_test(test_func, test_name, test_func.__name__, flow) setattr(cls, test_name, wrapped_test) elif test_type == TestType.DTYPE: for dtype in DTYPES: + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" wrapped_test = _make_wrapped_test( test_func, + test_name, test_func.__name__, flow, {"dtype": dtype}, ) - dtype_name = str(dtype)[6:] # strip "torch." 
- test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" setattr(cls, test_name, wrapped_test) else: raise NotImplementedError(f"Unknown test type {test_type}.") @@ -143,6 +146,7 @@ def _test_op(self, model, inputs, flow: TestFlow): inputs, flow, context.test_name, + context.test_base_name, context.params, ) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index b5a4609447e..96a9a80c30b 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,7 +1,11 @@ from collections import Counter from dataclasses import dataclass from enum import IntEnum +from functools import reduce +from re import A +from typing import TextIO +import csv class TestResult(IntEnum): """Represents the result of a test case run, indicating success or a specific failure reason.""" @@ -75,13 +79,19 @@ class TestCaseSummary: """ Contains summary results for the execution of a single test case. """ + + backend: str + """ The name of the target backend. """ - name: str - """ The qualified name of the test, not including the flow suffix. """ - + base_name: str + """ The base name of the test, not including flow or parameter suffixes. """ + flow: str """ The backend-specific flow name. Corresponds to flows registered in backends/test/suite/__init__.py. """ + name: str + """ The full name of test, including flow and parameter suffixes. """ + params: dict | None """ Test-specific parameters, such as dtype. """ @@ -162,3 +172,40 @@ def complete_test_session() -> RunSummary: _active_session = None return summary + +def generate_csv_report(summary: RunSummary, output: TextIO): + """ Write a run summary report to a file in CSV format. """ + + field_names = [ + "Test ID", + "Test Case", + "Backend", + "Flow", + "Result", + ] + + # Tests can have custom parameters. We'll want to report them here, so we need + # a list of all unique parameter names. 
+ param_names = reduce( + lambda a, b: a.union(b), + (set(s.params.keys()) for s in summary.test_case_summaries if s.params is not None), + set() + ) + field_names += (s.capitalize() for s in param_names) + + writer = csv.DictWriter(output, field_names) + writer.writeheader() + + for record in summary.test_case_summaries: + row = { + "Test ID": record.name, + "Test Case": record.base_name, + "Backend": record.backend, + "Flow": record.flow, + "Result": record.result.display_name(), + } + if record.params is not None: + row.update({ + k.capitalize(): v for k, v in record.params.items() + }) + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 5e019400131..7c01ba4d210 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -7,13 +7,13 @@ import torch -from executorch.backends.test.harness import Tester from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter from executorch.backends.test.suite.flow import TestFlow from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, + generate_csv_report, RunSummary, TestCaseSummary, TestResult, @@ -32,6 +32,7 @@ def run_test( # noqa: C901 inputs: Any, flow: TestFlow, test_name: str, + test_base_name: str, params: dict | None, dynamic_shapes: Any | None = None, ) -> TestCaseSummary: @@ -45,8 +46,10 @@ def build_result( result: TestResult, error: Exception | None = None ) -> TestCaseSummary: return TestCaseSummary( - name=test_name, + backend=flow.backend, + base_name=test_base_name, flow=flow.name, + name=test_name, params=params, result=result, error=error, @@ -169,6 +172,9 @@ def parse_args(): parser.add_argument( "-f", "--filter", nargs="?", help="A regular expression filter for test names." ) + parser.add_argument( + "-r", "--report", nargs="?", help="A file to write the test report to, in CSV format." 
+ ) return parser.parse_args() @@ -196,6 +202,11 @@ def runner_main(): summary = complete_test_session() print_summary(summary) + + if args.report is not None: + with open(args.report, "w") as f: + print(f"Writing CSV report to {args.report}.") + generate_csv_report(summary, f) if __name__ == "__main__": diff --git a/backends/test/suite/tests/README.md b/backends/test/suite/tests/README.md new file mode 100644 index 00000000000..09117e1cd31 --- /dev/null +++ b/backends/test/suite/tests/README.md @@ -0,0 +1,3 @@ +# Tests + +This directory contains meta-tests for the backend test suite. As the test suite contains a non-negligible amount of logic, these tests are useful to ensure that the test suite itself is working correctly. diff --git a/backends/test/suite/tests/__init__.py b/backends/test/suite/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py new file mode 100644 index 00000000000..5d949cf74f3 --- /dev/null +++ b/backends/test/suite/tests/test_reporting.py @@ -0,0 +1,101 @@ +import torch +import unittest + +from csv import DictReader +from ..reporting import TestResult, TestCaseSummary, RunSummary, TestSessionState, generate_csv_report +from io import StringIO + +# Test data for simulated test results.
+TEST_CASE_SUMMARIES = [ + TestCaseSummary( + backend="backend1", + base_name="test1", + flow="flow1", + name="test1_backend1_flow1", + params=None, + result=TestResult.SUCCESS, + error=None, + ), + TestCaseSummary( + backend="backend2", + base_name="test1", + flow="flow1", + name="test1_backend2_flow1", + params=None, + result=TestResult.LOWER_FAIL, + error=None, + ), + TestCaseSummary( + backend="backend1", + base_name="test2", + flow="flow1", + name="test2_backend1_flow1", + params={"dtype": torch.float32}, + result=TestResult.SUCCESS_UNDELEGATED, + error=None, + ), + TestCaseSummary( + backend="backend2", + base_name="test2", + flow="flow1", + name="test2_backend2_flow1", + params={"use_dynamic_shapes": True}, + result=TestResult.EXPORT_FAIL, + error=None, + ), +] + +class Reporting(unittest.TestCase): + def test_csv_report_simple(self): + # Verify the format of a simple CSV run report. + session_state = TestSessionState() + session_state.test_case_summaries.extend(TEST_CASE_SUMMARIES) + run_summary = RunSummary.from_session(session_state) + + strio = StringIO() + generate_csv_report(run_summary, strio) + + # Attempt to deserialize and validate the CSV report. 
+ report = DictReader(StringIO(strio.getvalue())) + records = list(report) + self.assertEqual(len(records), 4) + + # Validate first record: test1, backend1, SUCCESS + self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1") + self.assertEqual(records[0]["Test Case"], "test1") + self.assertEqual(records[0]["Backend"], "backend1") + self.assertEqual(records[0]["Flow"], "flow1") + self.assertEqual(records[0]["Result"], "Success (Delegated)") + self.assertEqual(records[0]["Dtype"], "") + self.assertEqual(records[0]["Use_dynamic_shapes"], "") + + # Validate second record: test1, backend2, LOWER_FAIL + self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1") + self.assertEqual(records[1]["Test Case"], "test1") + self.assertEqual(records[1]["Backend"], "backend2") + self.assertEqual(records[1]["Flow"], "flow1") + self.assertEqual(records[1]["Result"], "Fail (Lowering)") + self.assertEqual(records[1]["Dtype"], "") + self.assertEqual(records[1]["Use_dynamic_shapes"], "") + + # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param + self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1") + self.assertEqual(records[2]["Test Case"], "test2") + self.assertEqual(records[2]["Backend"], "backend1") + self.assertEqual(records[2]["Flow"], "flow1") + self.assertEqual(records[2]["Result"], "Success (Undelegated)") + self.assertEqual(records[2]["Dtype"], str(torch.float32)) + self.assertEqual(records[2]["Use_dynamic_shapes"], "") + + # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param + self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") + self.assertEqual(records[3]["Test Case"], "test2") + self.assertEqual(records[3]["Backend"], "backend2") + self.assertEqual(records[3]["Flow"], "flow1") + self.assertEqual(records[3]["Result"], "Fail (Export)") + self.assertEqual(records[3]["Dtype"], "") + self.assertEqual(records[3]["Use_dynamic_shapes"], "True") + + + + From 
81dfb07dda684312018b2d2562ff911d9e74c3d1 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Wed, 23 Jul 2025 13:07:15 -0700 Subject: [PATCH 13/38] Update [ghstack-poisoned] --- backends/test/suite/models/test_torchaudio.py | 4 +++- backends/test/suite/models/test_torchvision.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backends/test/suite/models/test_torchaudio.py b/backends/test/suite/models/test_torchaudio.py index ac1bc21a526..5d526fe708e 100644 --- a/backends/test/suite/models/test_torchaudio.py +++ b/backends/test/suite/models/test_torchaudio.py @@ -20,7 +20,9 @@ from torch.export import Dim # -# This file contains model integration tests for supported torchaudio models. +# This file contains model integration tests for supported torchaudio models. As many torchaudio +# models are not export-compatible, this suite contains a subset of the available models and may +# grow over time. # diff --git a/backends/test/suite/models/test_torchvision.py b/backends/test/suite/models/test_torchvision.py index faa4212e1c4..2ef864ef42c 100644 --- a/backends/test/suite/models/test_torchvision.py +++ b/backends/test/suite/models/test_torchvision.py @@ -20,7 +20,9 @@ from torch.export import Dim # -# This file contains model integration tests for supported torchvision models. +# This file contains model integration tests for supported torchvision models. This +# suite intends to include all export-compatible torchvision models. For models with +# multiple size variants, one small or medium variant is used. 
# From 7a2fab5623c381098a35ef46f7e2549455416c88 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Wed, 23 Jul 2025 15:50:46 -0700 Subject: [PATCH 14/38] Update [ghstack-poisoned] --- backends/apple/coreml/test/tester.py | 26 ++++++++++------ backends/test/harness/stages/quantize.py | 3 +- backends/test/suite/flow.py | 21 +++++++++---- backends/test/suite/flows/coreml.py | 22 ++++++++----- backends/test/suite/flows/xnnpack.py | 31 ++++++++++++------- backends/test/suite/models/__init__.py | 1 - backends/test/suite/models/test_torchaudio.py | 7 +++-- .../test/suite/models/test_torchvision.py | 4 +-- backends/test/suite/operators/test_gelu.py | 8 ++--- backends/test/suite/operators/test_glu.py | 4 +-- .../test/suite/operators/test_hardtanh.py | 4 +-- .../test/suite/operators/test_leaky_relu.py | 4 +-- .../test/suite/operators/test_logsigmoid.py | 4 +-- backends/test/suite/operators/test_prelu.py | 12 ++----- backends/test/suite/operators/test_sigmoid.py | 4 +-- backends/test/suite/operators/test_tanh.py | 4 +-- .../test/suite/operators/test_threshold.py | 12 ++----- backends/test/suite/reporting.py | 2 +- backends/test/suite/runner.py | 11 ++++--- 19 files changed, 94 insertions(+), 90 deletions(-) diff --git a/backends/apple/coreml/test/tester.py b/backends/apple/coreml/test/tester.py index eee4c4e5893..05b9ab22836 100644 --- a/backends/apple/coreml/test/tester.py +++ b/backends/apple/coreml/test/tester.py @@ -4,12 +4,12 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import functools from typing import Any, List, Optional, Sequence, Tuple import coremltools as ct import executorch import executorch.backends.test.harness.stages as BaseStages -import functools import torch from executorch.backends.apple.coreml.compiler import CoreMLBackend @@ -21,7 +21,7 @@ from executorch.exir.backend.partitioner import Partitioner -def _get_static_int8_qconfig(): +def _get_static_int8_linear_qconfig(): return ct.optimize.torch.quantization.LinearQuantizerConfig( global_config=ct.optimize.torch.quantization.ModuleLinearQuantizerConfig( quantization_scheme="symmetric", @@ -42,22 +42,23 @@ def __init__( is_qat: Optional[bool] = False, ): super().__init__( - quantizer=quantizer or CoreMLQuantizer(quantization_config or _get_static_int8_qconfig()), + quantizer=quantizer + or CoreMLQuantizer(quantization_config or _get_static_int8_linear_qconfig()), calibrate=calibrate, calibration_samples=calibration_samples, is_qat=is_qat, ) - class Partition(BaseStages.Partition): def __init__( - self, + self, partitioner: Optional[Partitioner] = None, minimum_deployment_target: Optional[Any] = ct.target.iOS15, ): super().__init__( - partitioner=partitioner or CoreMLPartitioner( + partitioner=partitioner + or CoreMLPartitioner( compile_specs=CoreMLBackend.generate_compile_specs( minimum_deployment_target=minimum_deployment_target ) @@ -74,9 +75,9 @@ def __init__( ): super().__init__( default_partitioner_cls=lambda: CoreMLPartitioner( - compile_specs=CoreMLBackend.generate_compile_specs( + compile_specs=CoreMLBackend.generate_compile_specs( minimum_deployment_target=minimum_deployment_target - ) + ) ), partitioners=partitioners, edge_compile_config=edge_compile_config, @@ -96,8 +97,13 @@ def __init__( executorch.backends.test.harness.Tester.default_stage_classes() | { StageType.QUANTIZE: Quantize, - StageType.PARTITION: functools.partial(Partition, minimum_deployment_target=minimum_deployment_target), - StageType.TO_EDGE_TRANSFORM_AND_LOWER: 
functools.partial(ToEdgeTransformAndLower, minimum_deployment_target=minimum_deployment_target), + StageType.PARTITION: functools.partial( + Partition, minimum_deployment_target=minimum_deployment_target + ), + StageType.TO_EDGE_TRANSFORM_AND_LOWER: functools.partial( + ToEdgeTransformAndLower, + minimum_deployment_target=minimum_deployment_target, + ), } ) diff --git a/backends/test/harness/stages/quantize.py b/backends/test/harness/stages/quantize.py index dd61d3acacb..b98c4faa3dd 100644 --- a/backends/test/harness/stages/quantize.py +++ b/backends/test/harness/stages/quantize.py @@ -25,13 +25,14 @@ def __init__( calibrate: bool = True, calibration_samples: Optional[Sequence[Any]] = None, is_qat: Optional[bool] = False, + set_global: bool = True, ): self.quantizer = quantizer self.quantization_config = quantization_config self.calibrate = calibrate self.calibration_samples = calibration_samples - if self.quantization_config is not None: + if self.quantization_config is not None and set_global: self.quantizer.set_global(self.quantization_config) self.converted_graph = None diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index a9ddec22864..2006ac9a485 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -22,30 +22,39 @@ class TestFlow: backend: str """ The name of the target backend. """ - + tester_factory: Callable[..., Tester] """ A factory function that returns a Tester instance for this lowering flow. """ quantize: bool = field(default=False) """ Whether to tester should run the quantize stage on the model. """ - + quantize_stage_factory: Callable[..., Quantize] | None = None """ A factory function which instantiates a Quantize stage. Can be None to use the tester's default. 
""" + def all_flows() -> dict[str, TestFlow]: flows = [] - + try: - from executorch.backends.test.suite.flows.xnnpack import XNNPACK_TEST_FLOW, XNNPACK_STATIC_INT8_TEST_FLOW + from executorch.backends.test.suite.flows.xnnpack import ( + XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW, + XNNPACK_TEST_FLOW, + ) + flows += [ XNNPACK_TEST_FLOW, - XNNPACK_STATIC_INT8_TEST_FLOW, + XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW, ] except Exception as e: logger.info(f"Skipping XNNPACK flow registration: {e}") try: - from executorch.backends.test.suite.flows.coreml import COREML_TEST_FLOW, COREML_STATIC_INT8_TEST_FLOW + from executorch.backends.test.suite.flows.coreml import ( + COREML_STATIC_INT8_TEST_FLOW, + COREML_TEST_FLOW, + ) + flows += [ COREML_TEST_FLOW, COREML_STATIC_INT8_TEST_FLOW, diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py index 443457bd695..fd956b64f05 100644 --- a/backends/test/suite/flows/coreml.py +++ b/backends/test/suite/flows/coreml.py @@ -1,24 +1,30 @@ -import coremltools import functools +from typing import Any + +import coremltools from executorch.backends.apple.coreml.test.tester import CoreMLTester from executorch.backends.test.suite.flow import TestFlow -from typing import Any + def _create_coreml_flow( - name: str, - quantize: bool = False, - minimum_deployment_target: Any = coremltools.target.iOS15 + name: str, + quantize: bool = False, + minimum_deployment_target: Any = coremltools.target.iOS15, ) -> TestFlow: return TestFlow( name, backend="coreml", - tester_factory=functools.partial(CoreMLTester, minimum_deployment_target=minimum_deployment_target), + tester_factory=functools.partial( + CoreMLTester, minimum_deployment_target=minimum_deployment_target + ), quantize=quantize, ) + COREML_TEST_FLOW = _create_coreml_flow("coreml") COREML_STATIC_INT8_TEST_FLOW = _create_coreml_flow( - "coreml_static_int8", + "coreml_static_int8", quantize=True, - minimum_deployment_target=coremltools.target.iOS17) + 
minimum_deployment_target=coremltools.target.iOS17, +) diff --git a/backends/test/suite/flows/xnnpack.py b/backends/test/suite/flows/xnnpack.py index e9773738926..d5ae5361d11 100644 --- a/backends/test/suite/flows/xnnpack.py +++ b/backends/test/suite/flows/xnnpack.py @@ -1,18 +1,23 @@ +import logging +from typing import Callable + from executorch.backends.test.harness.stages import Quantize from executorch.backends.test.suite.flow import TestFlow -from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import get_symmetric_quantization_config +from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import ( + get_symmetric_quantization_config, +) from executorch.backends.xnnpack.test.tester import ( Quantize as XnnpackQuantize, - Tester as XnnpackTester + Tester as XnnpackTester, ) -from typing import Callable - -import logging logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -def _create_xnnpack_flow_base(name: str, quantize_stage_factory: Callable[..., Quantize] | None = None) -> TestFlow: + +def _create_xnnpack_flow_base( + name: str, quantize_stage_factory: Callable[..., Quantize] | None = None +) -> TestFlow: return TestFlow( name, backend="xnnpack", @@ -20,17 +25,21 @@ def _create_xnnpack_flow_base(name: str, quantize_stage_factory: Callable[..., Q quantize=quantize_stage_factory is not None, quantize_stage_factory=quantize_stage_factory, ) - + + def _create_xnnpack_flow() -> TestFlow: return _create_xnnpack_flow_base("xnnpack") -def _create_xnnpack_static_int8_flow() -> TestFlow: + +def _create_xnnpack_static_int8_per_channel_flow() -> TestFlow: def create_quantize_stage() -> Quantize: - qparams = get_symmetric_quantization_config(is_per_channel=True) + qparams = get_symmetric_quantization_config(is_per_channel=True) return XnnpackQuantize( quantization_config=qparams, ) - return _create_xnnpack_flow_base("xnnpack_static_int8", create_quantize_stage) + + return _create_xnnpack_flow_base("xnnpack_static_int8_per_channel", 
create_quantize_stage) + XNNPACK_TEST_FLOW = _create_xnnpack_flow() -XNNPACK_STATIC_INT8_TEST_FLOW = _create_xnnpack_static_int8_flow() +XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW = _create_xnnpack_static_int8_per_channel_flow() diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index b33878995d7..e155e3382c5 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -12,7 +12,6 @@ from typing import Any, Callable import torch -from executorch.backends.test.harness import Tester from executorch.backends.test.suite import get_test_flows from executorch.backends.test.suite.context import get_active_test_context, TestContext from executorch.backends.test.suite.flow import TestFlow diff --git a/backends/test/suite/models/test_torchaudio.py b/backends/test/suite/models/test_torchaudio.py index 2816a3855d6..69f6de4684f 100644 --- a/backends/test/suite/models/test_torchaudio.py +++ b/backends/test/suite/models/test_torchaudio.py @@ -7,7 +7,7 @@ # pyre-unsafe import unittest -from typing import Callable, Tuple +from typing import Tuple import torch import torchaudio @@ -92,7 +92,10 @@ def test_wav2letter( @unittest.skip("This model times out on all backends.") def test_wavernn( - self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool, + self, + flow: TestFlow, + dtype: torch.dtype, + use_dynamic_shapes: bool, ): model = torchaudio.models.WaveRNN( upsample_scales=[5, 5, 8], n_classes=512, hop_length=200 diff --git a/backends/test/suite/models/test_torchvision.py b/backends/test/suite/models/test_torchvision.py index ab811854f69..e69de80a871 100644 --- a/backends/test/suite/models/test_torchvision.py +++ b/backends/test/suite/models/test_torchvision.py @@ -154,9 +154,7 @@ def test_swin_v2_t( model = torchvision.models.swin_v2_t() self._test_cv_model(model, flow, dtype, use_dynamic_shapes) - def test_vgg11( - self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool - ): + 
def test_vgg11(self, flow: TestFlow, dtype: torch.dtype, use_dynamic_shapes: bool): model = torchvision.models.vgg11() self._test_cv_model(model, flow, dtype, use_dynamic_shapes) diff --git a/backends/test/suite/operators/test_gelu.py b/backends/test/suite/operators/test_gelu.py index 4e77f92bc03..948947907d9 100644 --- a/backends/test/suite/operators/test_gelu.py +++ b/backends/test/suite/operators/test_gelu.py @@ -26,9 +26,7 @@ def forward(self, x): class TestGELU(OperatorTest): @dtype_test def test_gelu_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_gelu_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) @@ -37,9 +35,7 @@ def test_gelu_f32_multi_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) def test_gelu_f32_tanh_approximation(self, flow: TestFlow) -> None: - self._test_op( - Model(approximate="tanh"), (torch.randn(3, 4, 5),), flow - ) + self._test_op(Model(approximate="tanh"), (torch.randn(3, 4, 5),), flow) def test_gelu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific values spanning negative and positive ranges diff --git a/backends/test/suite/operators/test_glu.py b/backends/test/suite/operators/test_glu.py index a20b2bf8543..b7126d5fdf5 100644 --- a/backends/test/suite/operators/test_glu.py +++ b/backends/test/suite/operators/test_glu.py @@ -27,9 +27,7 @@ class TestGLU(OperatorTest): @dtype_test def test_glu_dtype(self, flow: TestFlow, dtype) -> None: # Input must have even number of elements in the specified dimension - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_glu_f32_dim_last(self, flow: TestFlow) -> None: # Default dim is -1 (last dimension) diff --git 
a/backends/test/suite/operators/test_hardtanh.py b/backends/test/suite/operators/test_hardtanh.py index 8b6d7bc1e6e..ffef9977e01 100644 --- a/backends/test/suite/operators/test_hardtanh.py +++ b/backends/test/suite/operators/test_hardtanh.py @@ -39,9 +39,7 @@ def test_hardtanh_f32_multi_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) def test_hardtanh_f32_custom_range(self, flow: TestFlow) -> None: - self._test_op( - Model(min_val=-2.0, max_val=2.0), (torch.randn(3, 4, 5),), flow - ) + self._test_op(Model(min_val=-2.0, max_val=2.0), (torch.randn(3, 4, 5),), flow) def test_hardtanh_f32_inplace(self, flow: TestFlow) -> None: self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) diff --git a/backends/test/suite/operators/test_leaky_relu.py b/backends/test/suite/operators/test_leaky_relu.py index ca60adde55f..e753abf8bb6 100644 --- a/backends/test/suite/operators/test_leaky_relu.py +++ b/backends/test/suite/operators/test_leaky_relu.py @@ -38,9 +38,7 @@ def test_leaky_relu_f32_multi_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(2, 3, 4, 5),), flow) def test_leaky_relu_f32_custom_slope(self, flow: TestFlow) -> None: - self._test_op( - Model(negative_slope=0.1), (torch.randn(3, 4, 5),), flow - ) + self._test_op(Model(negative_slope=0.1), (torch.randn(3, 4, 5),), flow) def test_leaky_relu_f32_inplace(self, flow: TestFlow) -> None: self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) diff --git a/backends/test/suite/operators/test_logsigmoid.py b/backends/test/suite/operators/test_logsigmoid.py index c8cf01217d5..ff62358a98e 100644 --- a/backends/test/suite/operators/test_logsigmoid.py +++ b/backends/test/suite/operators/test_logsigmoid.py @@ -22,9 +22,7 @@ def forward(self, x): class TestLogSigmoid(OperatorTest): @dtype_test def test_logsigmoid_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + 
self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_logsigmoid_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) diff --git a/backends/test/suite/operators/test_prelu.py b/backends/test/suite/operators/test_prelu.py index b98a88bbe04..5987f6bd75b 100644 --- a/backends/test/suite/operators/test_prelu.py +++ b/backends/test/suite/operators/test_prelu.py @@ -26,9 +26,7 @@ def forward(self, x): class TestPReLU(OperatorTest): @dtype_test def test_prelu_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model().to(dtype), ((torch.rand(2, 10) * 2 - 1).to(dtype),), flow - ) + self._test_op(Model().to(dtype), ((torch.rand(2, 10) * 2 - 1).to(dtype),), flow) def test_prelu_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) @@ -41,15 +39,11 @@ def test_prelu_f32_custom_init(self, flow: TestFlow) -> None: def test_prelu_f32_channel_shared(self, flow: TestFlow) -> None: # Default num_parameters=1 means the parameter is shared across all channels - self._test_op( - Model(num_parameters=1), (torch.randn(2, 3, 4, 5),), flow - ) + self._test_op(Model(num_parameters=1), (torch.randn(2, 3, 4, 5),), flow) def test_prelu_f32_per_channel_parameter(self, flow: TestFlow) -> None: # num_parameters=3 means each channel has its own parameter (for dim=1) - self._test_op( - Model(num_parameters=3), (torch.randn(2, 3, 4, 5),), flow - ) + self._test_op(Model(num_parameters=3), (torch.randn(2, 3, 4, 5),), flow) def test_prelu_f32_boundary_values(self, flow: TestFlow) -> None: # Test with specific positive and negative values diff --git a/backends/test/suite/operators/test_sigmoid.py b/backends/test/suite/operators/test_sigmoid.py index cb9a090b6cc..2a2c8c0539e 100644 --- a/backends/test/suite/operators/test_sigmoid.py +++ b/backends/test/suite/operators/test_sigmoid.py @@ -22,9 +22,7 @@ def forward(self, x): class TestSigmoid(OperatorTest): @dtype_test def 
test_sigmoid_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_sigmoid_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) diff --git a/backends/test/suite/operators/test_tanh.py b/backends/test/suite/operators/test_tanh.py index a1c2b2bdafb..b7e4ce7166b 100644 --- a/backends/test/suite/operators/test_tanh.py +++ b/backends/test/suite/operators/test_tanh.py @@ -22,9 +22,7 @@ def forward(self, x): class TestTanh(OperatorTest): @dtype_test def test_tanh_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_tanh_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) diff --git a/backends/test/suite/operators/test_threshold.py b/backends/test/suite/operators/test_threshold.py index 2b6922181b6..1dfac7dd007 100644 --- a/backends/test/suite/operators/test_threshold.py +++ b/backends/test/suite/operators/test_threshold.py @@ -30,9 +30,7 @@ def forward(self, x): class TestThreshold(OperatorTest): @dtype_test def test_threshold_dtype(self, flow: TestFlow, dtype) -> None: - self._test_op( - Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow - ) + self._test_op(Model(), ((torch.rand(2, 10) * 10 - 5).to(dtype),), flow) def test_threshold_f32_single_dim(self, flow: TestFlow) -> None: self._test_op(Model(), (torch.randn(20),), flow) @@ -46,12 +44,8 @@ def test_threshold_f32_custom_threshold(self, flow: TestFlow) -> None: def test_threshold_f32_custom_value(self, flow: TestFlow) -> None: self._test_op(Model(value=2.0), (torch.randn(3, 4, 5),), flow) - def test_threshold_f32_custom_threshold_value( - self, flow: TestFlow - ) -> None: - self._test_op( - Model(threshold=0.5, value=1.0), 
(torch.randn(3, 4, 5),), flow - ) + def test_threshold_f32_custom_threshold_value(self, flow: TestFlow) -> None: + self._test_op(Model(threshold=0.5, value=1.0), (torch.randn(3, 4, 5),), flow) def test_threshold_f32_inplace(self, flow: TestFlow) -> None: self._test_op(Model(inplace=True), (torch.randn(3, 4, 5),), flow) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index b5a4609447e..ad32a8c74c9 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -14,7 +14,7 @@ class TestResult(IntEnum): EAGER_FAIL = 2 """ The test failed due to the model failing to run in eager mode. """ - + QUANTIZE_FAIL = 3 """ The test failed due to the quantization stage failing. """ diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 5c80699e6bb..3fe9084548c 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -3,11 +3,10 @@ import re import unittest -from typing import Any, Callable +from typing import Any import torch -from executorch.backends.test.harness import Tester from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter from executorch.backends.test.suite.flow import TestFlow @@ -62,13 +61,15 @@ def build_result( tester = flow.tester_factory(model, inputs) except Exception as e: return build_result(TestResult.UNKNOWN_FAIL, e) - + if flow.quantize: try: - tester.quantize(flow.quantize_stage_factory() if flow.quantize_stage_factory else None) + tester.quantize( + flow.quantize_stage_factory() if flow.quantize_stage_factory else None + ) except Exception as e: return build_result(TestResult.QUANTIZE_FAIL, e) - + try: # TODO Use Tester dynamic_shapes parameter once input generation can properly handle derived dims. 
tester.export( From fd26fc7f202d41bd71587f790a2dc0cdf1933fcf Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Wed, 23 Jul 2025 22:25:13 -0700 Subject: [PATCH 15/38] Update [ghstack-poisoned] --- backends/test/harness/error_statistics.py | 91 +++++++++++++++++++++++ backends/test/harness/tester.py | 26 +++++-- backends/test/suite/reporting.py | 28 ++++++- backends/test/suite/runner.py | 8 +- 4 files changed, 144 insertions(+), 9 deletions(-) create mode 100644 backends/test/harness/error_statistics.py diff --git a/backends/test/harness/error_statistics.py b/backends/test/harness/error_statistics.py new file mode 100644 index 00000000000..4662b028afe --- /dev/null +++ b/backends/test/harness/error_statistics.py @@ -0,0 +1,91 @@ +from dataclasses import dataclass +from torch.ao.ns.fx.utils import compute_sqnr + +import torch + +@dataclass +class TensorStatistics: + """ Contains summary statistics for a tensor. """ + + shape: torch.Size + """ The shape of the tensor. """ + + numel: int + """ The number of elements in the tensor. """ + + median: float + """ The median of the tensor. """ + + mean: float + """ The mean of the tensor. """ + + max: torch.types.Number + """ The maximum element of the tensor. """ + + min: torch.types.Number + """ The minimum element of the tensor. """ + + @classmethod + def from_tensor(cls, tensor: torch.Tensor) -> "TensorStatistics": + """ Creates a TensorStatistics object from a tensor. """ + flattened = torch.flatten(tensor) + return cls( + shape=tensor.shape, + numel=tensor.numel(), + median=flattened.median().item(), + mean=flattened.mean().item(), + max=flattened.max().item(), + min=flattened.min().item(), + ) + +@dataclass +class ErrorStatistics: + """ Contains statistics derived from the difference of two tensors. """ + + reference_stats: TensorStatistics + """ Statistics for the reference tensor. """ + + actual_stats: TensorStatistics + """ Statistics for the actual tensor. 
""" + + error_l2_norm: float | None + """ The L2 norm of the error between the actual and reference tensor. """ + + error_mae: float | None + """ The mean absolute error between the actual and reference tensor. """ + + error_max: float | None + """ The maximum absolute elementwise error between the actual and reference tensor. """ + + error_msd: float | None + """ The mean signed deviation between the actual and reference tensor. """ + + sqnr: float | None + """ The signal-to-quantization-noise ratio between the actual and reference tensor. """ + + @classmethod + def from_tensors(cls, actual: torch.Tensor, reference: torch.Tensor) -> "ErrorStatistics": + """ Creates an ErrorStatistics object from two tensors. """ + if actual.shape != reference.shape: + return cls( + reference_stats=TensorStatistics.from_tensor(reference), + actual_stats=TensorStatistics.from_tensor(actual), + error_l2_norm=None, + error_mae=None, + error_max = None, + error_msd=None, + sqnr=None, + ) + + error = actual.to(torch.float64) - reference.to(torch.float64) + flat_error = torch.flatten(error) + + return cls( + reference_stats=TensorStatistics.from_tensor(reference), + actual_stats=TensorStatistics.from_tensor(actual), + error_l2_norm=torch.linalg.norm(flat_error).item(), + error_mae=torch.mean(torch.abs(flat_error)).item(), + error_max=torch.max(torch.abs(flat_error)).item(), + error_msd=torch.mean(flat_error).item(), + sqnr=compute_sqnr(actual, reference).item() + ) diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index 06db1aae13d..34832989459 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -4,6 +4,7 @@ import torch +from executorch.backends.test.harness.error_statistics import ErrorStatistics from executorch.backends.test.harness.stages import ( Export, Partition, @@ -302,17 +303,15 @@ def run_method_and_compare_outputs( atol=1e-03, rtol=1e-03, qtol=0, + statistics_callback: Callable[[ErrorStatistics], None] | None = 
None, ): number_of_runs = 1 if inputs is not None else num_runs reference_stage = self.stages[StageType.EXPORT] stage = stage or self.cur - print(f"Comparing Stage {stage} with Stage {reference_stage}") - for run_iteration in range(number_of_runs): + for _ in range(number_of_runs): inputs_to_run = inputs if inputs else next(self.generate_random_inputs()) - input_shapes = [generated_input.shape for generated_input in inputs_to_run] - print(f"Run {run_iteration} with input shapes: {input_shapes}") # Reference output (and quantization scale) ( @@ -325,13 +324,19 @@ def run_method_and_compare_outputs( # Output from running artifact at stage stage_output = self.stages[stage].run_artifact(inputs_to_run) self._compare_outputs( - reference_output, stage_output, quantization_scale, atol, rtol, qtol + reference_output, stage_output, quantization_scale, atol, rtol, qtol, statistics_callback ) return self @staticmethod - def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03): + def _assert_outputs_equal( + model_output, + ref_output, + atol=1e-03, + rtol=1e-03, + statistics_callback: Callable[[ErrorStatistics], None] | None = None, + ): """ Helper testing function that asserts that the model output and the reference output are equal with some tolerance. 
Due to numerical differences between eager mode and @@ -346,6 +351,11 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03): for i in range(len(model_output)): model = model_output[i] ref = ref_output[i] + + error_stats = ErrorStatistics.from_tensors(model, ref) + if statistics_callback is not None: + statistics_callback(error_stats) + assert ( ref.shape == model.shape ), f"Output {i} shape {model.shape} does not match reference output shape {ref.shape}" @@ -383,6 +393,7 @@ def _compare_outputs( atol=1e-03, rtol=1e-03, qtol=0, + statistics_callback: Callable[[ErrorStatistics], None] | None = None, ): """ Compares the original of the original nn module with the output of the generated artifact. @@ -399,12 +410,13 @@ def _compare_outputs( # atol by qtol quant units. if quantization_scale is not None: atol += quantization_scale * qtol - + Tester._assert_outputs_equal( stage_output, reference_output, atol=atol, rtol=rtol, + statistics_callback=statistics_callback, ) @staticmethod diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 784feff2324..7ff89eb34ae 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -2,11 +2,12 @@ from dataclasses import dataclass from enum import IntEnum from functools import reduce -from re import A from typing import TextIO import csv +from executorch.backends.test.harness.error_statistics import ErrorStatistics + class TestResult(IntEnum): """Represents the result of a test case run, indicating success or a specific failure reason.""" @@ -100,6 +101,12 @@ class TestCaseSummary: error: Exception | None """ The Python exception object, if any. """ + + tensor_error_statistics: list[ErrorStatistics] + """ + Statistics about the error between the backend and reference outputs. Each element of this list corresponds to + a single output tensor. 
+ """ class TestSessionState: @@ -193,6 +200,17 @@ def generate_csv_report(summary: RunSummary, output: TextIO): ) field_names += (s.capitalize() for s in param_names) + # Add tensor error statistic field names for each output index. + max_outputs = max(len(s.tensor_error_statistics) for s in summary.test_case_summaries) + for i in range(max_outputs): + field_names.extend([ + f"Output {i} Error Max", + f"Output {i} Error MAE", + f"Output {i} Error MSD", + f"Output {i} Error L2", + f"Output {i} SQNR", + ]) + writer = csv.DictWriter(output, field_names) writer.writeheader() @@ -208,4 +226,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row.update({ k.capitalize(): v for k, v in record.params.items() }) + + for output_idx, error_stats in enumerate(record.tensor_error_statistics): + row[f"Output {output_idx} Error Max"] = error_stats.error_max + row[f"Output {output_idx} Error MAE"] = error_stats.error_mae + row[f"Output {output_idx} Error MSD"] = error_stats.error_msd + row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm + row[f"Output {output_idx} SQNR"] = error_stats.sqnr + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index b5b4e6e19bb..95798c73bb7 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -7,6 +7,7 @@ import torch +from executorch.backends.test.harness.error_statistics import ErrorStatistics from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter from executorch.backends.test.suite.flow import TestFlow @@ -40,6 +41,8 @@ def run_test( # noqa: C901 Top-level test run function for a model, input set, and tester. Handles test execution and reporting. """ + + error_statistics: list[ErrorStatistics] = [] # Helper method to construct the summary. 
def build_result( @@ -53,6 +56,7 @@ def build_result( params=params, result=result, error=error, + tensor_error_statistics=error_statistics, ) # Ensure the model can run in eager mode. @@ -106,7 +110,9 @@ def build_result( # the cause of a failure in run_method_and_compare_outputs. We can look for # AssertionErrors to catch output mismatches, but this might catch more than that. try: - tester.run_method_and_compare_outputs() + tester.run_method_and_compare_outputs( + statistics_callback = lambda stats: error_statistics.append(stats) + ) except AssertionError as e: return build_result(TestResult.OUTPUT_MISMATCH_FAIL, e) except Exception as e: From 4ae840d4c9bf777ee34f14b553ceae5ce996f2b9 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Thu, 24 Jul 2025 10:19:58 -0700 Subject: [PATCH 16/38] Update [ghstack-poisoned] --- backends/test/suite/flow.py | 1 + backends/test/suite/flows/vulkan.py | 8 +++++--- backends/vulkan/test/tester.py | 5 +++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 106297cf708..2e2c2bf9391 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -64,6 +64,7 @@ def all_flows() -> dict[str, TestFlow]: try: from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW + flows += [ VULKAN_TEST_FLOW, ] diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py index 7d9629688c1..4d661efe3c7 100644 --- a/backends/test/suite/flows/vulkan.py +++ b/backends/test/suite/flows/vulkan.py @@ -1,9 +1,10 @@ -from executorch.backends.vulkan.test.tester import VulkanTester from executorch.backends.test.suite.flow import TestFlow +from executorch.backends.vulkan.test.tester import VulkanTester + def _create_vulkan_flow( - name: str, - quantize: bool = False, + name: str, + quantize: bool = False, ) -> TestFlow: return TestFlow( name, @@ -12,4 +13,5 @@ def _create_vulkan_flow( quantize=quantize, ) + VULKAN_TEST_FLOW = 
_create_vulkan_flow("vulkan") diff --git a/backends/vulkan/test/tester.py b/backends/vulkan/test/tester.py index 0b7cf51cee9..def5aa05e5f 100644 --- a/backends/vulkan/test/tester.py +++ b/backends/vulkan/test/tester.py @@ -10,9 +10,9 @@ import executorch.backends.test.harness.stages as BaseStages import torch -from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner from executorch.backends.test.harness import Tester as TesterBase from executorch.backends.test.harness.stages import StageType +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner from executorch.exir import EdgeCompileConfig from executorch.exir.backend.partitioner import Partitioner @@ -33,7 +33,8 @@ def __init__( super().__init__( default_partitioner_cls=VulkanPartitioner, partitioners=partitioners, - edge_compile_config=edge_compile_config or EdgeCompileConfig(_check_ir_validity=False), + edge_compile_config=edge_compile_config + or EdgeCompileConfig(_check_ir_validity=False), ) From 18e89c18d9f78834f0fb70b643124f5267a0b2ad Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Thu, 24 Jul 2025 16:54:49 -0700 Subject: [PATCH 17/38] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 15 +++++++++++++++ backends/test/suite/runner.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 15c19bf7c8e..e054bb1685b 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,6 +1,7 @@ import csv from collections import Counter from dataclasses import dataclass +from datetime import timedelta from enum import IntEnum from functools import reduce from typing import TextIO @@ -108,6 +109,12 @@ class TestCaseSummary: a single output tensor. """ + quantize_time: timedelta | None = None + """ The total runtime of the quantization stage, or none, if the test did not run the quantize stage. 
""" + + lower_time: timedelta | None = None + """ The total runtime of the to_edge_transform_and_lower stage, or none, if the test did not run the quantize stage. """ + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -190,6 +197,8 @@ def generate_csv_report(summary: RunSummary, output: TextIO): "Backend", "Flow", "Result", + "Quantize Time (s)", + "Lowering Time (s)", ] # Tests can have custom parameters. We'll want to report them here, so we need @@ -230,6 +239,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO): "Backend": record.backend, "Flow": record.flow, "Result": record.result.display_name(), + "Quantize Time (s)": ( + record.quantize_time.total_seconds() if record.quantize_time else None + ), + "Lowering Time (s)": ( + record.lower_time.total_seconds() if record.lower_time else None + ), } if record.params is not None: row.update({k.capitalize(): v for k, v in record.params.items()}) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 21c17c5e158..c73a2a39592 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -1,8 +1,10 @@ import argparse import importlib import re +import time import unittest +from datetime import timedelta from typing import Any import torch @@ -43,6 +45,7 @@ def run_test( # noqa: C901 """ error_statistics: list[ErrorStatistics] = [] + extra_stats = {} # Helper method to construct the summary. def build_result( @@ -57,6 +60,7 @@ def build_result( result=result, error=error, tensor_error_statistics=error_statistics, + **extra_stats, ) # Ensure the model can run in eager mode. 
@@ -71,11 +75,16 @@ def build_result( return build_result(TestResult.UNKNOWN_FAIL, e) if flow.quantize: + start_time = time.perf_counter() try: tester.quantize( flow.quantize_stage_factory() if flow.quantize_stage_factory else None ) + elapsed = time.perf_counter() - start_time + extra_stats["quantize_time"] = timedelta(seconds=elapsed) except Exception as e: + elapsed = time.perf_counter() - start_time + extra_stats["quantize_time"] = timedelta(seconds=elapsed) return build_result(TestResult.QUANTIZE_FAIL, e) try: @@ -86,9 +95,14 @@ def build_result( except Exception as e: return build_result(TestResult.EXPORT_FAIL, e) + lower_start_time = time.perf_counter() try: tester.to_edge_transform_and_lower() + elapsed = time.perf_counter() - lower_start_time + extra_stats["lower_time"] = timedelta(seconds=elapsed) except Exception as e: + elapsed = time.perf_counter() - lower_start_time + extra_stats["lower_time"] = timedelta(seconds=elapsed) return build_result(TestResult.LOWER_FAIL, e) is_delegated = any( @@ -183,6 +197,7 @@ def parse_args(): "--report", nargs="?", help="A file to write the test report to, in CSV format.", + default="backend_test_report.csv", ) return parser.parse_args() From 4719c90cfccffebd03ac92955a002003afd0a606 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Thu, 24 Jul 2025 18:27:44 -0700 Subject: [PATCH 18/38] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 83 ++++++++++++++++++++- backends/test/suite/runner.py | 10 +++ backends/test/suite/tests/test_reporting.py | 36 +++++++++ exir/program/_program.py | 10 ++- pytest.ini | 2 + 5 files changed, 139 insertions(+), 2 deletions(-) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index e054bb1685b..22affcaee84 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,12 +1,22 @@ import csv + from collections import Counter from dataclasses import dataclass from datetime import timedelta from enum import IntEnum 
from functools import reduce -from typing import TextIO +from typing import Any, TextIO from executorch.backends.test.harness.error_statistics import ErrorStatistics +from torch.export import ExportedProgram + + +# Operators that are excluded from the counts returned by count_ops. These are used to +# exclude operations that are not logically relevant or delegatable to backends. +OP_COUNT_IGNORED_OPS = { + "executorch_call_delegate", + "getitem", +} class TestResult(IntEnum): @@ -115,6 +125,12 @@ class TestCaseSummary: lower_time: timedelta | None = None """ The total runtime of the to_edge_transform_and_lower stage, or none, if the test did not run the quantize stage. """ + delegated_op_counts: Counter | None = None + """ The number of delegated occurances of each operator in the graph. """ + + undelegated_op_counts: Counter | None = None + """ The number of undelegated occurances of each operator in the graph. """ + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -164,6 +180,40 @@ def from_session(cls, session: TestSessionState) -> "RunSummary": _active_session: TestSessionState | None = None +def _get_target_name(target: Any) -> str: + """Retrieve a string representation of a node target.""" + if isinstance(target, str): + return target + elif hasattr(target, "name"): + return target.name() # Op overloads have this + elif hasattr(target, "__name__"): + return target.__name__ # Some builtins have this + else: + return str(target) + + +def _count_ops(program: ExportedProgram) -> Counter: + op_names = ( + _get_target_name(n.target) + for n in program.graph.nodes + if n.op == "call_function" + ) + + return Counter(op for op in op_names if op not in OP_COUNT_IGNORED_OPS) + + +def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter: + if isinstance(program, ExportedProgram): + return _count_ops(program) + else: + # Sum op counts for all methods in the program. 
+ return reduce( + lambda a, b: a + b, + (_count_ops(p) for p in program.values()), + Counter(), + ) + + def begin_test_session(): global _active_session @@ -188,6 +238,24 @@ def complete_test_session() -> RunSummary: return summary +def _sum_op_counts(counter: Counter | None) -> int | None: + """ + A utility function to count the total number of nodes in an op count dict. + """ + return sum(counter.values()) if counter is not None else None + + +def _serialize_op_counts(counter: Counter | None) -> str: + """ + A utility function to serialize op counts to a string, for the purpose of including + in the test report. + """ + if counter is not None: + return str(dict(sorted(counter.items()))) + else: + return "" + + def generate_csv_report(summary: RunSummary, output: TextIO): """Write a run summary report to a file in CSV format.""" @@ -228,6 +296,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO): f"Output {i} SQNR", ] ) + field_names.extend( + [ + "Delegated Nodes", + "Undelegated Nodes", + "Delegated Ops", + "Undelegated Ops", + ] + ) writer = csv.DictWriter(output, field_names) writer.writeheader() @@ -256,4 +332,9 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm row[f"Output {output_idx} SQNR"] = error_stats.sqnr + row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) + row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) + row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) + row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index c73a2a39592..eb9a80a9efa 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -16,11 +16,13 @@ from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, + count_ops, generate_csv_report, 
RunSummary, TestCaseSummary, TestResult, ) +from executorch.exir import EdgeProgramManager # A list of all runnable test suites and the corresponding python package. @@ -105,6 +107,14 @@ def build_result( extra_stats["lower_time"] = timedelta(seconds=elapsed) return build_result(TestResult.LOWER_FAIL, e) + edge_manager: EdgeProgramManager = tester.get_artifact() + edge_op_counts = count_ops(edge_manager.original_edge_programs) + undelegated_op_counts = count_ops(edge_manager._edge_programs) + delegated_op_counts = edge_op_counts - undelegated_op_counts + + extra_stats["delegated_op_counts"] = delegated_op_counts + extra_stats["undelegated_op_counts"] = undelegated_op_counts + is_delegated = any( n.target == torch._higher_order_ops.executorch_call_delegate for n in tester.stages[tester.cur].graph_module.graph.nodes diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 5adda651082..3b711e45949 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -5,7 +5,10 @@ import torch +from executorch.exir import to_edge + from ..reporting import ( + count_ops, generate_csv_report, RunSummary, TestCaseSummary, @@ -23,6 +26,7 @@ params=None, result=TestResult.SUCCESS, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend2", @@ -32,6 +36,7 @@ params=None, result=TestResult.LOWER_FAIL, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend1", @@ -41,6 +46,7 @@ params={"dtype": torch.float32}, result=TestResult.SUCCESS_UNDELEGATED, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend2", @@ -50,6 +56,7 @@ params={"use_dynamic_shapes": True}, result=TestResult.EXPORT_FAIL, error=None, + tensor_error_statistics=[], ), ] @@ -104,3 +111,32 @@ def test_csv_report_simple(self): self.assertEqual(records[3]["Result"], "Fail (Export)") self.assertEqual(records[3]["Dtype"], "") 
self.assertEqual(records[3]["Use_dynamic_shapes"], "True") + + def test_count_ops(self): + """ + Verify that the count_ops function correctly counts operator occurrences in the edge graph. + """ + + class Model1(torch.nn.Module): + def forward(self, x, y): + return x + y + + class Model2(torch.nn.Module): + def forward(self, x, y): + return x + y * y + + args = (torch.randn(2), torch.randn(2)) + ep1 = torch.export.export(Model1(), args) + ep2 = torch.export.export(Model2(), args) + + ep = to_edge({"forward1": ep1, "forward2": ep2}) + + op_counts = count_ops(ep._edge_programs) + + self.assertEqual( + op_counts, + { + "aten::add.Tensor": 2, + "aten::mul.Tensor": 1, + }, + ) diff --git a/exir/program/_program.py b/exir/program/_program.py index 8bbe0833b85..555af38e33b 100644 --- a/exir/program/_program.py +++ b/exir/program/_program.py @@ -1179,6 +1179,7 @@ def _gen_edge_manager_for_partitioners( config, list(set().union(*ops_set_to_not_decompose_by_program.values())), ) + return edge_manager @@ -1410,6 +1411,8 @@ class EdgeProgramManager: Manages the second link in the lowering chain of ATen -> Edge -> ExecuTorch. """ + original_edge_programs: dict[str, ExportedProgram] | None = None + def __init__( self, edge_programs: Union[ExportedProgram, Dict[str, ExportedProgram]], @@ -1558,12 +1561,17 @@ def to_backend( new_edge_programs = to_backend(method_to_programs_and_partitioners) config = EdgeCompileConfig(_check_ir_validity=False) - return EdgeProgramManager( + new_edge_manager = EdgeProgramManager( new_edge_programs, copy.deepcopy(self._config_methods), config, ) + # Placeholder - not for land + new_edge_manager.original_edge_programs = copy.deepcopy(self._edge_programs) + + return new_edge_manager + @et_logger("to_executorch") def to_executorch( self, diff --git a/pytest.ini b/pytest.ini index da56ddbd8d5..aae87f242a7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -48,6 +48,8 @@ addopts = # is stable and signal to noise ratio is good (no irrelevant failures). 
# See https://github.com/pytorch/executorch/discussions/11140 --ignore=backends/test + backends/test/harness/tests + backends/test/suite/tests # backends/xnnpack backends/xnnpack/test/ops --ignore=backends/xnnpack/test/ops/test_bmm.py From b2ab3a57a0bed3db6153d0e745990f6a41f74912 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Fri, 8 Aug 2025 16:36:15 -0700 Subject: [PATCH 19/38] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 7 +++++++ backends/test/suite/runner.py | 1 + 2 files changed, 8 insertions(+) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 22affcaee84..6981047b580 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -131,6 +131,9 @@ class TestCaseSummary: undelegated_op_counts: Counter | None = None """ The number of undelegated occurances of each operator in the graph. """ + pte_size_bytes: int | None = None + """ The size of the PTE file in bytes. """ + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -302,6 +305,7 @@ def generate_csv_report(summary: RunSummary, output: TextIO): "Undelegated Nodes", "Delegated Ops", "Undelegated Ops", + "PTE Size (Kb)", ] ) @@ -336,5 +340,8 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) + row["PTE Size (Kb)"] = ( + record.pte_size_bytes / 1000.0 if record.pte_size_bytes else "" + ) writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index c57483455a3..1d03bcf78db 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -129,6 +129,7 @@ def build_result( if is_delegated: try: tester.to_executorch().serialize() + extra_stats["pte_size_bytes"] = len(tester.get_artifact()) except Exception as e: # We 
could introduce a result value for this, but I'm not sure it's necessary. # We can do this if we ever see to_executorch() or serialize() fail due a backend issue. From c23c3e9d18d50059178527ff0dc0a033d42d6877 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Fri, 8 Aug 2025 16:58:12 -0700 Subject: [PATCH 20/38] Update [ghstack-poisoned] --- .../stages/to_edge_transform_and_lower.py | 8 ++++++-- backends/test/harness/tester.py | 4 ++-- backends/test/suite/flow.py | 11 ++++++++++- backends/test/suite/flows/portable.py | 19 +++++++++++++++++++ backends/test/suite/runner.py | 4 ++-- 5 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 backends/test/suite/flows/portable.py diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py index e436fc04a93..2d526cd593d 100644 --- a/backends/test/harness/stages/to_edge_transform_and_lower.py +++ b/backends/test/harness/stages/to_edge_transform_and_lower.py @@ -15,11 +15,15 @@ class ToEdgeTransformAndLower(Stage): def __init__( self, - default_partitioner_cls: Type, + default_partitioner_cls: Type | None = None, partitioners: Optional[List[Partitioner]] = None, edge_compile_config: Optional[EdgeCompileConfig] = None, ): - self.partitioners = partitioners or [default_partitioner_cls()] + self.partitioners = ( + partitioners or [default_partitioner_cls()] + if default_partitioner_cls is not None + else [] + ) self.edge_compile_conf = edge_compile_config or EdgeCompileConfig() self.edge_dialect_program = None diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index 9f3c0ebba80..2dbfec516d0 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -34,12 +34,12 @@ def __init__( self, module: torch.nn.Module, example_inputs: Tuple[torch.Tensor], - stage_classes: Dict[StageType, Callable], + stage_classes: Dict[StageType, Callable] | None = None, dynamic_shapes: Optional[Tuple[Any]] 
= None, ): module.eval() - self.stage_classes = stage_classes + self.stage_classes = stage_classes or Tester.default_stage_classes() self.original_module = module self.example_inputs = example_inputs self.dynamic_shapes = dynamic_shapes diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 124891fc541..4324db46796 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -26,16 +26,25 @@ class TestFlow: tester_factory: Callable[..., Tester] """ A factory function that returns a Tester instance for this lowering flow. """ - quantize: bool = field(default=False) + quantize: bool = False """ Whether to tester should run the quantize stage on the model. """ quantize_stage_factory: Callable[..., Quantize] | None = None """ A factory function which instantiates a Quantize stage. Can be None to use the tester's default. """ + is_delegated: bool = True + """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """ + def all_flows() -> dict[str, TestFlow]: flows = [] + from executorch.backends.test.suite.flows.portable import PORTABLE_TEST_FLOW + + flows += [ + PORTABLE_TEST_FLOW, + ] + try: from executorch.backends.test.suite.flows.xnnpack import ( XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW, diff --git a/backends/test/suite/flows/portable.py b/backends/test/suite/flows/portable.py new file mode 100644 index 00000000000..ab176fb0e2d --- /dev/null +++ b/backends/test/suite/flows/portable.py @@ -0,0 +1,19 @@ +import logging + +from executorch.backends.test.harness import Tester +from executorch.backends.test.suite.flow import TestFlow + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def _create_portable_flow() -> TestFlow: + return TestFlow( + "portable", + backend="portable", + tester_factory=Tester, + is_delegated=False, + ) + + +PORTABLE_TEST_FLOW = _create_portable_flow() diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 1d03bcf78db..5e4f1dcf32a 100644 
--- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -125,8 +125,8 @@ def build_result( if n.op == "call_function" ) - # Only run the runtime portion if something was delegated. - if is_delegated: + # Only run the runtime portion if something was delegated (or the flow doesn't delegate). + if is_delegated or not flow.is_delegated: try: tester.to_executorch().serialize() extra_stats["pte_size_bytes"] = len(tester.get_artifact()) From c99c41a448215a616ba8674f2c9b99dd68ad2efd Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Sat, 9 Aug 2025 12:09:17 -0700 Subject: [PATCH 21/38] Update [ghstack-poisoned] --- .../stages/to_edge_transform_and_lower.py | 4 ++- backends/test/suite/operators/test_amax.py | 12 ++++----- backends/test/suite/operators/test_amin.py | 12 ++++----- backends/test/suite/operators/test_argmax.py | 12 ++++----- backends/test/suite/operators/test_argmin.py | 12 ++++----- backends/test/suite/operators/test_floor.py | 4 +-- backends/test/suite/reporting.py | 25 ++++++++----------- backends/test/suite/runner.py | 24 +++++++++++++++--- 8 files changed, 60 insertions(+), 45 deletions(-) diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py index 2d526cd593d..6c7b4966be3 100644 --- a/backends/test/harness/stages/to_edge_transform_and_lower.py +++ b/backends/test/harness/stages/to_edge_transform_and_lower.py @@ -24,7 +24,9 @@ def __init__( if default_partitioner_cls is not None else [] ) - self.edge_compile_conf = edge_compile_config or EdgeCompileConfig() + self.edge_compile_conf = edge_compile_config or EdgeCompileConfig( + _check_ir_validity=False + ) self.edge_dialect_program = None def stage_type(self) -> StageType: diff --git a/backends/test/suite/operators/test_amax.py b/backends/test/suite/operators/test_amax.py index aff33476e69..0c9a8c06f0d 100644 --- a/backends/test/suite/operators/test_amax.py +++ 
b/backends/test/suite/operators/test_amax.py @@ -207,19 +207,19 @@ def test_amax_edge_cases(self, flow: TestFlow) -> None: AmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -227,19 +227,19 @@ def test_amax_edge_cases(self, flow: TestFlow) -> None: AmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) def test_amax_scalar(self, flow: TestFlow) -> None: diff --git a/backends/test/suite/operators/test_amin.py b/backends/test/suite/operators/test_amin.py index ab59d77d0be..f4b88b1dade 100644 --- a/backends/test/suite/operators/test_amin.py +++ b/backends/test/suite/operators/test_amin.py @@ -209,19 +209,19 @@ def test_amin_edge_cases(self, flow: TestFlow) -> None: AminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -229,19 +229,19 @@ def test_amin_edge_cases(self, flow: TestFlow) -> None: AminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( 
AminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) def test_amin_scalar(self, flow: TestFlow) -> None: diff --git a/backends/test/suite/operators/test_argmax.py b/backends/test/suite/operators/test_argmax.py index adf1e43a340..dc8b57fc214 100644 --- a/backends/test/suite/operators/test_argmax.py +++ b/backends/test/suite/operators/test_argmax.py @@ -149,19 +149,19 @@ def test_argmax_edge_cases(self, flow: TestFlow) -> None: ArgmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -169,19 +169,19 @@ def test_argmax_edge_cases(self, flow: TestFlow) -> None: ArgmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([5.0]) diff --git a/backends/test/suite/operators/test_argmin.py b/backends/test/suite/operators/test_argmin.py index 0613c74a3ee..d7a24e24f5a 100644 --- a/backends/test/suite/operators/test_argmin.py +++ b/backends/test/suite/operators/test_argmin.py @@ -149,19 +149,19 @@ def test_argmin_edge_cases(self, flow: TestFlow) -> None: ArgminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = 
torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -169,19 +169,19 @@ def test_argmin_edge_cases(self, flow: TestFlow) -> None: ArgminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( ArgminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([5.0]) diff --git a/backends/test/suite/operators/test_floor.py b/backends/test/suite/operators/test_floor.py index e5da5da63df..fcc834afa16 100644 --- a/backends/test/suite/operators/test_floor.py +++ b/backends/test/suite/operators/test_floor.py @@ -18,8 +18,8 @@ class FloorModel(torch.nn.Module): - def __init__(self): - super().__init__() + def forward(self, x): + return torch.floor(x) @operator_test diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 6981047b580..93a93f76283 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -28,35 +28,32 @@ class TestResult(IntEnum): SUCCESS_UNDELEGATED = 1 """ The test succeeded without the backend delegating anything. """ - EAGER_FAIL = 2 - """ The test failed due to the model failing to run in eager mode. """ + SKIPPED = 2 + """ The test was skipped due to a non-backend failure. """ QUANTIZE_FAIL = 3 """ The test failed due to the quantization stage failing. """ - EXPORT_FAIL = 4 - """ The test failed due to the model failing to export. """ - - LOWER_FAIL = 5 + LOWER_FAIL = 4 """ The test failed due to a failure in partitioning or lowering. """ - PTE_LOAD_FAIL = 6 + PTE_LOAD_FAIL = 5 """ The test failed due to the resulting PTE failing to load. """ - PTE_RUN_FAIL = 7 + PTE_RUN_FAIL = 6 """ The test failed due to the resulting PTE failing to run. 
""" - OUTPUT_MISMATCH_FAIL = 8 + OUTPUT_MISMATCH_FAIL = 7 """ The test failed due to a mismatch between runtime and reference outputs. """ - UNKNOWN_FAIL = 9 + UNKNOWN_FAIL = 8 """ The test failed in an unknown or unexpected manner. """ def is_success(self): return self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED} def is_non_backend_failure(self): - return self in {TestResult.EAGER_FAIL, TestResult.EAGER_FAIL} + return self in {TestResult.SKIPPED} def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() @@ -66,12 +63,10 @@ def display_name(self): return "Success (Delegated)" elif self == TestResult.SUCCESS_UNDELEGATED: return "Success (Undelegated)" - elif self == TestResult.EAGER_FAIL: - return "Fail (Eager)" + elif self == TestResult.SKIPPED: + return "Skipped" elif self == TestResult.QUANTIZE_FAIL: return "Fail (Quantize)" - elif self == TestResult.EXPORT_FAIL: - return "Fail (Export)" elif self == TestResult.LOWER_FAIL: return "Fail (Lowering)" elif self == TestResult.PTE_LOAD_FAIL: diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 5e4f1dcf32a..bffd726cf94 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -9,6 +9,14 @@ import torch +# Set of unsupported ops that should cause tests to be skipped +UNSUPPORTED_PORTABLE_OPS = { + "aten::_embedding_bag", + "aten::median", + "aten::median.dim", + "aten::round.decimals", +} + from executorch.backends.test.harness.error_statistics import ErrorStatistics from executorch.backends.test.harness.stages import StageType from executorch.backends.test.suite.discovery import discover_tests, TestFilter @@ -70,7 +78,7 @@ def build_result( try: model(*inputs) except Exception as e: - return build_result(TestResult.EAGER_FAIL, e) + return build_result(TestResult.SKIPPED, e) try: tester = flow.tester_factory(model, inputs) @@ -96,7 +104,7 @@ def build_result( tester._get_default_stage(StageType.EXPORT, 
dynamic_shapes=dynamic_shapes), ) except Exception as e: - return build_result(TestResult.EXPORT_FAIL, e) + return build_result(TestResult.SKIPPED, e) lower_start_time = time.perf_counter() try: @@ -125,7 +133,16 @@ def build_result( if n.op == "call_function" ) - # Only run the runtime portion if something was delegated (or the flow doesn't delegate). + # Check if any undelegated ops are in the unsupported ops set + has_unsupported_ops = any( + op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys() + ) + + # Skip the test if there are unsupported portable ops remaining. + if has_unsupported_ops: + return build_result(TestResult.SKIPPED) + + # Only run the runtime portion if something was delegated (or the flow doesn't delegate) if is_delegated or not flow.is_delegated: try: tester.to_executorch().serialize() @@ -148,6 +165,7 @@ def build_result( except Exception as e: return build_result(TestResult.PTE_RUN_FAIL, e) else: + # Skip the test if nothing is delegated return build_result(TestResult.SUCCESS_UNDELEGATED) return build_result(TestResult.SUCCESS) From bf57d6cfa8eae2e9fb0bde00209b7aab1e66454d Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Sat, 9 Aug 2025 12:12:38 -0700 Subject: [PATCH 22/38] Update [ghstack-poisoned] --- backends/test/suite/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index bffd726cf94..72c290106ce 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -133,7 +133,7 @@ def build_result( if n.op == "call_function" ) - # Check if any undelegated ops are in the unsupported ops set + # Check if any undelegated ops are in the unsupported ops set. 
has_unsupported_ops = any( op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys() ) From c6bd56b042201d964f452d5fc5815a6be27454dc Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 16:52:27 -0700 Subject: [PATCH 23/38] Update [ghstack-poisoned] --- backends/test/suite/tests/test_reporting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 3b711e45949..5eab5648335 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -54,7 +54,7 @@ flow="flow1", name="test2_backend2_flow1", params={"use_dynamic_shapes": True}, - result=TestResult.EXPORT_FAIL, + result=TestResult.SKIPPED, error=None, tensor_error_statistics=[], ), @@ -108,7 +108,7 @@ def test_csv_report_simple(self): self.assertEqual(records[3]["Test Case"], "test2") self.assertEqual(records[3]["Backend"], "backend2") self.assertEqual(records[3]["Flow"], "flow1") - self.assertEqual(records[3]["Result"], "Fail (Export)") + self.assertEqual(records[3]["Result"], "Skipped") self.assertEqual(records[3]["Dtype"], "") self.assertEqual(records[3]["Use_dynamic_shapes"], "True") From 144a8ae5631a3d1a8e56e209a9457a8a23303f66 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 17:55:42 -0700 Subject: [PATCH 24/38] Update [ghstack-poisoned] --- backends/test/suite/runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 72c290106ce..101e168476b 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -159,6 +159,8 @@ def build_result( tester.run_method_and_compare_outputs( inputs=None if generate_random_test_inputs else inputs, statistics_callback=lambda stats: error_statistics.append(stats), + atol=1e-1, + rtol=4e-2, ) except AssertionError as e: return build_result(TestResult.OUTPUT_MISMATCH_FAIL, e) From 
6f85fc13f24933c1df045ce897b89694a8833ccc Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 18:47:04 -0700 Subject: [PATCH 25/38] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 63 +++++++++++++++------ backends/test/suite/tests/test_reporting.py | 12 ++-- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 93a93f76283..cb37ded947e 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -57,6 +57,36 @@ def is_non_backend_failure(self): def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() + + def to_short_str(self): + if self in { TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED }: + return "Pass" + elif self == TestResult.SKIPPED: + return "Skip" + else: + return "Fail" + + def to_detail_str(self): + if self == TestResult.SUCCESS: + return "" + elif self == TestResult.SUCCESS_UNDELEGATED: + return "" + elif self == TestResult.SKIPPED: + return "" + elif self == TestResult.QUANTIZE_FAIL: + return "Quantization Failed" + elif self == TestResult.LOWER_FAIL: + return "Lowering Failed" + elif self == TestResult.PTE_LOAD_FAIL: + return "PTE Load Failed" + elif self == TestResult.PTE_RUN_FAIL: + return "PTE Run Failed" + elif self == TestResult.OUTPUT_MISMATCH_FAIL: + return "Output Mismatch" + elif self == TestResult.UNKNOWN_FAIL: + return "Unknown Failure" + else: + raise ValueError(f"Invalid TestResult value: {self}.") def display_name(self): if self == TestResult.SUCCESS: @@ -129,6 +159,9 @@ class TestCaseSummary: pte_size_bytes: int | None = None """ The size of the PTE file in bytes. 
""" + def is_delegated(self): + return any(v > 0 for v in self.delegated_op_counts.values()) if self.delegated_op_counts else False + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -260,11 +293,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO): field_names = [ "Test ID", "Test Case", - "Backend", "Flow", "Result", + "Result Detail", + "Delegated", "Quantize Time (s)", - "Lowering Time (s)", + "Lower Time (s)", ] # Tests can have custom parameters. We'll want to report them here, so we need @@ -289,9 +323,7 @@ def generate_csv_report(summary: RunSummary, output: TextIO): [ f"Output {i} Error Max", f"Output {i} Error MAE", - f"Output {i} Error MSD", - f"Output {i} Error L2", - f"Output {i} SQNR", + f"Output {i} SNR", ] ) field_names.extend( @@ -311,32 +343,31 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row = { "Test ID": record.name, "Test Case": record.base_name, - "Backend": record.backend, "Flow": record.flow, - "Result": record.result.display_name(), + "Result": record.result.to_short_str(), + "Result Detail": record.result.to_detail_str(), + "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( - record.quantize_time.total_seconds() if record.quantize_time else None + f"{record.quantize_time.total_seconds():.3f}" if record.quantize_time else None ), - "Lowering Time (s)": ( - record.lower_time.total_seconds() if record.lower_time else None + "Lower Time (s)": ( + f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None ), } if record.params is not None: row.update({k.capitalize(): v for k, v in record.params.items()}) for output_idx, error_stats in enumerate(record.tensor_error_statistics): - row[f"Output {output_idx} Error Max"] = error_stats.error_max - row[f"Output {output_idx} Error MAE"] = error_stats.error_mae - row[f"Output {output_idx} Error MSD"] = error_stats.error_msd - row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm - 
row[f"Output {output_idx} SQNR"] = error_stats.sqnr + row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}" + row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}" + row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}" row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) row["PTE Size (Kb)"] = ( - record.pte_size_bytes / 1000.0 if record.pte_size_bytes else "" + f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else "" ) writer.writerow(row) diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 5eab5648335..c3324b58332 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -79,36 +79,32 @@ def test_csv_report_simple(self): # Validate first record: test1, backend1, SUCCESS self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1") self.assertEqual(records[0]["Test Case"], "test1") - self.assertEqual(records[0]["Backend"], "backend1") self.assertEqual(records[0]["Flow"], "flow1") - self.assertEqual(records[0]["Result"], "Success (Delegated)") + self.assertEqual(records[0]["Result"], "Pass") self.assertEqual(records[0]["Dtype"], "") self.assertEqual(records[0]["Use_dynamic_shapes"], "") # Validate second record: test1, backend2, LOWER_FAIL self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1") self.assertEqual(records[1]["Test Case"], "test1") - self.assertEqual(records[1]["Backend"], "backend2") self.assertEqual(records[1]["Flow"], "flow1") - self.assertEqual(records[1]["Result"], "Fail (Lowering)") + self.assertEqual(records[1]["Result"], "Fail") self.assertEqual(records[1]["Dtype"], "") self.assertEqual(records[1]["Use_dynamic_shapes"], "") # Validate third 
record: test2, backend1, SUCCESS_UNDELEGATED with dtype param self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1") self.assertEqual(records[2]["Test Case"], "test2") - self.assertEqual(records[2]["Backend"], "backend1") self.assertEqual(records[2]["Flow"], "flow1") - self.assertEqual(records[2]["Result"], "Success (Undelegated)") + self.assertEqual(records[2]["Result"], "Pass") self.assertEqual(records[2]["Dtype"], str(torch.float32)) self.assertEqual(records[2]["Use_dynamic_shapes"], "") # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") self.assertEqual(records[3]["Test Case"], "test2") - self.assertEqual(records[3]["Backend"], "backend2") self.assertEqual(records[3]["Flow"], "flow1") - self.assertEqual(records[3]["Result"], "Skipped") + self.assertEqual(records[3]["Result"], "Skip") self.assertEqual(records[3]["Dtype"], "") self.assertEqual(records[3]["Use_dynamic_shapes"], "True") From 2439022c59436b56db5ddf66ad95df1212bcf91f Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 20:35:00 -0700 Subject: [PATCH 26/38] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 202 +++++++++++--------- backends/test/suite/runner.py | 8 +- backends/test/suite/tests/test_reporting.py | 19 +- 3 files changed, 121 insertions(+), 108 deletions(-) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index cb37ded947e..6294ab9434f 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,7 +1,7 @@ import csv from collections import Counter -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import timedelta from enum import IntEnum from functools import reduce @@ -11,6 +11,40 @@ from torch.export import ExportedProgram +# The maximum number of model output tensors to log statistics for. 
Most model tests will +# only have one output, but some may return more than one tensor. This upper bound is needed +# upfront since the file is written progressively. Any outputs beyond these will not have stats logged. +MAX_LOGGED_MODEL_OUTPUTS = 2 + + +# Field names for the CSV report. +CSV_FIELD_NAMES = [ + "Test ID", + "Test Case", + "Flow", + "Params", + "Result", + "Result Detail", + "Delegated", + "Quantize Time (s)", + "Lower Time (s)", + "Delegated Nodes", + "Undelegated Nodes", + "Delegated Ops", + "Undelegated Ops", + "PTE Size (Kb)", +] + +for i in range(MAX_LOGGED_MODEL_OUTPUTS): + CSV_FIELD_NAMES.extend( + [ + f"Output {i} Error Max", + f"Output {i} Error MAE", + f"Output {i} SNR", + ] + ) + + # Operators that are excluded from the counts returned by count_ops. These are used to # exclude operatations that are not logically relevant or delegatable to backends. OP_COUNT_IGNORED_OPS = { @@ -57,15 +91,15 @@ def is_non_backend_failure(self): def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() - + def to_short_str(self): - if self in { TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED }: + if self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}: return "Pass" elif self == TestResult.SKIPPED: return "Skip" else: return "Fail" - + def to_detail_str(self): if self == TestResult.SUCCESS: return "" @@ -160,14 +194,22 @@ class TestCaseSummary: """ The size of the PTE file in bytes. """ def is_delegated(self): - return any(v > 0 for v in self.delegated_op_counts.values()) if self.delegated_op_counts else False + return ( + any(v > 0 for v in self.delegated_op_counts.values()) + if self.delegated_op_counts + else False + ) +@dataclass class TestSessionState: - test_case_summaries: list[TestCaseSummary] + # True if the CSV header has been written to report__path. 
+ has_written_report_header: bool = False - def __init__(self): - self.test_case_summaries = [] + # The file path to write the detail report to, if enabled. + report_path: str | None = None + + test_case_summaries: list[TestCaseSummary] = field(default_factory=list) @dataclass @@ -245,11 +287,11 @@ def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter: ) -def begin_test_session(): +def begin_test_session(report_path: str | None): global _active_session assert _active_session is None, "A test session is already active." - _active_session = TestSessionState() + _active_session = TestSessionState(report_path=report_path) def log_test_summary(summary: TestCaseSummary): @@ -258,6 +300,15 @@ def log_test_summary(summary: TestCaseSummary): if _active_session is not None: _active_session.test_case_summaries.append(summary) + if _active_session.report_path is not None: + file_mode = "a" if _active_session.has_written_report_header else "w" + with open(_active_session.report_path, file_mode) as f: + if not _active_session.has_written_report_header: + write_csv_header(f) + _active_session.has_written_report_header = True + + write_csv_row(summary, f) + def complete_test_session() -> RunSummary: global _active_session @@ -276,6 +327,13 @@ def _sum_op_counts(counter: Counter | None) -> int | None: return sum(counter.values()) if counter is not None else None +def _serialize_params(params: dict[str, Any] | None) -> str: + if params is not None: + return str(dict(sorted(params.items()))) + else: + return "" + + def _serialize_op_counts(counter: Counter | None) -> str: """ A utility function to serialize op counts to a string, for the purpose of including @@ -287,87 +345,49 @@ def _serialize_op_counts(counter: Counter | None) -> str: return "" -def generate_csv_report(summary: RunSummary, output: TextIO): - """Write a run summary report to a file in CSV format.""" - - field_names = [ - "Test ID", - "Test Case", - "Flow", - "Result", - "Result Detail", - 
"Delegated", - "Quantize Time (s)", - "Lower Time (s)", - ] - - # Tests can have custom parameters. We'll want to report them here, so we need - # a list of all unique parameter names. - param_names = reduce( - lambda a, b: a.union(b), - ( - set(s.params.keys()) - for s in summary.test_case_summaries - if s.params is not None - ), - set(), - ) - field_names += (s.capitalize() for s in param_names) - - # Add tensor error statistic field names for each output index. - max_outputs = max( - len(s.tensor_error_statistics) for s in summary.test_case_summaries - ) - for i in range(max_outputs): - field_names.extend( - [ - f"Output {i} Error Max", - f"Output {i} Error MAE", - f"Output {i} SNR", - ] - ) - field_names.extend( - [ - "Delegated Nodes", - "Undelegated Nodes", - "Delegated Ops", - "Undelegated Ops", - "PTE Size (Kb)", - ] - ) - - writer = csv.DictWriter(output, field_names) +def write_csv_header(output: TextIO): + writer = csv.DictWriter(output, CSV_FIELD_NAMES) writer.writeheader() - for record in summary.test_case_summaries: - row = { - "Test ID": record.name, - "Test Case": record.base_name, - "Flow": record.flow, - "Result": record.result.to_short_str(), - "Result Detail": record.result.to_detail_str(), - "Delegated": "True" if record.is_delegated() else "False", - "Quantize Time (s)": ( - f"{record.quantize_time.total_seconds():.3f}" if record.quantize_time else None - ), - "Lower Time (s)": ( - f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None - ), - } - if record.params is not None: - row.update({k.capitalize(): v for k, v in record.params.items()}) - - for output_idx, error_stats in enumerate(record.tensor_error_statistics): - row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}" - row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}" - row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}" - - row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) - row["Undelegated Nodes"] = 
_sum_op_counts(record.undelegated_op_counts) - row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) - row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) - row["PTE Size (Kb)"] = ( - f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else "" - ) - writer.writerow(row) +def write_csv_row(record: TestCaseSummary, output: TextIO): + writer = csv.DictWriter(output, CSV_FIELD_NAMES) + + row = { + "Test ID": record.name, + "Test Case": record.base_name, + "Flow": record.flow, + "Params": _serialize_params(record.params), + "Result": record.result.to_short_str(), + "Result Detail": record.result.to_detail_str(), + "Delegated": "True" if record.is_delegated() else "False", + "Quantize Time (s)": ( + f"{record.quantize_time.total_seconds():.3f}" + if record.quantize_time + else None + ), + "Lower Time (s)": ( + f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None + ), + } + + for output_idx, error_stats in enumerate(record.tensor_error_statistics): + if output_idx >= MAX_LOGGED_MODEL_OUTPUTS: + print( + f"Model output stats are truncated as model has more than {MAX_LOGGED_MODEL_OUTPUTS} outputs. Consider increasing MAX_LOGGED_MODEL_OUTPUTS." 
+ ) + break + + row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}" + row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}" + row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}" + + row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) + row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) + row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) + row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) + row["PTE Size (Kb)"] = ( + f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else "" + ) + + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 101e168476b..b128d64eca2 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -25,7 +25,6 @@ begin_test_session, complete_test_session, count_ops, - generate_csv_report, RunSummary, TestCaseSummary, TestResult, @@ -248,7 +247,7 @@ def build_test_filter(args: argparse.Namespace) -> TestFilter: def runner_main(): args = parse_args() - begin_test_session() + begin_test_session(args.report) if len(args.suite) > 1: raise NotImplementedError("TODO Support multiple suites.") @@ -263,11 +262,6 @@ def runner_main(): summary = complete_test_session() print_summary(summary) - if args.report is not None: - with open(args.report, "w") as f: - print(f"Writing CSV report to {args.report}.") - generate_csv_report(summary, f) - if __name__ == "__main__": runner_main() diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index c3324b58332..6ab4817b44c 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -9,11 +9,12 @@ from ..reporting import ( count_ops, - generate_csv_report, RunSummary, TestCaseSummary, TestResult, TestSessionState, + write_csv_header, + write_csv_row, ) # Test data for simulated test results. 
@@ -69,7 +70,9 @@ def test_csv_report_simple(self): run_summary = RunSummary.from_session(session_state) strio = StringIO() - generate_csv_report(run_summary, strio) + write_csv_header(strio) + for case_summary in run_summary.test_case_summaries: + write_csv_row(case_summary, strio) # Attempt to deserialize and validate the CSV report. report = DictReader(StringIO(strio.getvalue())) @@ -81,32 +84,28 @@ def test_csv_report_simple(self): self.assertEqual(records[0]["Test Case"], "test1") self.assertEqual(records[0]["Flow"], "flow1") self.assertEqual(records[0]["Result"], "Pass") - self.assertEqual(records[0]["Dtype"], "") - self.assertEqual(records[0]["Use_dynamic_shapes"], "") + self.assertEqual(records[0]["Params"], "") # Validate second record: test1, backend2, LOWER_FAIL self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1") self.assertEqual(records[1]["Test Case"], "test1") self.assertEqual(records[1]["Flow"], "flow1") self.assertEqual(records[1]["Result"], "Fail") - self.assertEqual(records[1]["Dtype"], "") - self.assertEqual(records[1]["Use_dynamic_shapes"], "") + self.assertEqual(records[1]["Params"], "") # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1") self.assertEqual(records[2]["Test Case"], "test2") self.assertEqual(records[2]["Flow"], "flow1") self.assertEqual(records[2]["Result"], "Pass") - self.assertEqual(records[2]["Dtype"], str(torch.float32)) - self.assertEqual(records[2]["Use_dynamic_shapes"], "") + self.assertEqual(records[2]["Params"], str({"dtype": torch.float32})) # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") self.assertEqual(records[3]["Test Case"], "test2") self.assertEqual(records[3]["Flow"], "flow1") self.assertEqual(records[3]["Result"], "Skip") - self.assertEqual(records[3]["Dtype"], "") - 
self.assertEqual(records[3]["Use_dynamic_shapes"], "True") + self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True})) def test_count_ops(self): """ From bd79ef22ad6cbe215bc4d539db1992f5fada7843 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 21:10:51 -0700 Subject: [PATCH 27/38] Update [ghstack-poisoned] --- backends/test/harness/stages/to_edge_transform_and_lower.py | 1 - backends/test/harness/tester.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py index e436fc04a93..0949b633c5d 100644 --- a/backends/test/harness/stages/to_edge_transform_and_lower.py +++ b/backends/test/harness/stages/to_edge_transform_and_lower.py @@ -8,7 +8,6 @@ ) from executorch.exir.backend.partitioner import Partitioner -from sympy.ntheory import generate from torch.export import ExportedProgram diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index 9f3c0ebba80..7e5b558aff0 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -186,7 +186,7 @@ def _post(self, stage): def _run_stage(self, stage_instance, inputs=None, *args, **kwargs): assert isinstance(stage_instance, Stage) prev_stage_artifact = self._pre(stage_instance) - stage_instance.run(prev_stage_artifact, inputs=inputs, *args, **kwargs) + stage_instance.run(prev_stage_artifact, inputs=inputs, *args, **kwargs) # noqa self._post(stage_instance) return self From 2c4488f9f70c22a60dee456586857a73f4d523f6 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 21:12:58 -0700 Subject: [PATCH 28/38] Update [ghstack-poisoned] --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f2fba8921f5..a3755983cf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -757,6 +757,10 @@ if(EXECUTORCH_BUILD_PYBIND) list(APPEND _dep_libs openvino_backend) 
endif() + if(EXECUTORCH_BUILD_VULKAN) + list(APPEND _dep_libs vulkan_backend) + endif() + if(EXECUTORCH_BUILD_XNNPACK) # need to explicitly specify XNNPACK and xnnpack-microkernels-prod here # otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu From 491ec2b4c8d14cfbe3a41ae8f0a6c0fec89bd07e Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 21:57:12 -0700 Subject: [PATCH 29/38] Update [ghstack-poisoned] --- backends/test/suite/context.py | 3 +++ backends/test/suite/flow.py | 2 +- backends/test/suite/operators/__init__.py | 4 ++++ backends/test/suite/reporting.py | 5 +++++ backends/test/suite/runner.py | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/backends/test/suite/context.py b/backends/test/suite/context.py index 16b22b89f87..fd754737060 100644 --- a/backends/test/suite/context.py +++ b/backends/test/suite/context.py @@ -1,6 +1,8 @@ # Test run context management. This is used to determine the test context for reporting # purposes. 
class TestContext: + subtest_index: int + def __init__( self, test_name: str, test_base_name: str, flow_name: str, params: dict | None ): @@ -8,6 +10,7 @@ def __init__( self.test_base_name = test_base_name self.flow_name = flow_name self.params = params + self.subtest_index = 0 def __enter__(self): global _active_test_context diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 4324db46796..8f47ebf0ebd 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Callable from executorch.backends.test.harness import Tester diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 8f7fbb1bc03..6ceb9086f71 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -152,12 +152,16 @@ def _test_op( flow, context.test_name, context.test_base_name, + context.subtest_index, context.params, generate_random_test_inputs=generate_random_test_inputs, ) log_test_summary(run_summary) + # This is reset when a new test is started - it creates the context per-test. + context.subtest_index = context.subtest_index + 1 + if not run_summary.result.is_success(): if run_summary.result.is_backend_failure(): raise RuntimeError("Test failure.") from run_summary.error diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 6294ab9434f..f4a1f9a653e 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -21,6 +21,7 @@ CSV_FIELD_NAMES = [ "Test ID", "Test Case", + "Subtest", "Flow", "Params", "Result", @@ -163,6 +164,9 @@ class TestCaseSummary: name: str """ The full name of test, including flow and parameter suffixes. """ + subtest_index: int + """ The subtest number. If a test case runs multiple tests, this field can be used to disambiguate. 
""" + params: dict | None """ Test-specific parameters, such as dtype. """ @@ -356,6 +360,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO): row = { "Test ID": record.name, "Test Case": record.base_name, + "Subtest": record.subtest_index, "Flow": record.flow, "Params": _serialize_params(record.params), "Result": record.result.to_short_str(), diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index b128d64eca2..4999779b3c9 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -45,6 +45,7 @@ def run_test( # noqa: C901 flow: TestFlow, test_name: str, test_base_name: str, + subtest_index: int, params: dict | None, dynamic_shapes: Any | None = None, generate_random_test_inputs: bool = True, @@ -64,6 +65,7 @@ def build_result( return TestCaseSummary( backend=flow.backend, base_name=test_base_name, + subtest_index=subtest_index, flow=flow.name, name=test_name, params=params, From 375b0764a36d1cff21724b008fc47ac58c257e7d Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 22:12:10 -0700 Subject: [PATCH 30/38] Update [ghstack-poisoned] --- backends/test/harness/stages/serialize.py | 5 ++++- backends/test/suite/runner.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backends/test/harness/stages/serialize.py b/backends/test/harness/stages/serialize.py index 9d0bded0483..a5be1631d98 100644 --- a/backends/test/harness/stages/serialize.py +++ b/backends/test/harness/stages/serialize.py @@ -13,6 +13,7 @@ try: from executorch.extension.pybindings.portable_lib import ( # @manual _load_for_executorch_from_buffer, + Verification, ) except ImportError as e: logger.warning(f"{e=}") @@ -39,7 +40,9 @@ def graph_module(self) -> None: def run_artifact(self, inputs): inputs_flattened, _ = tree_flatten(inputs) - executorch_module = _load_for_executorch_from_buffer(self.buffer) + executorch_module = _load_for_executorch_from_buffer( + self.buffer, 
program_verification=Verification.Minimal + ) executorch_output = copy.deepcopy( executorch_module.run_method("forward", tuple(inputs_flattened)) ) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 4999779b3c9..eea1ce6b404 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -3,6 +3,7 @@ import re import time import unittest +import warnings from datetime import timedelta from typing import Any @@ -249,6 +250,10 @@ def build_test_filter(args: argparse.Namespace) -> TestFilter: def runner_main(): args = parse_args() + # Suppress deprecation warnings for export_for_training, as it generates a + # lot of log spam. We don't really need the warning here. + warnings.simplefilter("ignore", category=FutureWarning) + begin_test_session(args.report) if len(args.suite) > 1: From 78086b4ae523d9c889d847b1ce756585c02916af Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 22:24:23 -0700 Subject: [PATCH 31/38] Update [ghstack-poisoned] --- backends/test/suite/flow.py | 2 +- backends/test/suite/reporting.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 4324db46796..8f47ebf0ebd 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Callable from executorch.backends.test.harness import Tester diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index cb37ded947e..a19c63dd474 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -57,15 +57,15 @@ def is_non_backend_failure(self): def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() - + def to_short_str(self): - if self in { TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED }: + if self in 
{TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}: return "Pass" elif self == TestResult.SKIPPED: return "Skip" else: return "Fail" - + def to_detail_str(self): if self == TestResult.SUCCESS: return "" @@ -160,7 +160,11 @@ class TestCaseSummary: """ The size of the PTE file in bytes. """ def is_delegated(self): - return any(v > 0 for v in self.delegated_op_counts.values()) if self.delegated_op_counts else False + return ( + any(v > 0 for v in self.delegated_op_counts.values()) + if self.delegated_op_counts + else False + ) class TestSessionState: @@ -348,10 +352,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO): "Result Detail": record.result.to_detail_str(), "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( - f"{record.quantize_time.total_seconds():.3f}" if record.quantize_time else None + f"{record.quantize_time.total_seconds():.3f}" + if record.quantize_time + else None ), "Lower Time (s)": ( - f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None + f"{record.lower_time.total_seconds():.3f}" + if record.lower_time + else None ), } if record.params is not None: From f4b0dc223edddea695789011b18929514fb1bb85 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 22:27:51 -0700 Subject: [PATCH 32/38] Update [ghstack-poisoned] --- backends/test/suite/flow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 4324db46796..8f47ebf0ebd 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Callable from executorch.backends.test.harness import Tester From 733d4f98956daa0e643dd4765a61cd2f2ddd763f Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 23:07:27 -0700 Subject: [PATCH 33/38] Update [ghstack-poisoned] --- backends/test/suite/runner.py | 12 
++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index eea1ce6b404..a863a13f877 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -1,4 +1,5 @@ import argparse +import hashlib import importlib import re import time @@ -40,6 +41,15 @@ } +def _get_test_seed(test_base_name: str) -> int: + # Set the seed based on the test base name to give consistent inputs between runs and backends. + # Having a stable hash between runs and across machines is a plus (builtin python hash is not). + # Using MD5 here because it's fast and we don't actually care about cryptographic properties. + hasher = hashlib.md5() + data = test_base_name.encode("utf-8") + hasher.update(data) + return int.from_bytes(hasher.hexdigest(), "little") + def run_test( # noqa: C901 model: torch.nn.Module, inputs: Any, @@ -59,6 +69,8 @@ def run_test( # noqa: C901 error_statistics: list[ErrorStatistics] = [] extra_stats = {} + torch.manual_seed(_get_test_seed(test_base_name)) + # Helper method to construct the summary. 
def build_result( result: TestResult, error: Exception | None = None From 1e3e79c6918ef3c2a557b5db2056cc060d202f7a Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 23:19:02 -0700 Subject: [PATCH 34/38] Update [ghstack-poisoned] --- backends/test/suite/tests/test_reporting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 6ab4817b44c..a6f2ca60bdd 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -24,6 +24,7 @@ base_name="test1", flow="flow1", name="test1_backend1_flow1", + subtest_index=0, params=None, result=TestResult.SUCCESS, error=None, @@ -34,6 +35,7 @@ base_name="test1", flow="flow1", name="test1_backend2_flow1", + subtest_index=0, params=None, result=TestResult.LOWER_FAIL, error=None, @@ -44,6 +46,7 @@ base_name="test2", flow="flow1", name="test2_backend1_flow1", + subtest_index=0, params={"dtype": torch.float32}, result=TestResult.SUCCESS_UNDELEGATED, error=None, @@ -54,6 +57,7 @@ base_name="test2", flow="flow1", name="test2_backend2_flow1", + subtest_index=0, params={"use_dynamic_shapes": True}, result=TestResult.SKIPPED, error=None, From 1c136d79c8f381bfc701cea5277503d77a1be907 Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 23:25:11 -0700 Subject: [PATCH 35/38] Update [ghstack-poisoned] --- backends/test/suite/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 700baa435fc..06c1c537477 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -119,6 +119,7 @@ def run_model_test( flow, context.test_name, context.test_base_name, + 0, # subtest_index - currently unused for model tests context.params, dynamic_shapes=dynamic_shapes, ) From e7b79757d43197d7b2b10caadbea8bec93e3fe0e Mon Sep 17 00:00:00 2001 From: Gregory 
James Comer Date: Tue, 12 Aug 2025 19:33:59 -0700 Subject: [PATCH 36/38] Update [ghstack-poisoned] --- backends/test/suite/models/__init__.py | 2 +- backends/test/suite/reporting.py | 12 +++++++++-- backends/test/suite/runner.py | 28 ++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 06c1c537477..76b2d2966f6 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -119,7 +119,7 @@ def run_model_test( flow, context.test_name, context.test_base_name, - 0, # subtest_index - currently unused for model tests + 0, # subtest_index - currently unused for model tests context.params, dynamic_shapes=dynamic_shapes, ) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index f4a1f9a653e..ce8a48dcc12 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -207,6 +207,8 @@ def is_delegated(self): @dataclass class TestSessionState: + seed: int + # True if the CSV header has been written to report__path. has_written_report_header: bool = False @@ -291,11 +293,17 @@ def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter: ) -def begin_test_session(report_path: str | None): +def begin_test_session(report_path: str | None, seed: int): global _active_session assert _active_session is None, "A test session is already active." 
- _active_session = TestSessionState(report_path=report_path) + _active_session = TestSessionState(report_path=report_path, seed=seed) + + +def get_active_test_session() -> TestSessionState | None: + global _active_session + + return _active_session def log_test_summary(summary: TestCaseSummary): diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index c1918f21112..6caf27afe92 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -1,6 +1,7 @@ import argparse import hashlib import importlib +import random import re import time import unittest @@ -27,6 +28,7 @@ begin_test_session, complete_test_session, count_ops, + get_active_test_session, RunSummary, TestCaseSummary, TestResult, @@ -42,14 +44,23 @@ def _get_test_seed(test_base_name: str) -> int: - # Set the seed based on the test base name to give consistent inputs between runs and backends. - # Having a stable hash between runs and across machines is a plus (builtin python hash is not). + # Set the seed based on the test base name to give consistent inputs between backends. Add the + # run seed to allow for reproducible results, but still allow for run-to-run variation. + # Having a stable hash between runs and across machines is a plus (builtin python hash is not). # Using MD5 here because it's fast and we don't actually care about cryptographic properties. + test_session = get_active_test_session() + run_seed = ( + test_session.seed + if test_session is not None + else random.randint(0, 100_000_000) + ) + hasher = hashlib.md5() data = test_base_name.encode("utf-8") hasher.update(data) # Torch doesn't like very long seeds. 
- return int.from_bytes(hasher.digest(), "little") % 100_000_000 + return (int.from_bytes(hasher.digest(), "little") % 100_000_000) + run_seed + def run_test( # noqa: C901 model: torch.nn.Module, @@ -250,6 +261,12 @@ def parse_args(): help="A file to write the test report to, in CSV format.", default="backend_test_report.csv", ) + parser.add_argument( + "--seed", + nargs="?", + help="The numeric seed value to use for random generation.", + type=int, + ) return parser.parse_args() @@ -267,7 +284,10 @@ def runner_main(): # lot of log spam. We don't really need the warning here. warnings.simplefilter("ignore", category=FutureWarning) - begin_test_session(args.report) + seed = args.seed or random.randint(0, 100_000_000) + print(f"Running with seed {seed}.") + + begin_test_session(args.report, seed=seed) if len(args.suite) > 1: raise NotImplementedError("TODO Support multiple suites.") From a5f702f020d1e375ff4de9cf3dd10f0e7ba4957d Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Wed, 13 Aug 2025 13:34:36 -0700 Subject: [PATCH 37/38] Update [ghstack-poisoned] --- backends/test/suite/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 06c1c537477..76b2d2966f6 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -119,7 +119,7 @@ def run_model_test( flow, context.test_name, context.test_base_name, - 0, # subtest_index - currently unused for model tests + 0, # subtest_index - currently unused for model tests context.params, dynamic_shapes=dynamic_shapes, ) From b2996595000a124d5b91e3b9d3b6dd4eff84ef67 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Wed, 13 Aug 2025 13:36:59 -0700 Subject: [PATCH 38/38] Update [ghstack-poisoned] --- backends/test/suite/tests/test_reporting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/test/suite/tests/test_reporting.py 
b/backends/test/suite/tests/test_reporting.py index a6f2ca60bdd..58ff76cba17 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -69,7 +69,7 @@ class Reporting(unittest.TestCase): def test_csv_report_simple(self): # Verify the format of a simple CSV run report. - session_state = TestSessionState() + session_state = TestSessionState(seed=0) session_state.test_case_summaries.extend(TEST_CASE_SUMMARIES) run_summary = RunSummary.from_session(session_state)