diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
index 0bfd14fb7f5..d2282bd7bc0 100755
--- a/.ci/scripts/test_backend_linux.sh
+++ b/.ci/scripts/test_backend_linux.sh
@@ -18,11 +18,32 @@ eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"
 
-# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
-source .ci/scripts/setup-vulkan-linux-deps.sh
+export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.
-EXTRA_BUILD_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON"
+EXTRA_BUILD_ARGS=""
+
+if [[ "$FLOW" == *qnn* ]]; then
+    # Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
+    # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
+    # cleaned up.
+    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+    PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+    QNN_X86_LIB_DIR="$(realpath build-x86/lib/)"
+    QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
+    export LD_LIBRARY_PATH="$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
+
+    # TODO Get SDK root from install scripts
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
+fi
+
+if [[ "$FLOW" == *vulkan* ]]; then
+    # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+    source .ci/scripts/setup-vulkan-linux-deps.sh
+
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
+fi
 
 # We need the runner to test the built library.
 PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 4c40311d9a9..24bf86bf441 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -42,7 +42,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        flow: [vulkan, xnnpack, xnnpack_static_int8_per_channel]
+        flow: [qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w, vulkan, xnnpack, xnnpack_static_int8_per_channel]
         suite: [models, operators]
     with:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
diff --git a/backends/qualcomm/scripts/install_qnn_sdk.sh b/backends/qualcomm/scripts/install_qnn_sdk.sh
index 913ce34b711..a8f9e63862d 100644
--- a/backends/qualcomm/scripts/install_qnn_sdk.sh
+++ b/backends/qualcomm/scripts/install_qnn_sdk.sh
@@ -9,7 +9,7 @@ source "${SCRIPT_DIR}/qnn_config.sh"
 # Function to install Android NDK (only if not already set)
 setup_android_ndk() {
     # Check if ANDROID_NDK_ROOT is already set and valid
-    if [ -n "${ANDROID_NDK_ROOT}" ] && [ -d "${ANDROID_NDK_ROOT}" ]; then
+    if [ -n "${ANDROID_NDK_ROOT:-}" ] && [ -d "${ANDROID_NDK_ROOT:-}" ]; then
         echo "Android NDK already set to ${ANDROID_NDK_ROOT} - skipping installation"
         return
     fi
@@ -41,7 +41,7 @@ verify_pkg_installed() {
 
 install_qnn() {
     # Check if QNN_SDK_ROOT is already set and valid
-    if [ -n "${QNN_SDK_ROOT}" ] && [ -d "${QNN_SDK_ROOT}" ]; then
+    if [ -n "${QNN_SDK_ROOT:-}" ] && [ -d "${QNN_SDK_ROOT:-}" ]; then
         echo "QNN SDK already set to ${QNN_SDK_ROOT} - skipping installation"
         return
     fi
@@ -141,9 +141,9 @@ setup_libcpp() {
     popd >/dev/null
 
     # Set environment variables
-    export CPLUS_INCLUDE_PATH="${INSTALL_DIR}/include:$CPLUS_INCLUDE_PATH"
-    export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:$LD_LIBRARY_PATH"
-    export LIBRARY_PATH="${INSTALL_DIR}/lib:$LIBRARY_PATH"
+    export CPLUS_INCLUDE_PATH="${INSTALL_DIR}/include:${CPLUS_INCLUDE_PATH:-}"
+    export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${LD_LIBRARY_PATH:-}"
+    export LIBRARY_PATH="${INSTALL_DIR}/lib:${LIBRARY_PATH:-}"
 
     echo "libc++ installed to ${INSTALL_DIR}"
 }
diff --git a/backends/qualcomm/tests/tester.py b/backends/qualcomm/tests/tester.py
index fb34087ac90..812e8971115 100644
--- a/backends/qualcomm/tests/tester.py
+++ b/backends/qualcomm/tests/tester.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Sequence, Tuple
 
 import executorch
 import executorch.backends.test.harness.stages as BaseStages
@@ -12,6 +12,7 @@
 import torch
 from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
 from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
+from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
 from executorch.backends.qualcomm.utils.utils import (
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
@@ -24,6 +25,24 @@
 from torch.export import ExportedProgram
 
 
+class Quantize(BaseStages.Quantize):
+    def __init__(
+        self,
+        quantizer: QnnQuantizer,
+        quantization_config: Optional[Any] = None,
+        calibrate: bool = True,
+        calibration_samples: Optional[Sequence[Any]] = None,
+        is_qat: Optional[bool] = False,
+    ):
+        super().__init__(
+            quantizer=quantizer,
+            calibrate=calibrate,
+            calibration_samples=calibration_samples,
+            is_qat=is_qat,
+            set_global=False,
+        )
+
+
 class Partition(BaseStages.Partition):
     def __init__(self, partitioner: Optional[Partitioner] = None):
         super().__init__(
@@ -37,8 +56,9 @@ def __init__(
         partitioners: Optional[List[Partitioner]] = None,
         edge_compile_config: Optional[EdgeCompileConfig] = None,
         soc_model: str = "SM8650",
+        use_fp16: bool = True,
     ):
-        backend_options = generate_htp_compiler_spec(use_fp16=True)
+        backend_options = generate_htp_compiler_spec(use_fp16=use_fp16)
         self.chipset = get_soc_to_chipset_map()[soc_model]
         self.compiler_specs = generate_qnn_executorch_compiler_spec(
             soc_model=self.chipset,
@@ -73,15 +93,17 @@ def __init__(
         module: torch.nn.Module,
         example_inputs: Tuple[torch.Tensor],
         dynamic_shapes: Optional[Tuple[Any]] = None,
+        use_fp16: bool = True,
     ):
+        def create_to_edge_transform_and_lower(*args, **kwargs):
+            kwargs["use_fp16"] = use_fp16
+            return ToEdgeTransformAndLower(*args, **kwargs)
+
         # Specialize for Qualcomm
-        stage_classes = (
-            executorch.backends.test.harness.Tester.default_stage_classes()
-            | {
-                StageType.PARTITION: Partition,
-                StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
-            }
-        )
+        stage_classes = executorch.backends.test.harness.Tester.default_stage_classes() | {
+            StageType.PARTITION: Partition,
+            StageType.TO_EDGE_TRANSFORM_AND_LOWER: create_to_edge_transform_and_lower,
+        }
 
         super().__init__(
             module=module,
diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index 8f47ebf0ebd..fbc5552d7d8 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -81,12 +81,24 @@ def all_flows() -> dict[str, TestFlow]:
         logger.info(f"Skipping Vulkan flow registration: {e}")
 
     try:
-        from executorch.backends.test.suite.flows.qualcomm import QUALCOMM_TEST_FLOW
+        from executorch.backends.test.suite.flows.qualcomm import (
+            QNN_16A16W_TEST_FLOW,
+            QNN_16A4W_BLOCK_TEST_FLOW,
+            QNN_16A4W_TEST_FLOW,
+            QNN_16A8W_TEST_FLOW,
+            QNN_8A8W_TEST_FLOW,
+            QNN_TEST_FLOW,
+        )
 
         flows += [
-            QUALCOMM_TEST_FLOW,
+            QNN_TEST_FLOW,
+            QNN_16A16W_TEST_FLOW,
+            QNN_16A8W_TEST_FLOW,
+            QNN_16A4W_TEST_FLOW,
+            QNN_16A4W_BLOCK_TEST_FLOW,
+            QNN_8A8W_TEST_FLOW,
         ]
     except Exception as e:
-        logger.info(f"Skipping Qualcomm flow registration: {e}")
+        logger.info(f"Skipping QNN flow registration: {e}")
 
     return {f.name: f for f in flows if f is not None}
diff --git a/backends/test/suite/flows/qualcomm.py b/backends/test/suite/flows/qualcomm.py
index bf17061597b..9998caa51b6 100644
--- a/backends/test/suite/flows/qualcomm.py
+++ b/backends/test/suite/flows/qualcomm.py
@@ -1,17 +1,62 @@
-from executorch.backends.qualcomm.tests.tester import QualcommTester
+from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype
+from executorch.backends.qualcomm.tests.tester import QualcommTester, Quantize
 from executorch.backends.test.suite.flow import TestFlow
+from torchao.quantization.pt2e import MovingAverageMinMaxObserver
 
 
-def _create_qualcomm_flow(
+def _create_qnn_flow(
     name: str,
     quantize: bool = False,
+    quant_dtype: QuantDtype | None = None,
+    per_channel_conv=True,
+    per_channel_linear=False,
+    is_qat=False,
+    use_fp16=True,
 ) -> TestFlow:
+    if quantize and quant_dtype is None:
+        raise RuntimeError("Quant dtype must be provided when quantize is true.")
+
+    def create_tester(*args, **kwargs) -> QualcommTester:
+        kwargs["use_fp16"] = use_fp16
+        return QualcommTester(*args, **kwargs)
+
+    def create_quantize_stage() -> Quantize:
+        quantizer = QnnQuantizer()
+        quantizer.set_default_quant_config(
+            quant_dtype,
+            is_qat=is_qat,
+            is_conv_per_channel=per_channel_conv,
+            is_linear_per_channel=per_channel_linear,
+            act_observer=MovingAverageMinMaxObserver,
+        )
+        return Quantize(quantizer=quantizer)
+
     return TestFlow(
         name,
         backend="qualcomm",
-        tester_factory=QualcommTester,
+        tester_factory=create_tester,
         quantize=quantize,
+        quantize_stage_factory=create_quantize_stage if quantize else None,
     )
 
 
-QUALCOMM_TEST_FLOW = _create_qualcomm_flow("qualcomm")
+QNN_TEST_FLOW = _create_qnn_flow("qnn")
+QNN_16A16W_TEST_FLOW = _create_qnn_flow(
+    "qnn_16a16w", quantize=True, quant_dtype=QuantDtype.use_16a16w, use_fp16=False
+)
+QNN_16A8W_TEST_FLOW = _create_qnn_flow(
+    "qnn_16a8w", quantize=True, quant_dtype=QuantDtype.use_16a8w, use_fp16=False
+)
+QNN_16A4W_TEST_FLOW = _create_qnn_flow(
+    "qnn_16a4w", quantize=True, quant_dtype=QuantDtype.use_16a4w, use_fp16=False
+)
+# NOTE(review): named 16a4w_block but configured as use_8a8w - confirm intended dtype.
+QNN_16A4W_BLOCK_TEST_FLOW = _create_qnn_flow(
+    "qnn_16a4w_block",
+    quantize=True,
+    quant_dtype=QuantDtype.use_8a8w,
+    use_fp16=False,
+)
+QNN_8A8W_TEST_FLOW = _create_qnn_flow(
+    "qnn_8a8w", quantize=True, quant_dtype=QuantDtype.use_8a8w, use_fp16=False
+)