# Patch summary: adds partial-quantization tests to two Arm backend model test files.
#
# backends/arm/test/models/test_llama.py
#   * imports get_symmetric_quantization_config from executorch.backends.arm.quantizer
#   * adds _use_partial_quantizer(pipeline): calls pipeline.quantizer.set_global(None),
#     then set_module_type(torch.nn.Linear, get_symmetric_quantization_config())
#   * adds test_llama_partial_quant_tosa_INT_FP: calls pytest.skip when the prepared
#     model or inputs are None; otherwise builds TosaPipelineINT with
#     tosa_extensions=["FP"], applies the helper and runs the pipeline
#
# backends/arm/test/models/test_mobilenet_v2_arm.py
#   * imports get_symmetric_quantization_config from executorch.backends.arm.quantizer
#   * adds _use_partial_quantizer(pipeline): calls pipeline.quantizer.set_global(None),
#     then registers one shared config for torch.nn.Conv2d and torch.nn.ReLU6
#   * adds test_mv2_partial_quant_tosa_INT_FP: builds TosaPipelineINT with
#     tosa_extensions=["FP"], use_to_edge_transform_and_lower=True and atol=0.20,
#     applies the helper and runs the pipeline
#
# NOTE(review): the two helpers share the name _use_partial_quantizer but select
# different module types; each is defined in its own test file.
# NOTE(review): the patch text below appears to have had its line breaks collapsed
# onto two physical lines - presumably it must be restored from the original commit
# before it can be applied; confirm.
diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py index a318afbfc1c..5f2348dee1e 100644 --- a/backends/arm/test/models/test_llama.py +++ b/backends/arm/test/models/test_llama.py @@ -16,6 +16,7 @@ import pytest import torch from executorch.backends.arm._passes import InsertInt32CastsAfterInt64PlaceholdersPass +from executorch.backends.arm.quantizer import get_symmetric_quantization_config from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.test_pipeline import ( @@ -99,6 +100,14 @@ def prepare_model(self): return llama_model, llama_inputs, llama_meta +def _use_partial_quantizer(pipeline): + """Set the pipeline's quantizer to only include Linear layers""" + pipeline.quantizer.set_global(None) + pipeline.quantizer.set_module_type( + torch.nn.Linear, get_symmetric_quantization_config() + ) + + def test_llama_tosa_FP(): llama_model, llama_inputs, llama_meta = TestLlama().prepare_model() @@ -179,3 +188,21 @@ def test_llama_vgf_quant(): quantize=True, ) pipeline.run() + + +def test_llama_partial_quant_tosa_INT_FP(): + llama_model, llama_inputs, llama_meta = TestLlama().prepare_model() + + if llama_model is None or llama_inputs is None: + pytest.skip("Missing model and/or input files") + + with torch.no_grad(): + pipeline = TosaPipelineINT[input_t]( + llama_model, + llama_inputs, + aten_op=[], + exir_op=[], + tosa_extensions=["FP"], + ) + _use_partial_quantizer(pipeline) + pipeline.run() diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py index 41ef4136760..2c5d2cd627d 100644 --- a/backends/arm/test/models/test_mobilenet_v2_arm.py +++ b/backends/arm/test/models/test_mobilenet_v2_arm.py @@ -10,6 +10,7 @@ import pytest import torch +from executorch.backends.arm.quantizer import get_symmetric_quantization_config from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( 
EthosU55PipelineINT, @@ -39,6 +40,14 @@ } +def _use_partial_quantizer(pipeline): + """Set the pipeline's quantizer to only include Conv2d and ReLU6""" + quant_cfg = get_symmetric_quantization_config() + pipeline.quantizer.set_global(None) + pipeline.quantizer.set_module_type(torch.nn.Conv2d, quant_cfg) + pipeline.quantizer.set_module_type(torch.nn.ReLU6, quant_cfg) + + def test_mv2_tosa_FP(): pipeline = TosaPipelineFP[input_t]( mv2, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True @@ -140,3 +149,17 @@ def test_mv2_vgf_no_quant(): quantize=False, ) pipeline.run() + + +def test_mv2_partial_quant_tosa_INT_FP(): + pipeline = TosaPipelineINT[input_t]( + mv2, + model_inputs, + aten_op=[], + exir_op=[], + tosa_extensions=["FP"], + use_to_edge_transform_and_lower=True, + atol=0.20, + ) + _use_partial_quantizer(pipeline) + pipeline.run()