Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions backends/arm/test/models/test_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pytest
import torch
from executorch.backends.arm._passes import InsertInt32CastsAfterInt64PlaceholdersPass
from executorch.backends.arm.quantizer import get_symmetric_quantization_config

from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
Expand Down Expand Up @@ -99,6 +100,14 @@ def prepare_model(self):
return llama_model, llama_inputs, llama_meta


def _use_partial_quantizer(pipeline):
    """Restrict the pipeline's quantizer so that only torch.nn.Linear
    modules are quantized; all other modules are left in floating point."""
    quantizer = pipeline.quantizer
    # Clear the global (catch-all) config, then opt Linear layers back in.
    quantizer.set_global(None)
    linear_cfg = get_symmetric_quantization_config()
    quantizer.set_module_type(torch.nn.Linear, linear_cfg)


def test_llama_tosa_FP():
llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()

Expand Down Expand Up @@ -179,3 +188,21 @@ def test_llama_vgf_quant():
quantize=True,
)
pipeline.run()


def test_llama_partial_quant_tosa_INT_FP():
    """Run the TOSA INT pipeline (with the FP extension) on Llama while
    quantizing only a subset of modules via the partial quantizer."""
    model, inputs, _meta = TestLlama().prepare_model()

    # Skip gracefully when the model/input artifacts are not available.
    if model is None or inputs is None:
        pytest.skip("Missing model and/or input files")

    with torch.no_grad():
        pipeline = TosaPipelineINT[input_t](
            model,
            inputs,
            aten_op=[],
            exir_op=[],
            tosa_extensions=["FP"],
        )
    _use_partial_quantizer(pipeline)
    pipeline.run()
23 changes: 23 additions & 0 deletions backends/arm/test/models/test_mobilenet_v2_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest

import torch
from executorch.backends.arm.quantizer import get_symmetric_quantization_config
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineINT,
Expand Down Expand Up @@ -39,6 +40,14 @@
}


def _use_partial_quantizer(pipeline):
    """Restrict the pipeline's quantizer so that only Conv2d and ReLU6
    modules are quantized; every other module stays in floating point."""
    cfg = get_symmetric_quantization_config()
    # Drop the global (catch-all) config, then opt specific module types back in.
    pipeline.quantizer.set_global(None)
    for module_type in (torch.nn.Conv2d, torch.nn.ReLU6):
        pipeline.quantizer.set_module_type(module_type, cfg)


def test_mv2_tosa_FP():
pipeline = TosaPipelineFP[input_t](
mv2, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True
Expand Down Expand Up @@ -140,3 +149,17 @@ def test_mv2_vgf_no_quant():
quantize=False,
)
pipeline.run()


def test_mv2_partial_quant_tosa_INT_FP():
    """Run MobileNetV2 through the TOSA INT pipeline (with the FP extension),
    quantizing only Conv2d/ReLU6 modules via the partial quantizer."""
    pipeline_kwargs = dict(
        aten_op=[],
        exir_op=[],
        tosa_extensions=["FP"],
        use_to_edge_transform_and_lower=True,
        atol=0.20,  # partial quantization needs a looser tolerance
    )
    pipeline = TosaPipelineINT[input_t](mv2, model_inputs, **pipeline_kwargs)
    _use_partial_quantizer(pipeline)
    pipeline.run()
Loading