Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions backends/arm/test/models/test_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pytest
import torch
from executorch.backends.arm._passes import InsertInt32CastsAfterInt64PlaceholdersPass
from executorch.backends.arm.quantizer import get_symmetric_quantization_config

from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.test_pipeline import (
Expand Down Expand Up @@ -99,6 +100,14 @@ def prepare_model(self):
return llama_model, llama_inputs, llama_meta


def _use_partial_quantizer(pipeline):
    """Restrict the pipeline's quantizer so that only torch.nn.Linear
    modules are quantized; all other modules are left in floating point."""
    quantizer = pipeline.quantizer
    # Clear the global (catch-all) config, then opt Linear layers back in.
    quantizer.set_global(None)
    linear_cfg = get_symmetric_quantization_config()
    quantizer.set_module_type(torch.nn.Linear, linear_cfg)


def test_llama_tosa_FP():
llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()

Expand Down Expand Up @@ -179,3 +188,21 @@ def test_llama_vgf_quant():
quantize=True,
)
pipeline.run()


def test_llama_partial_quant_tosa_INT_FP():
    """Run the TOSA INT pipeline (with the FP extension) on Llama while
    quantizing only a subset of modules via the partial quantizer."""
    model, inputs, _meta = TestLlama().prepare_model()

    # Skip gracefully when the model/input artifacts are not available.
    if model is None or inputs is None:
        pytest.skip("Missing model and/or input files")

    with torch.no_grad():
        pipeline = TosaPipelineINT[input_t](
            model,
            inputs,
            aten_op=[],
            exir_op=[],
            tosa_extensions=["FP"],
        )
    _use_partial_quantizer(pipeline)
    pipeline.run()
23 changes: 23 additions & 0 deletions backends/arm/test/models/test_mobilenet_v2_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest

import torch
from executorch.backends.arm.quantizer import get_symmetric_quantization_config
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineINT,
Expand Down Expand Up @@ -39,6 +40,14 @@
}


def _use_partial_quantizer(pipeline):
    """Restrict the pipeline's quantizer so that only Conv2d and ReLU6
    modules are quantized; every other module stays in floating point."""
    cfg = get_symmetric_quantization_config()
    # Drop the global (catch-all) config, then opt specific module types back in.
    pipeline.quantizer.set_global(None)
    for module_type in (torch.nn.Conv2d, torch.nn.ReLU6):
        pipeline.quantizer.set_module_type(module_type, cfg)


def test_mv2_tosa_FP():
pipeline = TosaPipelineFP[input_t](
mv2, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True
Expand Down Expand Up @@ -140,3 +149,17 @@ def test_mv2_vgf_no_quant():
quantize=False,
)
pipeline.run()


def test_mv2_partial_quant_tosa_INT_FP():
    """Run MobileNetV2 through the TOSA INT pipeline (with the FP extension),
    quantizing only Conv2d/ReLU6 modules via the partial quantizer."""
    pipeline_kwargs = dict(
        aten_op=[],
        exir_op=[],
        tosa_extensions=["FP"],
        use_to_edge_transform_and_lower=True,
        atol=0.20,  # partial quantization needs a looser tolerance
    )
    pipeline = TosaPipelineINT[input_t](mv2, model_inputs, **pipeline_kwargs)
    _use_partial_quantizer(pipeline)
    pipeline.run()
Loading