pytorch · abhinaykukkadapu · Jul 23, 2025 · Jul 23, 2025 · digantdesai · Jul 23, 2025
@@ -61,11 +61,6 @@ def create_recipe(
                 recipe_type, is_per_channel=True, is_dynamic=True
             )
 
-        elif recipe_type == XNNPackRecipeType.INT8_DYNAMIC_PER_TENSOR:
-            return self._build_quantized_recipe(
-                recipe_type, is_per_channel=False, is_dynamic=True
-            )
-
         elif recipe_type == XNNPackRecipeType.INT8_STATIC_PER_CHANNEL:
             return self._build_quantized_recipe(
                 recipe_type, is_per_channel=True, is_dynamic=False

@@ -15,7 +15,6 @@ class XNNPackRecipeType(RecipeType):
     FP32 = "fp32"
     # INT8 Dynamic Quantization
     INT8_DYNAMIC_PER_CHANNEL = "int8_dynamic_per_channel"
-    INT8_DYNAMIC_PER_TENSOR = "int8_dynamic_per_tensor"
     # INT8 Dynamic Activations INT4 Weight Quantization, Axis = 0
     INT8_DYNAMIC_ACT_INT4_WEIGHT_PER_CHANNEL = "int8da_int4w_per_channel"
     # INT8 Dynamic Activations INT4 Weight Quantization, default group_size = 32

@@ -57,7 +57,6 @@ def test_basic_recipe(self) -> None:
     def test_int8_dynamic_quant_recipe(self) -> None:
         test_cases = [
             ExportRecipe.get_recipe(XNNPackRecipeType.INT8_DYNAMIC_PER_CHANNEL),
-            ExportRecipe.get_recipe(XNNPackRecipeType.INT8_DYNAMIC_PER_TENSOR),
         ]
 
         for export_recipe in test_cases:
@@ -74,7 +73,7 @@ def test_int8_dynamic_quant_recipe(self) -> None:
                         torch.allclose(
                             session.run_method("forward", example_inputs[0])[0],
                             m_eager(*example_inputs[0]),
-                            atol=1e-3,
+                            atol=1e-1,
                         )
                     )
                     self.check_fully_delegated(session.get_executorch_program())
@@ -99,7 +98,7 @@ def test_int8_static_quant_recipe(self) -> None:
                         torch.allclose(
                             session.run_method("forward", example_inputs[0])[0],
                             m_eager(*example_inputs[0]),
-                            atol=1e-3,
+                            atol=1e-1,
                         )
                     )
                     self.check_fully_delegated(session.get_executorch_program())
@@ -189,6 +188,7 @@ def _test_model_with_factory(self, model_name: str) -> None:
             atol=1e-3,
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_all_models_with_recipes(self) -> None:
         models_to_test = [
             "linear",

diff --git a/export/export.py b/export/export.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 
@@ -18,6 +19,7 @@
 )
 from executorch.exir.program._program import _transform
 from executorch.exir.schema import Program
+from executorch.export.recipe import QuantizationRecipe
 from executorch.extension.export_util.utils import save_pte_program
 from executorch.runtime import Runtime, Verification
 from tabulate import tabulate
@@ -26,7 +28,6 @@
 from torch._export.pass_base import PassType
 from torch.export import ExportedProgram
 from torchao.quantization import quantize_
-from torchao.quantization.pt2e import allow_exported_model_train_eval
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 
 from torchao.quantization.pt2e.quantizer import ComposableQuantizer
@@ -360,8 +361,8 @@ class QuantizeStage(Stage):
     def __init__(self, quantizers: Any) -> None:
         self._quantizers = quantizers
         self._quantized_models: Dict[str, nn.Module] = {}
+        self._exported_programs: Dict[str, ExportedProgram] = {}
         self._model_dict: Dict[str, nn.Module] = {}
-        self._exported_program_dict: Dict[str, ExportedProgram] = {}
         self._example_inputs_dict: Dict[str, List[tuple[torch.Tensor, ...]]] = {}
 
     @property
@@ -370,20 +371,20 @@ def name(self) -> str:
 
     def run(
         self,
-        exported_program_data: Dict[str, Any],
+        models: Dict[str, nn.Module],
         calibration_config: Optional[Dict[str, Any]] = None,
         **kwargs,
     ) -> None:
         """
-        Perform post-training quantization on the exported program.
+        Perform post-training quantization on the model.
 
         Args:
-            exported_program_data: Dictionary containing exported programs
+            models: Dictionary containing models to quantize
             calibration_config: Configuration containing example inputs for calibration
             **kwargs: Additional keyword arguments (not used)
         """
         # Store inputs
-        self._exported_program_dict = exported_program_data["exported_program"]
+        self._model_dict = models
 
         # Initialize with empty dictionaries
         self._example_inputs_dict = {}
@@ -392,7 +393,7 @@ def run(
             self._example_inputs_dict = calibration_config.get("example_inputs", {})
 
         # Process inputs
-        for method_name, exported_program in self._exported_program_dict.items():
+        for method_name, model in self._model_dict.items():
             # Check if method_name exists in example_inputs and has at least one element
             if (
                 method_name not in self._example_inputs_dict
@@ -402,23 +403,21 @@ def run(
                     f"Example inputs for method {method_name} not found or empty."
                 )
 
-            # Get the module from the exported program
-            model = exported_program.module()
+            # Export the model for training to get a captured graph
+            inputs = self._example_inputs_dict[method_name][0]
+            captured_graph = torch.export.export(model, inputs, strict=True).module()
 
             # Prepare the model for quantization
             composed_quantizer = ComposableQuantizer(self._quantizers)
-            prepared_model = prepare_pt2e(model, composed_quantizer)  # type: ignore
-
-            # Allow the model to switch between train and eval modes
-            allow_exported_model_train_eval(prepared_model)
+            prepared_model = prepare_pt2e(captured_graph, composed_quantizer)  # type: ignore
 
             # Calibrate the model with the provided calibration data
             for calibration_input in self._example_inputs_dict[method_name]:  # type: ignore
                 prepared_model(*calibration_input)
 
             # Convert the prepared model to a quantized model
             quantized_model = convert_pt2e(prepared_model)
-            self._quantized_models[method_name] = quantized_model  # type: ignore
+            self._quantized_models[method_name] = quantized_model
 
     def get_artifacts(self) -> Dict[str, nn.Module]:
         """
@@ -541,29 +540,37 @@ def __init__(
         self._artifact_dir = artifact_dir
         self._export_recipe = export_recipe
 
+        self._quant_recipe: Optional[QuantizationRecipe] = (
+            self._export_recipe.quantization_recipe
+        )
+
         # Initialize pipeline as a list of stages
         self._pipeline = []
 
         # Create the source transform stage if a quantization recipe is provided
-        if self._export_recipe.quantization_recipe is not None:
+        if self._quant_recipe is not None and self._quant_recipe.ao_base_config:
             source_transform_stage = SourceTransformStage(
                 quantization_recipe=self._export_recipe.quantization_recipe
             )
             self._pipeline.append(source_transform_stage)
 
-        # Create the export stage
-        export_stage = ExportStage(
-            pre_edge_transform_passes=self._export_recipe.pre_edge_transform_passes
+        enable_quantize_stage = (
+            self._quant_recipe is not None and self._quant_recipe.quantizers
         )
-        self._pipeline.append(export_stage)
 
         # Create the quantize stage if a quantizer is provided
-        if self._export_recipe.quantization_recipe is not None:
-            quantizers = self._export_recipe.quantization_recipe.get_quantizers()
-            if quantizers is not None:
+        if enable_quantize_stage:
+            # pyre-ignore
+            if quantizers := self._quant_recipe.quantizers:
                 quantize_stage = QuantizeStage(quantizers=quantizers)
                 self._pipeline.append(quantize_stage)
 
+        # Create the export stage
+        export_stage = ExportStage(
+            pre_edge_transform_passes=self._export_recipe.pre_edge_transform_passes,
+        )
+        self._pipeline.append(export_stage)
+
         # Create the edge transform and lower stage
         edge_transform_and_lower_stage = EdgeTransformAndLowerStage(
             partitioners=self._export_recipe.partitioners,
@@ -597,16 +604,16 @@ def _run_pipeline(self) -> None:
         # Process each stage in the pipeline
         for stage in self._pipeline:
             stage_name = stage.name
+            logging.info(f"Executing stage: {stage_name}")
             # Configure inputs for the current stage
             if stage_name == "source_transform":
                 # Run the source transform stage
                 stage.run(self._model, {})
                 self._model = stage.get_artifacts()
             elif stage_name == "quantize":
                 # Run the quantize stage
-                exported_program_data = {"exported_program": self._exported_program}
                 config_params = {"example_inputs": self._example_inputs}
-                stage.run(exported_program_data, config_params)
+                stage.run(self._model, config_params)
                 self._model = stage.get_artifacts()
             elif stage_name == "export":
                 # Run the export stage

diff --git a/export/tests/TARGETS b/export/tests/TARGETS
@@ -16,13 +16,17 @@ runtime.python_test(
 )
 
 runtime.python_test(
-    name = "test_export_recipe",
+    name = "test_executorch_export",
     srcs = [
         "test_recipe_provider.py",
         "test_recipe_registry.py",
         "test_export_recipe.py",
+        "test_export_stages.py",
     ],
     deps = [
         "//executorch/export:lib",
+        "//executorch/exir:lib",
+        "//executorch/devtools/backend_debug:delegation_info",
+        "//executorch/runtime:runtime",
     ]
 )