Commit 5517031

Update on "New multi-step QAT API"
**Summary:** This commit adds a new multi-step QAT API with the main goal of simplifying the existing UX. The new API uses the same `QATConfig` for both the prepare and convert steps, and automatically infers the fake quantization configs based on a PTQ base config provided by the user:

```Py
from torchao.quantization import (
    quantize_,
    Int8DynamicActivationInt4WeightConfig,
)
from torchao.quantization.qat import QATConfig

# prepare
base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
qat_config = QATConfig(base_config, step="prepare")
quantize_(m, qat_config)

# train (not shown)

# convert
quantize_(m, QATConfig(base_config, step="convert"))
```

The main improvements include:

- A single config for both the prepare and convert steps
- A single `quantize_` call for convert (instead of two)
- No chance of incompatible prepare vs. convert configs
- Much less boilerplate code for the most common use case
- Simpler config names

For less common use cases such as experimentation, users can still specify arbitrary fake quantization configs for activations and/or weights as before. This is still important since there may not always be a corresponding PTQ base config. For example:

```Py
import torch

from torchao.quantization import quantize_
from torchao.quantization.qat import IntxFakeQuantizeConfig, QATConfig

activation_config = IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
weight_config = IntxFakeQuantizeConfig(torch.int4, group_size=32)
qat_config = QATConfig(
    activation_config=activation_config,
    weight_config=weight_config,
    step="prepare",
)
quantize_(model, qat_config)

# train and convert same as above (not shown)
```

**BC-breaking notes:** This change by itself is technically not BC-breaking since we keep the old path around, but it will become BC-breaking once we deprecate and remove the old path in the future.

Before:

```Py
# prepare
activation_config = IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
weight_config = IntxFakeQuantizeConfig(torch.int4, group_size=32)
qat_config = IntXQuantizationAwareTrainingConfig(activation_config, weight_config)
quantize_(model, qat_config)

# train (not shown)

# convert
quantize_(model, FromIntXQuantizationAwareTrainingConfig())
quantize_(model, Int8DynamicActivationInt4WeightConfig(group_size=32))
```

After: (see above)

**Test Plan:**

```
python test/quantization/test_qat.py
```

[ghstack-poisoned]
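For reference, below is a minimal end-to-end sketch of the new flow. The toy model, optimizer, and random data are illustrative assumptions; only `quantize_`, `QATConfig`, and `Int8DynamicActivationInt4WeightConfig` come from the API described in this commit:

```Py
import torch
import torch.nn as nn

from torchao.quantization import quantize_, Int8DynamicActivationInt4WeightConfig
from torchao.quantization.qat import QATConfig

# Toy model and training setup, for illustration only
model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 16))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

# prepare: insert fake quantization ops inferred from the PTQ base config
base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
quantize_(model, QATConfig(base_config, step="prepare"))

# train: fine-tune with fake quantization in the loop (random data here)
for _ in range(10):
    x, y = torch.randn(8, 64), torch.randn(8, 16)
    optimizer.zero_grad()
    loss_fn(model(x), y).backward()
    optimizer.step()

# convert: replace fake quantization with real quantized ops
quantize_(model, QATConfig(base_config, step="convert"))
```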
2 parents 7147dcb + 81096ae commit 5517031

File tree

4 files changed (+12, -9 lines changed)

docs/source/api_ref_qat.rst

Lines changed: 1 addition & 1 deletion

```diff
@@ -16,7 +16,7 @@ please refer to the `QAT README <https://github.com/pytorch/ao/blob/main/torchao
     :nosignatures:

     QATConfig
-    QATConfigStep
+    QATStep

 Custom QAT APIs
 ---------------
```

test/quantization/test_qat.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -35,6 +35,7 @@
     FromIntXQuantizationAwareTrainingConfig,
     IntXQuantizationAwareTrainingConfig,
     QATConfig,
+    QATStep,
     initialize_fake_quantizers,
 )
 from torchao.quantization.qat.embedding import (
@@ -1272,6 +1273,8 @@ def test_qat_config_init(self):
         # OK
         QATConfig(base_config, step="prepare")
         QATConfig(base_config, step="convert")
+        QATConfig(base_config, step=QATStep.PREPARE)
+        QATConfig(base_config, step=QATStep.CONVERT)
         QATConfig(activation_config=fq_config, weight_config=fq_config, step="prepare")
         QATConfig(weight_config=fq_config, step="prepare")
```

torchao/quantization/qat/__init__.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -3,7 +3,7 @@
     FromIntXQuantizationAwareTrainingConfig,
     IntXQuantizationAwareTrainingConfig,
     QATConfig,
-    QATConfigStep,
+    QATStep,
     from_intx_quantization_aware_training,
     initialize_fake_quantizers,
     intx_quantization_aware_training,
@@ -27,7 +27,7 @@

 __all__ = [
     "QATConfig",
-    "QATConfigStep",
+    "QATStep",
     "FakeQuantizeConfigBase",
     "IntxFakeQuantizeConfig",
     "FakeQuantizer",
```

torchao/quantization/qat/api.py

Lines changed: 6 additions & 6 deletions

```diff
@@ -26,7 +26,7 @@
 from .linear import FakeQuantizedLinear


-class QATConfigStep(str, Enum):
+class QATStep(str, Enum):
     """
     Enum value for the `step` field in :class:`~torchao.quantization.qat.QATConfig`.
     """
```
```diff
@@ -124,7 +124,7 @@ class QATConfig(AOBaseConfig):
     base_config: Optional[AOBaseConfig]
     activation_config: Optional[FakeQuantizeConfigBase]
     weight_config: Optional[FakeQuantizeConfigBase]
-    step: QATConfigStep
+    step: QATStep

     # Express `step` as a keyword argument
     # TODO: Use `kw_only=True` instead, added in python 3.10
@@ -134,7 +134,7 @@ def __init__(
         activation_config: Optional[FakeQuantizeConfigBase] = None,
         weight_config: Optional[FakeQuantizeConfigBase] = None,
         *,
-        step: QATConfigStep = "prepare",
+        step: QATStep = "prepare",
     ):
         self.base_config = base_config
         self.activation_config = activation_config
@@ -144,7 +144,7 @@

     def __post_init__(self):
         self.step = self.step.lower()
-        all_step_values = [s.value for s in QATConfigStep]
+        all_step_values = [s.value for s in QATStep]
         if self.step not in all_step_values:
             raise ValueError("`step` must be one of %s" % all_step_values)
         if self.base_config is None and self.weight_config is None:
```
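The `__post_init__` hunk lower-cases `step` before checking it against the enum's values, so casing is forgiving but unknown steps fail fast. A self-contained illustration of that check, reusing the sketched enum from above (member values are still an inference):

```Py
from enum import Enum

class QATStep(str, Enum):  # sketch; values inferred from usage
    PREPARE = "prepare"
    CONVERT = "convert"

all_step_values = [s.value for s in QATStep]  # ['prepare', 'convert']
assert "Prepare".lower() in all_step_values   # mixed case is normalized first
assert "train" not in all_step_values         # such a step would raise:
# ValueError: `step` must be one of ['prepare', 'convert']
```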
```diff
@@ -189,7 +189,7 @@ def _qat_config_transform(
     # Swap nn.Embedding -> FakeQuantizedEmbedding
     base_config = config.base_config
     step = config.step
-    if step == QATConfigStep.PREPARE:
+    if step == QATStep.PREPARE:
         if base_config is not None:
             (act_config, weight_config) = _infer_fake_quantize_configs(base_config)
         else:
@@ -212,7 +212,7 @@
         # Swap FakeQuantizedLinear -> nn.Linear
         # Swap FakeQuantizedEmbedding -> nn.Embedding
         # Then apply the base config's transform function to quantize the model
-        assert step == QATConfigStep.CONVERT, "unexpected step '%s' in QATConfig" % step
+        assert step == QATStep.CONVERT, "unexpected step '%s' in QATConfig" % step
         assert base_config is not None, "expected `base_config` in convert step"
         if isinstance(module, FakeQuantizedLinear):
             module = module.to_linear()
```
