tests/quantization/torchao/test_torchao.py (27 changes: 13 additions & 14 deletions)
@@ -73,7 +73,7 @@
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoConfigTest(unittest.TestCase):
     def test_to_dict(self):
         """
@@ -131,7 +131,7 @@ def test_repr(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoTest(unittest.TestCase):
     def tearDown(self):
         gc.collect()
@@ -540,7 +540,7 @@ def test_aobase_config(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoSerializationTest(unittest.TestCase):
     model_name = "hf-internal-testing/tiny-flux-pipe"
 
@@ -651,23 +651,22 @@ def test_aobase_config(self):
         self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device)
 
 
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoCompileTest(QuantCompileTests, unittest.TestCase):
     @property
     def quantization_config(self):
         return PipelineQuantizationConfig(
-            quant_mapping={
-                "transformer": TorchAoConfig(quant_type="int8_weight_only"),
-            },
+            quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())},
         )
 
-    @unittest.skip(
-        "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work "
-        "when compiling."
-    )
     def test_torch_compile_with_cpu_offload(self):
-        super().test_torch_compile_with_cpu_offload()
+        pipe = self._init_pipeline(self.quantization_config, torch.bfloat16)
+        pipe.enable_model_cpu_offload()
+        # No compilation because it fails with:
+        # RuntimeError: _apply(): Couldn't swap Linear.weight
+
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
     @parameterized.expand([False, True])
     @unittest.skip(
@@ -698,7 +697,7 @@ def test_torch_compile_with_group_offload_leaf(self, use_stream):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoTests(unittest.TestCase):
@@ -857,7 +856,7 @@ def test_memory_footprint_int8wo(self):
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoPreserializedModelTests(unittest.TestCase):
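Note on the config change in the `TorchAoCompileTest` hunk: the `quant_mapping` entry moves from the string shortcut `quant_type="int8_weight_only"` to passing a torchao config object (`Int8WeightOnlyConfig`) into `TorchAoConfig` directly. Below is a minimal standalone sketch of the new style, assuming diffusers with torchao >= 0.14.0 installed; the checkpoint is the tiny test model referenced in this file, and the import paths follow the public diffusers/torchao APIs rather than anything shown in this diff.

import torch
from diffusers import DiffusionPipeline, TorchAoConfig
from diffusers.quantizers import PipelineQuantizationConfig
from torchao.quantization import Int8WeightOnlyConfig

# Old style: TorchAoConfig(quant_type="int8_weight_only")
# New style: pass the torchao AOBaseConfig instance directly.
quant_config = PipelineQuantizationConfig(
    quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())},
)

pipe = DiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-flux-pipe",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()

# Small resolution and few steps, mirroring the test's speedy settings.
image = pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256).images[0]

As the updated test shows, CPU offload itself works with the quantized pipeline, but adding torch.compile on top of it still fails with "RuntimeError: _apply(): Couldn't swap Linear.weight", which is why the test now runs the pipeline uncompiled.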