tests/quantization/torchao/test_torchao.py (27 changes: 13 additions & 14 deletions)
@@ -73,7 +73,7 @@
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoConfigTest(unittest.TestCase):
     def test_to_dict(self):
         """
@@ -131,7 +131,7 @@ def test_repr(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoTest(unittest.TestCase):
     def tearDown(self):
         gc.collect()
@@ -540,7 +540,7 @@ def test_aobase_config(self):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoSerializationTest(unittest.TestCase):
     model_name = "hf-internal-testing/tiny-flux-pipe"
 
@@ -651,23 +651,22 @@ def test_aobase_config(self):
         self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device)
 
 
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 class TorchAoCompileTest(QuantCompileTests, unittest.TestCase):
     @property
     def quantization_config(self):
         return PipelineQuantizationConfig(
-            quant_mapping={
-                "transformer": TorchAoConfig(quant_type="int8_weight_only"),
-            },
+            quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())},
         )
 
-    @unittest.skip(
-        "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work "
-        "when compiling."
-    )
     def test_torch_compile_with_cpu_offload(self):
-        super().test_torch_compile_with_cpu_offload()
+        pipe = self._init_pipeline(self.quantization_config, torch.bfloat16)
+        pipe.enable_model_cpu_offload()
+        # No compilation because it fails with:
+        # RuntimeError: _apply(): Couldn't swap Linear.weight
+
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
     @parameterized.expand([False, True])
     @unittest.skip(
@@ -698,7 +697,7 @@ def test_torch_compile_with_group_offload_leaf(self, use_stream):
 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoTests(unittest.TestCase):
@@ -857,7 +856,7 @@ def test_memory_footprint_int8wo(self):
 
 @require_torch
 @require_torch_accelerator
-@require_torchao_version_greater_or_equal("0.7.0")
+@require_torchao_version_greater_or_equal("0.14.0")
 @slow
 @nightly
 class SlowTorchAoPreserializedModelTests(unittest.TestCase):
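Note on the config change in the `TorchAoCompileTest` hunk: the `quant_mapping` entry moves from the string shortcut `quant_type="int8_weight_only"` to passing a torchao config object (`Int8WeightOnlyConfig`) into `TorchAoConfig` directly. Below is a minimal standalone sketch of the new style, assuming diffusers with torchao >= 0.14.0 installed; the checkpoint is the tiny test model referenced in this file, and the import paths follow the public diffusers/torchao APIs rather than anything shown in this diff.

import torch
from diffusers import DiffusionPipeline, TorchAoConfig
from diffusers.quantizers import PipelineQuantizationConfig
from torchao.quantization import Int8WeightOnlyConfig

# Old style: TorchAoConfig(quant_type="int8_weight_only")
# New style: pass the torchao AOBaseConfig instance directly.
quant_config = PipelineQuantizationConfig(
    quant_mapping={"transformer": TorchAoConfig(Int8WeightOnlyConfig())},
)

pipe = DiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-flux-pipe",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()

# Small resolution and few steps, mirroring the test's speedy settings.
image = pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256).images[0]

As the updated test shows, CPU offload itself works with the quantized pipeline, but adding torch.compile on top of it still fails with "RuntimeError: _apply(): Couldn't swap Linear.weight", which is why the test now runs the pipeline uncompiled.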