Drop support for PyTorch 2.5 and before #2720

Open · wants to merge 5 commits into gh/andrewor14/20/base
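Most of this PR deletes branches guarded by torchao's TORCH_VERSION_AT_LEAST_* flags, which become dead code once PyTorch 2.6 is the minimum supported version. For context, such a flag boils down to a version comparison; a minimal sketch (a hypothetical reimplementation, not torchao's actual code — see torchao.utils for the real definitions):

    import torch
    from packaging.version import Version

    # Hypothetical illustration of a version-gate flag like the ones this PR removes.
    TORCH_VERSION_AT_LEAST_2_5 = Version(torch.__version__).release >= (2, 5)
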
24 changes: 12 additions & 12 deletions .github/workflows/regression_test.yml
@@ -59,12 +59,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: CUDA 2.5.1
-            runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
-            gpu-arch-type: "cuda"
-            gpu-arch-version: "12.6"
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CUDA 2.6
             runs-on: linux.g5.12xlarge.nvidia.gpu
             torch-spec: 'torch==2.6.0'
@@ -77,13 +71,13 @@
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.6"
             dev-requirements-overrides: ""
+          - name: CUDA 2.8
+            runs-on: linux.g5.12xlarge.nvidia.gpu
+            torch-spec: 'torch==2.8.0'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.6"
+            dev-requirements-overrides: ""

-          - name: CPU 2.5.1
-            runs-on: linux.4xlarge
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu'
-            gpu-arch-type: "cpu"
-            gpu-arch-version: ""
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CPU 2.6
             runs-on: linux.4xlarge
             torch-spec: 'torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu'
@@ -96,6 +90,12 @@
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
             dev-requirements-overrides: ""
+          - name: CPU 2.8
+            runs-on: linux.4xlarge
+            torch-spec: 'torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu'
+            gpu-arch-type: "cpu"
+            gpu-arch-version: ""
+            dev-requirements-overrides: ""

     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
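With the matrix above, the CI floor moves to PyTorch 2.6. Projects typically pair such a change with a runtime guard so users on older wheels get one clear error rather than scattered failures; a hedged sketch of that pattern (hypothetical, not part of this PR):

    import torch
    from packaging.version import Version

    # Fail fast on wheels older than the support floor exercised in CI above.
    if Version(torch.__version__).release < (2, 6):
        raise ImportError("torchao dropped support for PyTorch 2.5 and before; please upgrade")
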
47 changes: 12 additions & 35 deletions benchmarks/benchmark_aq.py
@@ -20,46 +20,26 @@
     Int4WeightOnlyQuantizedLinearWeight,
     Int8WeightOnlyQuantizedLinearWeight,
 )
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_4,
-    TORCH_VERSION_AT_LEAST_2_5,
-    unwrap_tensor_subclass,
-)


 def _int8wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_woqtensors(mod, **kwargs)
+    quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)


 def _int8da_int8w_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(
-            mod,
-            int8_dynamic_activation_int8_weight(**kwargs),
-            set_inductor_config=False,
-        )
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_dqtensors(mod, **kwargs)
+    quantize_(
+        mod,
+        int8_dynamic_activation_int8_weight(**kwargs),
+        set_inductor_config=False,
+    )


 def _int4wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        kwargs_copy = kwargs.copy()
-        if "groupsize" in kwargs_copy:
-            kwargs_copy["group_size"] = kwargs_copy["groupsize"]
-            del kwargs_copy["groupsize"]
-        quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int4_woqtensors(mod, **kwargs)
+    kwargs_copy = kwargs.copy()
+    if "groupsize" in kwargs_copy:
+        kwargs_copy["group_size"] = kwargs_copy["groupsize"]
+        del kwargs_copy["groupsize"]
+    quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)


 class ToyLinearModel(torch.nn.Module):
@@ -195,21 +175,19 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):
     )


-if __name__ == "__main__" and TORCH_VERSION_AT_LEAST_2_4 and torch.cuda.is_available():
+if __name__ == "__main__" and torch.cuda.is_available():
     all_shapes = [
         (20, 2048, 2048),
     ]

     print("_int8da_int8w_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_dqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
             _int8da_int8w_api, _ref_change_linear_weights_to_int8_dqtensors, M, N, K
         )

     print("_int8wo_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
@@ -218,7 +196,6 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):

     print("_int4wo_api")
     kwargs = {"groupsize": 32}
-    from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
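With the 2.4/2.5 fallbacks gone, each benchmark helper reduces to a single eager quantize_ call. A minimal usage sketch of that surviving path (toy model, shapes, and import path are assumptions; set_inductor_config=False mirrors the helpers above):

    import torch
    from torchao.quantization import int8_weight_only, quantize_

    # Hypothetical toy module standing in for the file's ToyLinearModel.
    model = torch.nn.Sequential(torch.nn.Linear(2048, 2048)).to("cuda", torch.bfloat16)
    quantize_(model, int8_weight_only(), set_inductor_config=False)
    out = model(torch.randn(20, 2048, device="cuda", dtype=torch.bfloat16))
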
4 changes: 0 additions & 4 deletions docs/source/pretraining.rst
@@ -161,10 +161,6 @@ Below is a code snippet showing how to use it:
     from torchao.float8.float8_linear_utils import convert_to_float8_training
     from torchao.float8.float8_linear import Float8Linear
     from torchao.float8 import convert_to_float8_training
-    from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
-
-    if not TORCH_VERSION_AT_LEAST_2_5:
-        raise AssertionError("torchao.float8 requires PyTorch version 2.5 or greater")

     # create model and sample input
     m = nn.Sequential(
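With the version assertion gone, the doc snippet reduces to the conversion call itself. A compact, hedged sketch of that flow (the toy model is an assumption; convert_to_float8_training is the API the doc already imports):

    import torch
    from torch import nn
    from torchao.float8 import convert_to_float8_training

    # Toy model; float8 training additionally requires recent CUDA hardware.
    m = nn.Sequential(nn.Linear(2048, 4096), nn.Linear(4096, 128)).to("cuda", torch.bfloat16)
    convert_to_float8_training(m)  # swaps eligible nn.Linear modules in place
    y = m(torch.randn(16, 2048, device="cuda", dtype=torch.bfloat16))
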
6 changes: 0 additions & 6 deletions docs/source/quick_start.rst
@@ -95,16 +95,10 @@ it is also much faster!
 .. code:: py

     from torchao.utils import (
-        TORCH_VERSION_AT_LEAST_2_5,
         benchmark_model,
         unwrap_tensor_subclass,
     )

-    # Temporary workaround for tensor subclass + torch.compile
-    # Only needed for torch version < 2.5
-    if not TORCH_VERSION_AT_LEAST_2_5:
-        unwrap_tensor_subclass(model)
-
     num_runs = 100
     torch._dynamo.reset()
     example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
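After the workaround removal, the quick-start benchmark boils down to compile-then-time. A hedged sketch of that pattern (the model is a stand-in; the benchmark_model call pattern is assumed from the surrounding doc — check torchao.utils for the exact signature):

    import torch
    from torchao.utils import benchmark_model

    model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to("cuda", torch.bfloat16)
    num_runs = 100
    torch._dynamo.reset()
    example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
    model = torch.compile(model, mode="max-autotune", fullgraph=True)
    elapsed = benchmark_model(model, num_runs, example_inputs)  # assumed call pattern
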
11 changes: 1 addition & 10 deletions scripts/quick_start.py
@@ -8,11 +8,7 @@
 import torch

 from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-    benchmark_model,
-    unwrap_tensor_subclass,
-)
+from torchao.utils import benchmark_model

 # ================
 # | Set up model |
@@ -50,11 +46,6 @@ def forward(self, x):
 # | Benchmark |
 # =============

-# Temporary workaround for tensor subclass + torch.compile
-# Only needed for torch version < 2.5
-if not TORCH_VERSION_AT_LEAST_2_5:
-    unwrap_tensor_subclass(model)
-
 num_runs = 100
 torch._dynamo.reset()
 example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
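The script now keeps only benchmark_model from torchao.utils; quantization itself goes through the config API imported at the top. A minimal hedged sketch of that usage (toy model and group size are arbitrary choices, not values from the script):

    import torch
    from torchao.quantization import Int4WeightOnlyConfig, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to("cuda", torch.bfloat16)
    quantize_(model, Int4WeightOnlyConfig(group_size=32))
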
5 changes: 1 addition & 4 deletions test/core/test_config.py
@@ -39,7 +39,6 @@
     UIntXWeightOnlyConfig,
 )
 from torchao.sparsity.sparse_api import BlockSparseWeightConfig, SemiSparseWeightConfig
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 # Define test configurations as fixtures
 configs = [
@@ -85,11 +84,9 @@
     ),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step=AWQStep.PREPARE_FOR_LOADING),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step="prepare_for_loading"),
+    FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256]),
 ]

-if TORCH_VERSION_AT_LEAST_2_6:
-    configs += [FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256])]
-

 # Create ids for better test naming
 def get_config_ids(configs):
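For readers unfamiliar with the fixture pattern: the now-flat configs list feeds a parametrized test whose ids come from the config class names. A generic, simplified sketch (everything except the get_config_ids idea is hypothetical):

    import pytest

    class DummyConfigA:
        pass

    class DummyConfigB:
        pass

    configs = [DummyConfigA(), DummyConfigB()]

    def get_config_ids(configs):
        return [type(config).__name__ for config in configs]

    @pytest.mark.parametrize("config", configs, ids=get_config_ids(configs))
    def test_config_round_trip(config):
        assert config is not None
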
7 changes: 1 addition & 6 deletions test/dtypes/test_affine_quantized.py
@@ -41,7 +41,6 @@
 from torchao.quantization.quant_primitives import MappingType, ZeroPointDomain
 from torchao.testing.utils import skip_if_no_cuda, skip_if_no_gemlite, skip_if_rocm
 from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
     check_cpu_version,
     check_xpu_version,
     is_fbcode,
@@ -151,11 +150,7 @@ def test_weights_only(self):
         with tempfile.NamedTemporaryFile() as f:
             torch.save(ql.state_dict(), f)
             f.seek(0)
-            # `weights_only=True` is enabled for torch 2.5+
-            if TORCH_VERSION_AT_LEAST_2_5:
-                _ = torch.load(f, weights_only=True)
-            else:
-                _ = torch.load(f, weights_only=False)
+            _ = torch.load(f, weights_only=True)

     @unittest.skipIf(len(GPU_DEVICES) == 0, "Need GPU available")
     @common_utils.parametrize("apply_quant", get_quantization_functions(False, False))
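The test now always exercises the safe-loading path. A standalone sketch of that save/load round trip (plain tensors here; the real test saves a quantized module's state dict):

    import tempfile
    import torch

    state_dict = {"weight": torch.randn(4, 4)}
    with tempfile.NamedTemporaryFile() as f:
        torch.save(state_dict, f)
        f.seek(0)
        # weights_only=True restricts unpickling to allow-listed types.
        loaded = torch.load(f, weights_only=True)
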
9 changes: 0 additions & 9 deletions test/dtypes/test_affine_quantized_float.py
@@ -3,15 +3,6 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
-
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-)
-
-if not TORCH_VERSION_AT_LEAST_2_5:
-    pytest.skip("Unsupported PyTorch version", allow_module_level=True)
-
 import copy
 import io
 import random
5 changes: 0 additions & 5 deletions test/dtypes/test_affine_quantized_tensor_parallel.py
@@ -24,7 +24,6 @@
 )
 from torchao.quantization.observer import PerRow, PerTensor
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 if common_utils.SEED is None:
     common_utils.SEED = 1234
@@ -127,10 +126,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

         dn_dist(up_dist(input_dtensor))

-        if not TORCH_VERSION_AT_LEAST_2_6:
-            # Need torch 2.6 to support compiled tensor parallelism
-            return
-
         up_compiled = torch.compile(up_dist)
         y_up = up_compiled(input_dtensor)
         dn_compiled = torch.compile(dn_dist)
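The deleted early return guarded torch.compile over DTensor-based tensor parallelism, which needs torch 2.6+. A single-process sketch of that combination (gloo backend, world size 1, CPU; simplified far below the real multi-GPU test, and every name here is standard PyTorch rather than code from this PR):

    import os
    import torch
    import torch.distributed as dist
    from torch.distributed.device_mesh import init_device_mesh
    from torch.distributed.tensor.parallel import ColwiseParallel, parallelize_module

    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29501")
    dist.init_process_group("gloo", rank=0, world_size=1)

    mesh = init_device_mesh("cpu", (1,))
    layer = torch.nn.Linear(16, 16)
    layer = parallelize_module(layer, mesh, ColwiseParallel())
    compiled = torch.compile(layer)  # the step that used to require the 2.6 gate
    y = compiled(torch.randn(2, 16))
    dist.destroy_process_group()
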
6 changes: 1 addition & 5 deletions test/dtypes/test_floatx.py
@@ -33,7 +33,7 @@
     quantize_,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, is_fbcode
+from torchao.utils import is_fbcode

 _DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
 _Floatx_DTYPES = [(3, 2), (2, 2)]
@@ -107,10 +107,6 @@ def test_to_copy_device(self, ebits, mbits):
         assert floatx_tensor_impl.device.type == "cpu"

     @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
-    @unittest.skipIf(
-        not TORCH_VERSION_AT_LEAST_2_5,
-        reason="quantization only works with torch.compile for 2.5+",
-    )
     @parametrize("ebits,mbits", _Floatx_DTYPES)
     @parametrize("bias", [False, True])
     @parametrize("dtype", [torch.half, torch.bfloat16])
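With the compile gate removed, the floatx test always runs quantize-then-compile. A hedged sketch of that path (fp6 layout with 3 exponent / 2 mantissa bits, matching _Floatx_DTYPES; fpx_weight_only is assumed to be the public entry point, and the toy model is hypothetical):

    import torch
    from torchao.quantization import fpx_weight_only, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(64, 64)).to("cuda", torch.half)
    quantize_(model, fpx_weight_only(3, 2))  # ebits=3, mbits=2
    model = torch.compile(model)
    y = model(torch.randn(8, 64, device="cuda", dtype=torch.half))
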
15 changes: 4 additions & 11 deletions test/dtypes/test_uint4.py
@@ -34,7 +34,6 @@
     _replace_with_custom_fn_if_matches_filter,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5


 def _apply_weight_only_uint4_quant(model):
@@ -243,16 +242,10 @@ def forward(self, x):

         # program capture
         m = copy.deepcopy(m_eager)
-        if TORCH_VERSION_AT_LEAST_2_5:
-            m = torch.export.export_for_training(
-                m,
-                example_inputs,
-            ).module()
-        else:
-            m = torch._export.capture_pre_autograd_graph(
-                m,
-                example_inputs,
-            ).module()
+        m = torch.export.export_for_training(
+            m,
+            example_inputs,
+        ).module()

         m = prepare_pt2e(m, quantizer)
         # Calibrate
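The capture step now has a single path. A minimal standalone sketch of training export feeding PT2E-style preparation (the toy module is hypothetical; prepare_pt2e and the quantizer come from the test's own imports and are omitted here):

    import copy
    import torch

    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(8, 8)

        def forward(self, x):
            return self.linear(x)

    m_eager = Toy().eval()
    example_inputs = (torch.randn(2, 8),)
    m = copy.deepcopy(m_eager)
    m = torch.export.export_for_training(m, example_inputs).module()
    # m is now a GraphModule ready for prepare_pt2e(m, quantizer)
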