Commit fc3e7a2
Drop support for PyTorch 2.5 and before
**Summary:** We gate on the PyTorch version throughout the repo. Now that PyTorch 2.8 has been released, the oldest PyTorch version we need to support is 2.6. After this commit, we assume the user is running PyTorch 2.6+, and we remove all references to the following deprecated variables:

```
TORCH_VERSION_AT_LEAST_2_6
TORCH_VERSION_AT_LEAST_2_5
TORCH_VERSION_AT_LEAST_2_4
TORCH_VERSION_AT_LEAST_2_3
TORCH_VERSION_AT_LEAST_2_2
TORCH_VERSION_AFTER_2_5
TORCH_VERSION_AFTER_2_4
TORCH_VERSION_AFTER_2_3
TORCH_VERSION_AFTER_2_2
```

**Test Plan:** CI

ghstack-source-id: 21e08ee
Pull Request resolved: #2720
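For readers unfamiliar with these flags, below is a minimal sketch of how a version gate like this is typically defined and used. It is an illustrative assumption, not torchao's exact implementation; `torch_version_at_least` is a hypothetical helper.

```python
# Hypothetical sketch of a version-gate flag, assuming it compares the
# installed torch version against a threshold; torchao.utils' real
# implementation may differ in detail (e.g. handling of dev builds).
import torch
from packaging.version import parse


def torch_version_at_least(min_version: str) -> bool:
    # Strip local build suffixes such as "+cu126" before comparing.
    return parse(torch.__version__.split("+")[0]) >= parse(min_version)


TORCH_VERSION_AT_LEAST_2_5 = torch_version_at_least("2.5.0")

# Before this commit, call sites branched on the flag:
#     if TORCH_VERSION_AT_LEAST_2_5:
#         new_path()
#     else:
#         legacy_path()
# With 2.6 as the minimum supported version, every such gate is always
# true (or always false), so the branch is dead code and gets removed.
```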
1 parent 7214e67 commit fc3e7a2

109 files changed: +604 additions, -1,774 deletions


.github/workflows/regression_test.yml

Lines changed: 12 additions & 12 deletions

```diff
@@ -59,12 +59,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: CUDA 2.5.1
-            runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
-            gpu-arch-type: "cuda"
-            gpu-arch-version: "12.6"
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CUDA 2.6
             runs-on: linux.g5.12xlarge.nvidia.gpu
             torch-spec: 'torch==2.6.0'
@@ -77,13 +71,13 @@ jobs:
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.6"
             dev-requirements-overrides: ""
+          - name: CUDA 2.8
+            runs-on: linux.g5.12xlarge.nvidia.gpu
+            torch-spec: 'torch==2.8.0'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.6"
+            dev-requirements-overrides: ""

-          - name: CPU 2.5.1
-            runs-on: linux.4xlarge
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu'
-            gpu-arch-type: "cpu"
-            gpu-arch-version: ""
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CPU 2.6
             runs-on: linux.4xlarge
             torch-spec: 'torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu'
@@ -96,6 +90,12 @@ jobs:
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
             dev-requirements-overrides: ""
+          - name: CPU 2.8
+            runs-on: linux.4xlarge
+            torch-spec: 'torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu'
+            gpu-arch-type: "cpu"
+            gpu-arch-version: ""
+            dev-requirements-overrides: ""

   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
   with:
```

benchmarks/benchmark_aq.py

Lines changed: 12 additions & 35 deletions

```diff
@@ -20,46 +20,26 @@
     Int4WeightOnlyQuantizedLinearWeight,
     Int8WeightOnlyQuantizedLinearWeight,
 )
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_4,
-    TORCH_VERSION_AT_LEAST_2_5,
-    unwrap_tensor_subclass,
-)


 def _int8wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_woqtensors(mod, **kwargs)
+    quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)


 def _int8da_int8w_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(
-            mod,
-            int8_dynamic_activation_int8_weight(**kwargs),
-            set_inductor_config=False,
-        )
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_dqtensors(mod, **kwargs)
+    quantize_(
+        mod,
+        int8_dynamic_activation_int8_weight(**kwargs),
+        set_inductor_config=False,
+    )


 def _int4wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        kwargs_copy = kwargs.copy()
-        if "groupsize" in kwargs_copy:
-            kwargs_copy["group_size"] = kwargs_copy["groupsize"]
-            del kwargs_copy["groupsize"]
-        quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int4_woqtensors(mod, **kwargs)
+    kwargs_copy = kwargs.copy()
+    if "groupsize" in kwargs_copy:
+        kwargs_copy["group_size"] = kwargs_copy["groupsize"]
+        del kwargs_copy["groupsize"]
+    quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)


 class ToyLinearModel(torch.nn.Module):
@@ -195,21 +175,19 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):
     )


-if __name__ == "__main__" and TORCH_VERSION_AT_LEAST_2_4 and torch.cuda.is_available():
+if __name__ == "__main__" and torch.cuda.is_available():
     all_shapes = [
         (20, 2048, 2048),
     ]

     print("_int8da_int8w_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_dqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
             _int8da_int8w_api, _ref_change_linear_weights_to_int8_dqtensors, M, N, K
         )

     print("_int8wo_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
@@ -218,7 +196,6 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):

     print("_int4wo_api")
     kwargs = {"groupsize": 32}
-    from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
```
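The net effect in this file: each helper now calls the `quantize_` API unconditionally, with no version branch and no `unwrap_tensor_subclass` fallback. A minimal usage sketch, with a toy model standing in for the benchmark's `ToyLinearModel`:

```python
# Illustrative sketch of the simplified code path above; the toy model
# and shapes are stand-ins, not the benchmark's actual harness.
import torch

from torchao.quantization import int8_weight_only, quantize_

model = torch.nn.Sequential(torch.nn.Linear(2048, 2048)).to(torch.bfloat16)
quantize_(model, int8_weight_only(), set_inductor_config=False)

x = torch.randn(20, 2048, dtype=torch.bfloat16)
y = model(x)  # linear weights are now int8 weight-only quantized
```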

docs/source/pretraining.rst

Lines changed: 0 additions & 4 deletions

```diff
@@ -161,10 +161,6 @@ Below is a code snippet showing how to use it:
 from torchao.float8.float8_linear_utils import convert_to_float8_training
 from torchao.float8.float8_linear import Float8Linear
 from torchao.float8 import convert_to_float8_training
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
-
-if not TORCH_VERSION_AT_LEAST_2_5:
-    raise AssertionError("torchao.float8 requires PyTorch version 2.5 or greater")

 # create model and sample input
 m = nn.Sequential(
```
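With the guard gone, the documented conversion is applied directly. A minimal sketch built on the snippet's own imports; the layer sizes are illustrative, and float8 training additionally requires suitable GPU hardware:

```python
# Sketch of the simplified doc snippet: on PyTorch 2.6+ the version
# assertion is unnecessary, so conversion happens unconditionally.
# Layer sizes are illustrative, not from the docs.
import torch.nn as nn

from torchao.float8 import convert_to_float8_training

m = nn.Sequential(
    nn.Linear(2048, 4096),
    nn.Linear(4096, 128),
).cuda()

# Swaps nn.Linear modules for their Float8Linear equivalents in place.
convert_to_float8_training(m)
```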

docs/source/quick_start.rst

Lines changed: 0 additions & 6 deletions

```diff
@@ -95,16 +95,10 @@ it is also much faster!
 .. code:: py

     from torchao.utils import (
-        TORCH_VERSION_AT_LEAST_2_5,
         benchmark_model,
         unwrap_tensor_subclass,
     )

-    # Temporary workaround for tensor subclass + torch.compile
-    # Only needed for torch version < 2.5
-    if not TORCH_VERSION_AT_LEAST_2_5:
-        unwrap_tensor_subclass(model)
-
     num_runs = 100
     torch._dynamo.reset()
     example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
```

scripts/quick_start.py

Lines changed: 1 addition & 10 deletions

```diff
@@ -8,11 +8,7 @@
 import torch

 from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-    benchmark_model,
-    unwrap_tensor_subclass,
-)
+from torchao.utils import benchmark_model

 # ================
 # | Set up model |
@@ -50,11 +46,6 @@ def forward(self, x):
 # | Benchmark |
 # =============

-# Temporary workaround for tensor subclass + torch.compile
-# Only needed for torch version < 2.5
-if not TORCH_VERSION_AT_LEAST_2_5:
-    unwrap_tensor_subclass(model)
-
 num_runs = 100
 torch._dynamo.reset()
 example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
```

test/core/test_config.py

Lines changed: 1 addition & 4 deletions

```diff
@@ -39,7 +39,6 @@
     UIntXWeightOnlyConfig,
 )
 from torchao.sparsity.sparse_api import BlockSparseWeightConfig, SemiSparseWeightConfig
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 # Define test configurations as fixtures
 configs = [
@@ -85,11 +84,9 @@
     ),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step=AWQStep.PREPARE_FOR_LOADING),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step="prepare_for_loading"),
+    FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256]),
 ]

-if TORCH_VERSION_AT_LEAST_2_6:
-    configs += [FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256])]
-

 # Create ids for better test naming
 def get_config_ids(configs):
```

test/dtypes/test_affine_quantized.py

Lines changed: 1 addition & 6 deletions

```diff
@@ -41,7 +41,6 @@
 from torchao.quantization.quant_primitives import MappingType, ZeroPointDomain
 from torchao.testing.utils import skip_if_no_cuda, skip_if_no_gemlite, skip_if_rocm
 from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
     check_cpu_version,
     check_xpu_version,
     is_fbcode,
@@ -151,11 +150,7 @@ def test_weights_only(self):
         with tempfile.NamedTemporaryFile() as f:
             torch.save(ql.state_dict(), f)
             f.seek(0)
-            # `weights_only=True` is enabled for torch 2.5+
-            if TORCH_VERSION_AT_LEAST_2_5:
-                _ = torch.load(f, weights_only=True)
-            else:
-                _ = torch.load(f, weights_only=False)
+            _ = torch.load(f, weights_only=True)

     @unittest.skipIf(len(GPU_DEVICES) == 0, "Need GPU available")
     @common_utils.parametrize("apply_quant", get_quantization_functions(False, False))
```
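The save/load round-trip the test now exercises unconditionally looks roughly like this: a minimal sketch with a plain tensor, whereas the real test saves a quantized linear's state dict:

```python
# Minimal sketch of the weights_only round-trip. weights_only=True
# refuses arbitrary pickled code and loads only allowlisted types,
# which is why older torch versions needed the weights_only=False path.
import tempfile

import torch

state_dict = {"weight": torch.randn(4, 4)}
with tempfile.NamedTemporaryFile() as f:
    torch.save(state_dict, f)
    f.seek(0)
    loaded = torch.load(f, weights_only=True)

assert torch.equal(loaded["weight"], state_dict["weight"])
```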

test/dtypes/test_affine_quantized_float.py

Lines changed: 0 additions & 9 deletions

```diff
@@ -3,15 +3,6 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
-
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-)
-
-if not TORCH_VERSION_AT_LEAST_2_5:
-    pytest.skip("Unsupported PyTorch version", allow_module_level=True)
-
 import copy
 import io
 import random
```

test/dtypes/test_affine_quantized_tensor_parallel.py

Lines changed: 0 additions & 5 deletions

```diff
@@ -24,7 +24,6 @@
 )
 from torchao.quantization.observer import PerRow, PerTensor
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 if common_utils.SEED is None:
     common_utils.SEED = 1234
@@ -127,10 +126,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

         dn_dist(up_dist(input_dtensor))

-        if not TORCH_VERSION_AT_LEAST_2_6:
-            # Need torch 2.6 to support compiled tensor parallelism
-            return
-
         up_compiled = torch.compile(up_dist)
         y_up = up_compiled(input_dtensor)
         dn_compiled = torch.compile(dn_dist)
```

test/dtypes/test_floatx.py

Lines changed: 1 addition & 5 deletions

```diff
@@ -33,7 +33,7 @@
     quantize_,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, is_fbcode
+from torchao.utils import is_fbcode

 _DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
 _Floatx_DTYPES = [(3, 2), (2, 2)]
@@ -107,10 +107,6 @@ def test_to_copy_device(self, ebits, mbits):
         assert floatx_tensor_impl.device.type == "cpu"

     @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
-    @unittest.skipIf(
-        not TORCH_VERSION_AT_LEAST_2_5,
-        reason="quantization only works with torch.compile for 2.5+",
-    )
     @parametrize("ebits,mbits", _Floatx_DTYPES)
     @parametrize("bias", [False, True])
     @parametrize("dtype", [torch.half, torch.bfloat16])
```
