Drop support for PyTorch 2.5 and before #2720

Open · wants to merge 5 commits into gh/andrewor14/20/base
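Most of this PR deletes branches guarded by torchao's TORCH_VERSION_AT_LEAST_* flags, which become dead code once PyTorch 2.6 is the minimum supported version. For context, such a flag boils down to a version comparison; a minimal sketch (a hypothetical reimplementation, not torchao's actual code — see torchao.utils for the real definitions):

    import torch
    from packaging.version import Version

    # Hypothetical illustration of a version-gate flag like the ones this PR removes.
    TORCH_VERSION_AT_LEAST_2_5 = Version(torch.__version__).release >= (2, 5)
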
24 changes: 12 additions & 12 deletions .github/workflows/regression_test.yml
@@ -59,12 +59,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: CUDA 2.5.1
-            runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
-            gpu-arch-type: "cuda"
-            gpu-arch-version: "12.6"
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CUDA 2.6
             runs-on: linux.g5.12xlarge.nvidia.gpu
             torch-spec: 'torch==2.6.0'
@@ -77,13 +71,13 @@
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.6"
             dev-requirements-overrides: ""
+          - name: CUDA 2.8
+            runs-on: linux.g5.12xlarge.nvidia.gpu
+            torch-spec: 'torch==2.8.0'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.6"
+            dev-requirements-overrides: ""

-          - name: CPU 2.5.1
-            runs-on: linux.4xlarge
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu'
-            gpu-arch-type: "cpu"
-            gpu-arch-version: ""
-            dev-requirements-overrides: "s/^pytest$/pytest==7.4.0/"
           - name: CPU 2.6
             runs-on: linux.4xlarge
             torch-spec: 'torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu'
@@ -96,6 +90,12 @@
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
             dev-requirements-overrides: ""
+          - name: CPU 2.8
+            runs-on: linux.4xlarge
+            torch-spec: 'torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu'
+            gpu-arch-type: "cpu"
+            gpu-arch-version: ""
+            dev-requirements-overrides: ""

     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
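With the matrix above, the CI floor moves to PyTorch 2.6. Projects typically pair such a change with a runtime guard so users on older wheels get one clear error rather than scattered failures; a hedged sketch of that pattern (hypothetical, not part of this PR):

    import torch
    from packaging.version import Version

    # Fail fast on wheels older than the support floor exercised in CI above.
    if Version(torch.__version__).release < (2, 6):
        raise ImportError("torchao dropped support for PyTorch 2.5 and before; please upgrade")
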
47 changes: 12 additions & 35 deletions benchmarks/benchmark_aq.py
@@ -20,46 +20,26 @@
     Int4WeightOnlyQuantizedLinearWeight,
     Int8WeightOnlyQuantizedLinearWeight,
 )
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_4,
-    TORCH_VERSION_AT_LEAST_2_5,
-    unwrap_tensor_subclass,
-)


 def _int8wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_woqtensors(mod, **kwargs)
+    quantize_(mod, int8_weight_only(**kwargs), set_inductor_config=False)


 def _int8da_int8w_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        quantize_(
-            mod,
-            int8_dynamic_activation_int8_weight(**kwargs),
-            set_inductor_config=False,
-        )
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int8_dqtensors(mod, **kwargs)
+    quantize_(
+        mod,
+        int8_dynamic_activation_int8_weight(**kwargs),
+        set_inductor_config=False,
+    )


 def _int4wo_api(mod, **kwargs):
-    if TORCH_VERSION_AT_LEAST_2_4:
-        kwargs_copy = kwargs.copy()
-        if "groupsize" in kwargs_copy:
-            kwargs_copy["group_size"] = kwargs_copy["groupsize"]
-            del kwargs_copy["groupsize"]
-        quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)
-        if not TORCH_VERSION_AT_LEAST_2_5:
-            unwrap_tensor_subclass(mod)
-    else:
-        change_linear_weights_to_int4_woqtensors(mod, **kwargs)
+    kwargs_copy = kwargs.copy()
+    if "groupsize" in kwargs_copy:
+        kwargs_copy["group_size"] = kwargs_copy["groupsize"]
+        del kwargs_copy["groupsize"]
+    quantize_(mod, int4_weight_only(**kwargs_copy), set_inductor_config=False)


 class ToyLinearModel(torch.nn.Module):
@@ -195,21 +175,19 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):
     )


-if __name__ == "__main__" and TORCH_VERSION_AT_LEAST_2_4 and torch.cuda.is_available():
+if __name__ == "__main__" and torch.cuda.is_available():
     all_shapes = [
         (20, 2048, 2048),
     ]

     print("_int8da_int8w_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_dqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
             _int8da_int8w_api, _ref_change_linear_weights_to_int8_dqtensors, M, N, K
         )

     print("_int8wo_api")
-    from torchao.quantization.quant_api import change_linear_weights_to_int8_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
@@ -218,7 +196,6 @@ def _bench_quantized_tensor_subclass_perf(api, ref_api, M, N, K, kwargs=None):

     print("_int4wo_api")
     kwargs = {"groupsize": 32}
-    from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors

     for M, N, K in all_shapes:
         _bench_quantized_tensor_subclass_perf(
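With the 2.4/2.5 fallbacks gone, each benchmark helper reduces to a single eager quantize_ call. A minimal usage sketch of that surviving path (toy model, shapes, and import path are assumptions; set_inductor_config=False mirrors the helpers above):

    import torch
    from torchao.quantization import int8_weight_only, quantize_

    # Hypothetical toy module standing in for the file's ToyLinearModel.
    model = torch.nn.Sequential(torch.nn.Linear(2048, 2048)).to("cuda", torch.bfloat16)
    quantize_(model, int8_weight_only(), set_inductor_config=False)
    out = model(torch.randn(20, 2048, device="cuda", dtype=torch.bfloat16))
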
4 changes: 0 additions & 4 deletions docs/source/pretraining.rst
@@ -161,10 +161,6 @@ Below is a code snippet showing how to use it:
     from torchao.float8.float8_linear_utils import convert_to_float8_training
     from torchao.float8.float8_linear import Float8Linear
     from torchao.float8 import convert_to_float8_training
-    from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
-
-    if not TORCH_VERSION_AT_LEAST_2_5:
-        raise AssertionError("torchao.float8 requires PyTorch version 2.5 or greater")

     # create model and sample input
     m = nn.Sequential(
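With the version assertion gone, the doc snippet reduces to the conversion call itself. A compact, hedged sketch of that flow (the toy model is an assumption; convert_to_float8_training is the API the doc already imports):

    import torch
    from torch import nn
    from torchao.float8 import convert_to_float8_training

    # Toy model; float8 training additionally requires recent CUDA hardware.
    m = nn.Sequential(nn.Linear(2048, 4096), nn.Linear(4096, 128)).to("cuda", torch.bfloat16)
    convert_to_float8_training(m)  # swaps eligible nn.Linear modules in place
    y = m(torch.randn(16, 2048, device="cuda", dtype=torch.bfloat16))
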
6 changes: 0 additions & 6 deletions docs/source/quick_start.rst
@@ -95,16 +95,10 @@ it is also much faster!
 .. code:: py

     from torchao.utils import (
-        TORCH_VERSION_AT_LEAST_2_5,
         benchmark_model,
         unwrap_tensor_subclass,
     )

-    # Temporary workaround for tensor subclass + torch.compile
-    # Only needed for torch version < 2.5
-    if not TORCH_VERSION_AT_LEAST_2_5:
-        unwrap_tensor_subclass(model)
-
     num_runs = 100
     torch._dynamo.reset()
     example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
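After the workaround removal, the quick-start benchmark boils down to compile-then-time. A hedged sketch of that pattern (the model is a stand-in; the benchmark_model call pattern is assumed from the surrounding doc — check torchao.utils for the exact signature):

    import torch
    from torchao.utils import benchmark_model

    model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to("cuda", torch.bfloat16)
    num_runs = 100
    torch._dynamo.reset()
    example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
    model = torch.compile(model, mode="max-autotune", fullgraph=True)
    elapsed = benchmark_model(model, num_runs, example_inputs)  # assumed call pattern
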
11 changes: 1 addition & 10 deletions scripts/quick_start.py
@@ -8,11 +8,7 @@
 import torch

 from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-    benchmark_model,
-    unwrap_tensor_subclass,
-)
+from torchao.utils import benchmark_model

 # ================
 # | Set up model |
@@ -50,11 +46,6 @@ def forward(self, x):
 # | Benchmark |
 # =============

-# Temporary workaround for tensor subclass + torch.compile
-# Only needed for torch version < 2.5
-if not TORCH_VERSION_AT_LEAST_2_5:
-    unwrap_tensor_subclass(model)
-
 num_runs = 100
 torch._dynamo.reset()
 example_inputs = (torch.randn(1, 1024, dtype=torch.bfloat16, device="cuda"),)
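The script now keeps only benchmark_model from torchao.utils; quantization itself goes through the config API imported at the top. A minimal hedged sketch of that usage (toy model and group size are arbitrary choices, not values from the script):

    import torch
    from torchao.quantization import Int4WeightOnlyConfig, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to("cuda", torch.bfloat16)
    quantize_(model, Int4WeightOnlyConfig(group_size=32))
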
5 changes: 1 addition & 4 deletions test/core/test_config.py
@@ -39,7 +39,6 @@
     UIntXWeightOnlyConfig,
 )
 from torchao.sparsity.sparse_api import BlockSparseWeightConfig, SemiSparseWeightConfig
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 # Define test configurations as fixtures
 configs = [
@@ -85,11 +84,9 @@
     ),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step=AWQStep.PREPARE_FOR_LOADING),
     AWQConfig(Int4WeightOnlyConfig(group_size=128), step="prepare_for_loading"),
+    FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256]),
 ]

-if TORCH_VERSION_AT_LEAST_2_6:
-    configs += [FbgemmConfig(torch.bfloat16, torch.int4, torch.bfloat16, [1, 1, 256])]
-

 # Create ids for better test naming
 def get_config_ids(configs):
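For readers unfamiliar with the fixture pattern: the now-flat configs list feeds a parametrized test whose ids come from the config class names. A generic, simplified sketch (everything except the get_config_ids idea is hypothetical):

    import pytest

    class DummyConfigA:
        pass

    class DummyConfigB:
        pass

    configs = [DummyConfigA(), DummyConfigB()]

    def get_config_ids(configs):
        return [type(config).__name__ for config in configs]

    @pytest.mark.parametrize("config", configs, ids=get_config_ids(configs))
    def test_config_round_trip(config):
        assert config is not None
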
7 changes: 1 addition & 6 deletions test/dtypes/test_affine_quantized.py
@@ -41,7 +41,6 @@
 from torchao.quantization.quant_primitives import MappingType, ZeroPointDomain
 from torchao.testing.utils import skip_if_no_cuda, skip_if_no_gemlite, skip_if_rocm
 from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
     check_cpu_version,
     check_xpu_version,
     is_fbcode,
@@ -151,11 +150,7 @@ def test_weights_only(self):
         with tempfile.NamedTemporaryFile() as f:
             torch.save(ql.state_dict(), f)
             f.seek(0)
-            # `weights_only=True` is enabled for torch 2.5+
-            if TORCH_VERSION_AT_LEAST_2_5:
-                _ = torch.load(f, weights_only=True)
-            else:
-                _ = torch.load(f, weights_only=False)
+            _ = torch.load(f, weights_only=True)

     @unittest.skipIf(len(GPU_DEVICES) == 0, "Need GPU available")
     @common_utils.parametrize("apply_quant", get_quantization_functions(False, False))
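The test now always exercises the safe-loading path. A standalone sketch of that save/load round trip (plain tensors here; the real test saves a quantized module's state dict):

    import tempfile
    import torch

    state_dict = {"weight": torch.randn(4, 4)}
    with tempfile.NamedTemporaryFile() as f:
        torch.save(state_dict, f)
        f.seek(0)
        # weights_only=True restricts unpickling to allow-listed types.
        loaded = torch.load(f, weights_only=True)
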
9 changes: 0 additions & 9 deletions test/dtypes/test_affine_quantized_float.py
@@ -3,15 +3,6 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
-
-from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
-)
-
-if not TORCH_VERSION_AT_LEAST_2_5:
-    pytest.skip("Unsupported PyTorch version", allow_module_level=True)
-
 import copy
 import io
 import random
5 changes: 0 additions & 5 deletions test/dtypes/test_affine_quantized_tensor_parallel.py
@@ -24,7 +24,6 @@
 )
 from torchao.quantization.observer import PerRow, PerTensor
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 if common_utils.SEED is None:
     common_utils.SEED = 1234
@@ -127,10 +126,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

         dn_dist(up_dist(input_dtensor))

-        if not TORCH_VERSION_AT_LEAST_2_6:
-            # Need torch 2.6 to support compiled tensor parallelism
-            return
-
         up_compiled = torch.compile(up_dist)
         y_up = up_compiled(input_dtensor)
         dn_compiled = torch.compile(dn_dist)
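The deleted early return guarded torch.compile over DTensor-based tensor parallelism, which needs torch 2.6+. A single-process sketch of that combination (gloo backend, world size 1, CPU; simplified far below the real multi-GPU test, and every name here is standard PyTorch rather than code from this PR):

    import os
    import torch
    import torch.distributed as dist
    from torch.distributed.device_mesh import init_device_mesh
    from torch.distributed.tensor.parallel import ColwiseParallel, parallelize_module

    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29501")
    dist.init_process_group("gloo", rank=0, world_size=1)

    mesh = init_device_mesh("cpu", (1,))
    layer = torch.nn.Linear(16, 16)
    layer = parallelize_module(layer, mesh, ColwiseParallel())
    compiled = torch.compile(layer)  # the step that used to require the 2.6 gate
    y = compiled(torch.randn(2, 16))
    dist.destroy_process_group()
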
6 changes: 1 addition & 5 deletions test/dtypes/test_floatx.py
@@ -33,7 +33,7 @@
     quantize_,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, is_fbcode
+from torchao.utils import is_fbcode

 _DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
 _Floatx_DTYPES = [(3, 2), (2, 2)]
@@ -107,10 +107,6 @@ def test_to_copy_device(self, ebits, mbits):
         assert floatx_tensor_impl.device.type == "cpu"

     @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
-    @unittest.skipIf(
-        not TORCH_VERSION_AT_LEAST_2_5,
-        reason="quantization only works with torch.compile for 2.5+",
-    )
     @parametrize("ebits,mbits", _Floatx_DTYPES)
     @parametrize("bias", [False, True])
     @parametrize("dtype", [torch.half, torch.bfloat16])
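With the compile gate removed, the floatx test always runs quantize-then-compile. A hedged sketch of that path (fp6 layout with 3 exponent / 2 mantissa bits, matching _Floatx_DTYPES; fpx_weight_only is assumed to be the public entry point, and the toy model is hypothetical):

    import torch
    from torchao.quantization import fpx_weight_only, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(64, 64)).to("cuda", torch.half)
    quantize_(model, fpx_weight_only(3, 2))  # ebits=3, mbits=2
    model = torch.compile(model)
    y = model(torch.randn(8, 64, device="cuda", dtype=torch.half))
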
15 changes: 4 additions & 11 deletions test/dtypes/test_uint4.py
@@ -34,7 +34,6 @@
     _replace_with_custom_fn_if_matches_filter,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_5


 def _apply_weight_only_uint4_quant(model):
@@ -243,16 +242,10 @@ def forward(self, x):

         # program capture
         m = copy.deepcopy(m_eager)
-        if TORCH_VERSION_AT_LEAST_2_5:
-            m = torch.export.export_for_training(
-                m,
-                example_inputs,
-            ).module()
-        else:
-            m = torch._export.capture_pre_autograd_graph(
-                m,
-                example_inputs,
-            ).module()
+        m = torch.export.export_for_training(
+            m,
+            example_inputs,
+        ).module()

         m = prepare_pt2e(m, quantizer)
         # Calibrate
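The capture step now has a single path. A minimal standalone sketch of training export feeding PT2E-style preparation (the toy module is hypothetical; prepare_pt2e and the quantizer come from the test's own imports and are omitted here):

    import copy
    import torch

    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(8, 8)

        def forward(self, x):
            return self.linear(x)

    m_eager = Toy().eval()
    example_inputs = (torch.randn(2, 8),)
    m = copy.deepcopy(m_eager)
    m = torch.export.export_for_training(m, example_inputs).module()
    # m is now a GraphModule ready for prepare_pt2e(m, quantizer)
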