diff --git a/bitsandbytes/backends/utils.py b/bitsandbytes/backends/utils.py
index 1543f3474..2ba8ff318 100755
--- a/bitsandbytes/backends/utils.py
+++ b/bitsandbytes/backends/utils.py
@@ -18,7 +18,7 @@
     import triton.language as tl  # noqa: F401

     triton_available = True
-except ImportError as e:
+except ImportError:
     triton_available = False

diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 2b89b5a76..c9f5ece60 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -242,7 +242,6 @@ def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total_bits=8)
     assert e + p == total_bits - has_sign
     # the exponent is biased to 2^(e-1) -1 == 0
     evalues = []
-    pvalues = []
     for i, val in enumerate(range(-(2 ** (exponent_bits - has_sign)), 2 ** (exponent_bits - has_sign), 1)):
         evalues.append(2**val)

@@ -1365,8 +1364,6 @@ def optimizer_update_8bit_blockwise(
     gnorm_scale: float = 1.0,
     skip_zeros=False,
 ) -> None:
-    optim_func = None
-
     is_on_gpu([p, g, state1, state2, qmap1, qmap2, absmax1, absmax2])

     torch.ops.bitsandbytes.optimizer_update_8bit_blockwise(
@@ -2116,7 +2113,7 @@ def spmm_coo(
     assert cooA.values.numel() == nnz
     assert cooA.cols == B.shape[0]

-    transposed_B = False if B.is_contiguous() else True
+    transposed_B = not B.is_contiguous()
     ldb = B.stride()[(1 if transposed_B else 0)]
     ldc = B.shape[1]

@@ -2165,12 +2162,7 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
     assert cooA.values.numel() == nnz
     assert cooA.cols == B.shape[0], f"{cooA.cols} vs {B.shape}"

-    transposed_B = False if B.is_contiguous() else True
-
-    ldb = B.stride()[(1 if transposed_B else 0)]
-    ldc = B.shape[1]
-
-    values, counts = torch.unique(cooA.rowidx, return_counts=True)
+    _, counts = torch.unique(cooA.rowidx, return_counts=True)
     offset = counts.cumsum(0).int()
     max_count, max_idx = torch.sort(counts, descending=True)
     max_idx = max_idx.int()
@@ -2190,11 +2182,8 @@
     cnnz_rows = ct.c_int32(counts.numel())
     cnnz = ct.c_int32(cooA.nnz)
     crowsA = ct.c_int32(cooA.rows)
-    ccolsA = ct.c_int32(cooA.cols)
     crowsB = ct.c_int32(B.shape[1])
     ccolsB = ct.c_int32(B.shape[1])
-    cldb = ct.c_int32(ldb)
-    cldc = ct.c_int32(ldc)

     with _cuda_device_of(B):
         is_on_gpu([cooA.rowidx, cooA.colidx, cooA.values, B, out, dequant_stats])
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index e599643cc..1cef1f5e9 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -480,7 +480,7 @@ def __init__(
         )
         # self.persistent_buffers = []  # TODO consider as way to save quant state
         self.compute_dtype = compute_dtype
-        self.compute_type_is_set = False if compute_dtype is None else True
+        self.compute_type_is_set = compute_dtype is not None
         self.quant_state = None
         self.quant_storage = quant_storage
         self.ipex_linear_is_set = False
@@ -1150,4 +1150,4 @@ def forward(self, x):
         if self.weight.CB is not None:
             self.init_8bit_state()

-        out = bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
+        return bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
diff --git a/bitsandbytes/optim/lars.py b/bitsandbytes/optim/lars.py
index 90c3686fe..fa2af57bc 100644
--- a/bitsandbytes/optim/lars.py
+++ b/bitsandbytes/optim/lars.py
@@ -231,9 +231,6 @@ def step(self, closure=None):
                 loss = closure()

         for group in self.param_groups:
-            params_with_grad = []
-            d_p_list = []
-            momentum_buffer_list = []
             weight_decay = group["weight_decay"]
             momentum = group["momentum"]
             dampening = group["dampening"]
diff --git a/bitsandbytes/optim/optimizer.py b/bitsandbytes/optim/optimizer.py
index 7a40f1b75..ea3ff32c9 100644
--- a/bitsandbytes/optim/optimizer.py
+++ b/bitsandbytes/optim/optimizer.py
@@ -272,8 +272,6 @@ def step(self, closure=None):
             with torch.enable_grad():
                 loss = closure()

-        overflows = []
-
         if not self.initialized:
             self.check_overrides()
             self.to_gpu()  # needed for fairseq pure fp16 training
diff --git a/bitsandbytes/research/autograd/_functions.py b/bitsandbytes/research/autograd/_functions.py
index d9718382b..9c7afc354 100644
--- a/bitsandbytes/research/autograd/_functions.py
+++ b/bitsandbytes/research/autograd/_functions.py
@@ -235,7 +235,7 @@ def forward(ctx, A, B, out=None, bias=None, state: Optional[MatmulLtState] = Non
         # 2. Quantize B
         if state.has_fp16_weights:
             # print('B shape', B.shape)
-            has_grad = True if (getattr(B, "grad", None) is not None) else False
+            has_grad = getattr(B, "grad", None) is not None
             is_transposed = not B.is_contiguous() and B.shape[0] == B.stride(1)
             if is_transposed:
                 B = B.contiguous()
diff --git a/bitsandbytes/utils.py b/bitsandbytes/utils.py
index a3b043ba0..cbbe29d3f 100644
--- a/bitsandbytes/utils.py
+++ b/bitsandbytes/utils.py
@@ -92,11 +92,6 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
     if rdm:
         return torch.randint(0, weight.shape[1], size=(topk,), device=weight.device).long()

-    m = weight.mean(reduction_dim)
-    mm = m.mean()
-    mstd = m.std()
-    zm = (m - mm) / mstd
-
     std = weight.std(reduction_dim)
     stdm = std.mean()
     stdstd = std.std()
diff --git a/install_cuda.py b/install_cuda.py
index c87deaedf..0122be04b 100644
--- a/install_cuda.py
+++ b/install_cuda.py
@@ -87,7 +87,7 @@ def main():

     # Install CUDA version(s)
     if version == "all":
-        for ver in cuda_versions.keys():
+        for ver in cuda_versions:
             install_cuda(ver, base_path, download_path)
     elif version in cuda_versions:
         install_cuda(version, base_path, download_path)
diff --git a/pyproject.toml b/pyproject.toml
index 7940e7bbf..d26832e4f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,11 +123,10 @@ select = [
 ignore = [
     "B007",  # Loop control variable not used within the loop body (TODO: enable)
     "B028",  # Warning without stacklevel (TODO: enable)
-    "E501",  # Supress line-too-long warnings: trust yapf's judgement on this one.
+    "E501",  # Suppress line-too-long warnings: trust yapf's judgement on this one.
     "E701",  # Multiple statements on one line (TODO: enable)
     "E712",  # Allow using if x == False, as it's not always equivalent to if x.
     "E731",  # Do not use lambda
-    "F841",  # Local assigned but not used (TODO: enable, these are likely bugs)
     "RUF012",  # Mutable class attribute annotations
     "RUF034",  # Useless if-else (TODO: enable)
     "ISC001",  # single-line-implicit-string-concatenation incompatible with formatter
diff --git a/tests/test_generation.py b/tests/test_generation.py
index 38b5ce9bd..3ab1cc5bd 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -112,7 +112,7 @@ def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dtype):
     assert len(outputs) == n_cases
     failure_count = 0
     for i in range(n_cases):
-        if not outputs[i][: len(str(math.pi))] == str(math.pi):
+        if outputs[i][: len(str(math.pi))] != str(math.pi):
             failure_count += 1
     failure_max = 2 if fixture_config[0] == "huggyllama/llama-7b" else 4
     if failure_count > failure_max: