2 changes: 1 addition & 1 deletion bitsandbytes/backends/utils.py
@@ -18,7 +18,7 @@
import triton.language as tl # noqa: F401

triton_available = True
-except ImportError as e:
+except ImportError:
triton_available = False


15 changes: 2 additions & 13 deletions bitsandbytes/functional.py
@@ -242,7 +242,6 @@ def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total_bits=8)
assert e + p == total_bits - has_sign
# the exponent is biased to 2^(e-1) -1 == 0
evalues = []
-pvalues = []
for i, val in enumerate(range(-(2 ** (exponent_bits - has_sign)), 2 ** (exponent_bits - has_sign), 1)):
evalues.append(2**val)

@@ -1365,8 +1364,6 @@ def optimizer_update_8bit_blockwise(
gnorm_scale: float = 1.0,
skip_zeros=False,
) -> None:
-optim_func = None
-
is_on_gpu([p, g, state1, state2, qmap1, qmap2, absmax1, absmax2])

torch.ops.bitsandbytes.optimizer_update_8bit_blockwise(
@@ -2116,7 +2113,7 @@ def spmm_coo(
assert cooA.values.numel() == nnz
assert cooA.cols == B.shape[0]

-transposed_B = False if B.is_contiguous() else True
+transposed_B = not B.is_contiguous()

ldb = B.stride()[(1 if transposed_B else 0)]
ldc = B.shape[1]
@@ -2165,12 +2162,7 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
assert cooA.values.numel() == nnz
assert cooA.cols == B.shape[0], f"{cooA.cols} vs {B.shape}"

-transposed_B = False if B.is_contiguous() else True
-
-ldb = B.stride()[(1 if transposed_B else 0)]
-ldc = B.shape[1]
-
-values, counts = torch.unique(cooA.rowidx, return_counts=True)
+_, counts = torch.unique(cooA.rowidx, return_counts=True)
offset = counts.cumsum(0).int()
max_count, max_idx = torch.sort(counts, descending=True)
max_idx = max_idx.int()
@@ -2190,11 +2182,8 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
cnnz_rows = ct.c_int32(counts.numel())
cnnz = ct.c_int32(cooA.nnz)
crowsA = ct.c_int32(cooA.rows)
-ccolsA = ct.c_int32(cooA.cols)
crowsB = ct.c_int32(B.shape[1])
ccolsB = ct.c_int32(B.shape[1])
-cldb = ct.c_int32(ldb)
-cldc = ct.c_int32(ldc)

with _cuda_device_of(B):
is_on_gpu([cooA.rowidx, cooA.colidx, cooA.values, B, out, dequant_stats])
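Not part of the diff: a minimal sketch of the torch.unique idiom used in spmm_coo_very_sparse above, where the unique values are deliberately discarded with _ because only the per-row counts (and their cumulative offsets) are needed. The tensor values here are made up for illustration.

import torch

rowidx = torch.tensor([0, 0, 1, 2, 2, 2])              # illustrative COO row indices
_, counts = torch.unique(rowidx, return_counts=True)   # unique values discarded on purpose
offset = counts.cumsum(0).int()                         # per-row offsets, as in the code above
print(counts)  # tensor([2, 1, 3])
print(offset)  # tensor([2, 3, 6], dtype=torch.int32)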
4 changes: 2 additions & 2 deletions bitsandbytes/nn/modules.py
@@ -480,7 +480,7 @@ def __init__(
)
# self.persistent_buffers = [] # TODO consider as way to save quant state
self.compute_dtype = compute_dtype
-self.compute_type_is_set = False if compute_dtype is None else True
+self.compute_type_is_set = compute_dtype is not None
self.quant_state = None
self.quant_storage = quant_storage
self.ipex_linear_is_set = False
@@ -1150,4 +1150,4 @@ def forward(self, x):
if self.weight.CB is not None:
self.init_8bit_state()

-out = bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
+return bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
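The forward change above is the clearest case in this diff where an unused assignment was a real bug: the result was computed into out but never returned, so callers of the module received None. A minimal, self-contained sketch of that failure mode (hypothetical module, not from bitsandbytes):

import torch
import torch.nn as nn

class Broken(nn.Module):
    def forward(self, x):
        out = x * 2  # assigned but never returned -- exactly what F841 flags
        # missing: return out

print(Broken()(torch.ones(3)))  # prints None: the caller silently gets nothing back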
3 changes: 0 additions & 3 deletions bitsandbytes/optim/lars.py
@@ -231,9 +231,6 @@ def step(self, closure=None):
loss = closure()

for group in self.param_groups:
-params_with_grad = []
-d_p_list = []
-momentum_buffer_list = []
weight_decay = group["weight_decay"]
momentum = group["momentum"]
dampening = group["dampening"]
2 changes: 0 additions & 2 deletions bitsandbytes/optim/optimizer.py
@@ -272,8 +272,6 @@ def step(self, closure=None):
with torch.enable_grad():
loss = closure()

-overflows = []
-
if not self.initialized:
self.check_overrides()
self.to_gpu() # needed for fairseq pure fp16 training
2 changes: 1 addition & 1 deletion bitsandbytes/research/autograd/_functions.py
@@ -235,7 +235,7 @@ def forward(ctx, A, B, out=None, bias=None, state: Optional[MatmulLtState] = Non
# 2. Quantize B
if state.has_fp16_weights:
# print('B shape', B.shape)
-has_grad = True if (getattr(B, "grad", None) is not None) else False
+has_grad = getattr(B, "grad", None) is not None
is_transposed = not B.is_contiguous() and B.shape[0] == B.stride(1)
if is_transposed:
B = B.contiguous()
5 changes: 0 additions & 5 deletions bitsandbytes/utils.py
@@ -92,11 +92,6 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
if rdm:
return torch.randint(0, weight.shape[1], size=(topk,), device=weight.device).long()

-m = weight.mean(reduction_dim)
-mm = m.mean()
-mstd = m.std()
-zm = (m - mm) / mstd
-
std = weight.std(reduction_dim)
stdm = std.mean()
stdstd = std.std()
2 changes: 1 addition & 1 deletion install_cuda.py
@@ -87,7 +87,7 @@ def main():

# Install CUDA version(s)
if version == "all":
-for ver in cuda_versions.keys():
+for ver in cuda_versions:
install_cuda(ver, base_path, download_path)
elif version in cuda_versions:
install_cuda(version, base_path, download_path)
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -123,11 +123,10 @@ select = [
ignore = [
"B007", # Loop control variable not used within the loop body (TODO: enable)
"B028", # Warning without stacklevel (TODO: enable)
"E501", # Supress line-too-long warnings: trust yapf's judgement on this one.
"E501", # Suppress line-too-long warnings: trust yapf's judgement on this one.
"E701", # Multiple statements on one line (TODO: enable)
"E712", # Allow using if x == False, as it's not always equivalent to if x.
"E731", # Do not use lambda
"F841", # Local assigned but not used (TODO: enable, these are likely bugs)
"RUF012", # Mutable class attribute annotations
"RUF034", # Useless if-else (TODO: enable)
"ISC001", # single-line-implicit-string-concatenation incompatible with formatter
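The pyproject.toml change above drops F841 from Ruff's ignore list, which is what drives the rest of this diff: every removed assignment is a local that Ruff now reports as assigned but never used. As a rough illustration (hypothetical function, not from the repository), this is the kind of code F841 flags:

def scale(values):
    total = sum(values)      # F841: local variable 'total' is assigned to but never used
    maximum = max(values)
    return [v / maximum for v in values]

Running ruff check --select F841 on a file containing this function reports the unused total; the fixes in this PR either delete such assignments or, where the call is still needed, rename the discarded value to _.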
2 changes: 1 addition & 1 deletion tests/test_generation.py
@@ -112,7 +112,7 @@ def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dtype):
assert len(outputs) == n_cases
failure_count = 0
for i in range(n_cases):
-if not outputs[i][: len(str(math.pi))] == str(math.pi):
+if outputs[i][: len(str(math.pi))] != str(math.pi):
failure_count += 1
failure_max = 2 if fixture_config[0] == "huggyllama/llama-7b" else 4
if failure_count > failure_max: