Merged
Changes from 1 commit
2 changes: 1 addition & 1 deletion bitsandbytes/backends/utils.py
@@ -18,7 +18,7 @@
     import triton.language as tl  # noqa: F401

     triton_available = True
-except ImportError as e:
+except ImportError:
     triton_available = False

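Reviewer note: the dropped `as e` binding was assigned but never read, which Ruff reports under F841 (the rule re-enabled in pyproject.toml below). A minimal sketch of the same optional-import guard; "some_optional_lib" is a placeholder name, not a real bitsandbytes dependency:

# Sketch of the optional-dependency guard used above.
try:
    import some_optional_lib  # noqa: F401

    lib_available = True
except ImportError:
    # no "as e" binding: the exception object is never used, only availability matters
    lib_available = False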
15 changes: 2 additions & 13 deletions bitsandbytes/functional.py
@@ -242,7 +242,6 @@ def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total_bits=8)
     assert e + p == total_bits - has_sign
     # the exponent is biased to 2^(e-1) -1 == 0
     evalues = []
-    pvalues = []
     for i, val in enumerate(range(-(2 ** (exponent_bits - has_sign)), 2 ** (exponent_bits - has_sign), 1)):
         evalues.append(2**val)

@@ -1365,8 +1364,6 @@ def optimizer_update_8bit_blockwise(
     gnorm_scale: float = 1.0,
     skip_zeros=False,
 ) -> None:
-    optim_func = None
-
     is_on_gpu([p, g, state1, state2, qmap1, qmap2, absmax1, absmax2])

     torch.ops.bitsandbytes.optimizer_update_8bit_blockwise(
@@ -2116,7 +2113,7 @@ def spmm_coo(
     assert cooA.values.numel() == nnz
     assert cooA.cols == B.shape[0]

-    transposed_B = False if B.is_contiguous() else True
+    transposed_B = not B.is_contiguous()

     ldb = B.stride()[(1 if transposed_B else 0)]
     ldc = B.shape[1]
@@ -2165,12 +2162,7 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
     assert cooA.values.numel() == nnz
     assert cooA.cols == B.shape[0], f"{cooA.cols} vs {B.shape}"

-    transposed_B = False if B.is_contiguous() else True
-
-    ldb = B.stride()[(1 if transposed_B else 0)]
-    ldc = B.shape[1]
-
-    values, counts = torch.unique(cooA.rowidx, return_counts=True)
+    _, counts = torch.unique(cooA.rowidx, return_counts=True)
     offset = counts.cumsum(0).int()
     max_count, max_idx = torch.sort(counts, descending=True)
     max_idx = max_idx.int()
@@ -2190,11 +2182,8 @@ def spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None):
     cnnz_rows = ct.c_int32(counts.numel())
     cnnz = ct.c_int32(cooA.nnz)
     crowsA = ct.c_int32(cooA.rows)
-    ccolsA = ct.c_int32(cooA.cols)
     crowsB = ct.c_int32(B.shape[1])
     ccolsB = ct.c_int32(B.shape[1])
-    cldb = ct.c_int32(ldb)
-    cldc = ct.c_int32(ldc)

     with _cuda_device_of(B):
         is_on_gpu([cooA.rowidx, cooA.colidx, cooA.values, B, out, dequant_stats])
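Reviewer note: the spmm_coo edit relies on `not x.is_contiguous()` being exactly equivalent to `False if x.is_contiguous() else True`; the other deletions in this file (pvalues, optim_func, and the unused locals in spmm_coo_very_sparse) remove values nothing reads afterwards. A quick standalone check of the boolean rewrite, with an arbitrary example tensor:

import torch

B = torch.randn(4, 8)
for t in (B, B.t()):                      # .t() returns a non-contiguous view
    old_style = False if t.is_contiguous() else True
    new_style = not t.is_contiguous()
    assert old_style == new_style         # the rewrite is behavior-preserving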
4 changes: 2 additions & 2 deletions bitsandbytes/nn/modules.py
@@ -480,7 +480,7 @@ def __init__(
         )
         # self.persistent_buffers = [] # TODO consider as way to save quant state
         self.compute_dtype = compute_dtype
-        self.compute_type_is_set = False if compute_dtype is None else True
+        self.compute_type_is_set = compute_dtype is not None
         self.quant_state = None
         self.quant_storage = quant_storage
         self.ipex_linear_is_set = False
@@ -1150,4 +1150,4 @@ def forward(self, x):
         if self.weight.CB is not None:
             self.init_8bit_state()

-        out = bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
+        return bnb.matmul_mixed(x.half(), self.weight.half(), bias=None, state=self.state) + self.bias
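Reviewer note: the second hunk is more than a style cleanup. The old forward computed the result, bound it to a local `out`, and then fell off the end of the method, so callers received None; adding the return is the actual fix. A standalone illustration with plain functions, not the real module:

def forward_old(x):
    out = x * 2   # result is computed but only bound to a local
    # implicit `return None` here

def forward_new(x):
    return x * 2

assert forward_old(3) is None   # the pre-fix behavior
assert forward_new(3) == 6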
3 changes: 0 additions & 3 deletions bitsandbytes/optim/lars.py
@@ -231,9 +231,6 @@ def step(self, closure=None):
                 loss = closure()

         for group in self.param_groups:
-            params_with_grad = []
-            d_p_list = []
-            momentum_buffer_list = []
             weight_decay = group["weight_decay"]
             momentum = group["momentum"]
             dampening = group["dampening"]
2 changes: 0 additions & 2 deletions bitsandbytes/optim/optimizer.py
@@ -272,8 +272,6 @@ def step(self, closure=None):
             with torch.enable_grad():
                 loss = closure()

-        overflows = []
-
         if not self.initialized:
             self.check_overrides()
             self.to_gpu()  # needed for fairseq pure fp16 training
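Reviewer note: in both optimizer files above, the deleted names are assigned once and never read again, so removing them cannot change the step logic. A minimal sketch of the pattern (a throwaway function, not the real optimizers) that `ruff check --select F841` would flag before the cleanup:

def step_sketch(param_groups):
    lrs = []
    for group in param_groups:
        params_with_grad = []      # assigned but never used (F841)
        momentum_buffer_list = []  # assigned but never used (F841)
        lrs.append(group["lr"])
    return lrs

assert step_sketch([{"lr": 0.1}, {"lr": 0.01}]) == [0.1, 0.01]   # unchanged if the two lists are deleted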
2 changes: 1 addition & 1 deletion bitsandbytes/research/autograd/_functions.py
@@ -235,7 +235,7 @@ def forward(ctx, A, B, out=None, bias=None, state: Optional[MatmulLtState] = None
         # 2. Quantize B
         if state.has_fp16_weights:
             # print('B shape', B.shape)
-            has_grad = True if (getattr(B, "grad", None) is not None) else False
+            has_grad = getattr(B, "grad", None) is not None
             is_transposed = not B.is_contiguous() and B.shape[0] == B.stride(1)
             if is_transposed:
                 B = B.contiguous()
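Reviewer note: `is not None` already evaluates to a bool, so wrapping it in `True if (...) else False` was redundant. A standalone check with a plain tensor, just to show the two forms always agree:

import torch

B = torch.randn(2, 2, requires_grad=True)
for _ in range(2):                     # once before and once after a backward pass
    old_style = True if (getattr(B, "grad", None) is not None) else False
    new_style = getattr(B, "grad", None) is not None
    assert old_style == new_style
    B.sum().backward()                 # populates B.grad for the second iteration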
5 changes: 0 additions & 5 deletions bitsandbytes/utils.py
@@ -92,11 +92,6 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
     if rdm:
         return torch.randint(0, weight.shape[1], size=(topk,), device=weight.device).long()

-    m = weight.mean(reduction_dim)
-    mm = m.mean()
-    mstd = m.std()
-    zm = (m - mm) / mstd
-
     std = weight.std(reduction_dim)
     stdm = std.mean()
     stdstd = std.std()
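Reviewer note: the deleted block computed z-scores over the per-dimension means (`zm`), but nothing downstream read them; only the std-based statistics that survive the hunk feed the result. A rough sketch of the kind of statistic those surviving lines compute, not the full find_outlier_dims implementation:

import torch

weight = torch.randn(128, 64)                 # arbitrary example matrix
std = weight.std(0)                           # per-column spread, as in the surviving code
zstd = (std - std.mean()) / std.std()         # z-score of each column's std
outlier_cols = torch.where(zstd > 4.0)[0]     # 4.0 mirrors the function's zscore default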
2 changes: 1 addition & 1 deletion install_cuda.py
@@ -87,7 +87,7 @@ def main():

     # Install CUDA version(s)
     if version == "all":
-        for ver in cuda_versions.keys():
+        for ver in cuda_versions:
             install_cuda(ver, base_path, download_path)
     elif version in cuda_versions:
         install_cuda(version, base_path, download_path)
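Reviewer note: iterating a dict already iterates its keys, so the `.keys()` call was redundant; both loops visit the same keys in the same order. A quick standalone check with placeholder version strings, not the script's real table:

cuda_versions = {"11.8": "cuda-11.8", "12.4": "cuda-12.4"}   # placeholder entries
assert list(cuda_versions) == list(cuda_versions.keys())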
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -123,11 +123,10 @@ select = [
 ignore = [
     "B007",  # Loop control variable not used within the loop body (TODO: enable)
     "B028",  # Warning without stacklevel (TODO: enable)
-    "E501",  # Supress line-too-long warnings: trust yapf's judgement on this one.
+    "E501",  # Suppress line-too-long warnings: trust yapf's judgement on this one.
     "E701",  # Multiple statements on one line (TODO: enable)
     "E712",  # Allow using if x == False, as it's not always equivalent to if x.
     "E731",  # Do not use lambda
-    "F841",  # Local assigned but not used (TODO: enable, these are likely bugs)
     "RUF012",  # Mutable class attribute annotations
     "RUF034",  # Useless if-else (TODO: enable)
     "ISC001",  # single-line-implicit-string-concatenation incompatible with formatter
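Reviewer note: dropping "F841" from the ignore list is what ties this PR together: provided the F rules are in the select list above this hunk, Ruff now reports locals that are assigned but never read, which is exactly the code removed in the Python files above. The untouched E712 entry is also worth a concrete illustration, since `x == False` really is not interchangeable with `not x`:

for x in (None, [], 0.0):
    equals_false = x == False   # noqa: E712  (deliberate, to show the difference)
    truthiness = not x
    print(x, equals_false, truthiness)
# None -> equals_false=False, truthiness=True
# []   -> equals_false=False, truthiness=True
# 0.0  -> equals_false=True,  truthiness=True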
2 changes: 1 addition & 1 deletion tests/test_generation.py
@@ -112,7 +112,7 @@ def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dtype):
     assert len(outputs) == n_cases
     failure_count = 0
     for i in range(n_cases):
-        if not outputs[i][: len(str(math.pi))] == str(math.pi):
+        if outputs[i][: len(str(math.pi))] != str(math.pi):
             failure_count += 1
     failure_max = 2 if fixture_config[0] == "huggyllama/llama-7b" else 4
     if failure_count > failure_max:
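Reviewer note: for plain strings `not a == b` and `a != b` always agree, so the rewrite only improves readability. A standalone check in the same shape as the test, with a made-up model output:

import math

digits = str(math.pi)                 # "3.141592653589793"
sample_output = "3.14159 is pi"       # made-up output, only for this check
old = not sample_output[: len(digits)] == digits
new = sample_output[: len(digits)] != digits
assert old == new and new is True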