3 files changed, +17 -12 lines changed
modifiers/pruning/sparsegpt
3 files changed, +17 -12 lines changed
Original file line number Diff line number Diff line change
@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():
4747
4848
4949def test_update ():
50- nbits = 8
51- args = QuantizationArgs (nbits = nbits , symmetric = True )
50+ num_bits = 8
51+ args = QuantizationArgs (num_bits = num_bits , symmetric = True )
5252 cache = QuantizedKVParameterCache (args )
5353
5454 max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
6262 layer_idx = 0
6363
6464 cache .update (key_states , value_states , layer_idx )
65- denom = (2 ** (nbits ) - 1 ) / 2
65+ denom = (2 ** (num_bits ) - 1 ) / 2
6666 expected_k_scale = torch .tensor ([max_key_states_val / denom ])
6767 expected_v_scale = torch .tensor ([max_value_states_val / denom ])
6868
@@ -83,8 +83,8 @@ def test_update():
8383
8484
8585def test_cache_reset ():
86- nbits = 8
87- args = QuantizationArgs (nbits = nbits , symmetric = True )
86+ num_bits = 8
87+ args = QuantizationArgs (num_bits = num_bits , symmetric = True )
8888 cache = QuantizedKVParameterCache (args )
8989
9090 max_key_states_val = 1.0
Original file line number Diff line number Diff line change
@@ -96,13 +96,11 @@ def setUp(self):
9696 "symmetric" : False ,
9797 "strategy" : "token" ,
9898 "dynamic" : True ,
99- "kwargs" : {},
10099 },
101100 "weights" : {
102101 "num_bits" : 4 ,
103102 "symmetric" : True ,
104103 "strategy" : "channel" ,
105- "kwargs" : {},
106104 },
107105 }
108106 }
Original file line number Diff line number Diff line change
11import pytest
22import torch
3- from compressed_tensors .quantization import QuantizationArgs , QuantizationScheme
3+ from compressed_tensors .quantization import (
4+ QuantizationArgs ,
5+ QuantizationScheme ,
6+ QuantizationStrategy ,
7+ QuantizationType ,
8+ )
49from torch .nn import Linear , Module , ReLU
510
611from llmcompressor .pytorch .utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
1621 self .fc1 .quantization_scheme = QuantizationScheme (
1722 targets = ["model.fc1" ],
1823 weights = QuantizationArgs (
19- precision = 8 ,
20- granularity = "per_tensor" ,
21- algorithm = "gptq" ,
22- blocksize = 128 ,
24+ num_bits = 8 ,
25+ type = QuantizationType .INT ,
26+ group_size = 128 ,
27+ strategy = QuantizationStrategy .GROUP ,
28+ symmetric = True ,
29+ dynamic = False ,
2330 ),
2431 )
2532
You can’t perform that action at this time.
0 commit comments