@@ -428,9 +428,57 @@ def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None):
def dequantize_blocks_BF16(blocks, block_size, type_size, dtype=None):
    return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32)

+# this part is from calcuis (gguf.org)
+# more info: https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/quant2c.py
+
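+# IQ4_NL: 32 weights per 18-byte block: an fp16 scale d followed by
+# 16 bytes of packed 4-bit indices into the non-linear kvalues codebook
+# (low nibbles hold weights 0-15, high nibbles weights 16-31)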
+def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None):
+    kvalues = torch.tensor(
+        [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113],
+        dtype=torch.float32, device=blocks.device
+    )
+    n_blocks = blocks.shape[0]
+    d, qs = split_block_dims(blocks, 2)
+    d = d.view(torch.float16).to(dtype)
+    qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor(
+        [0, 4], device=blocks.device, dtype=torch.uint8
+    ).reshape((1, 1, 2, 1))
+    qs = (qs & 15).reshape((n_blocks, -1)).to(torch.int64)
+    kvalues = kvalues.view(1, 1, 16)
+    qs = qs.unsqueeze(-1)
+    qs = torch.gather(kvalues.expand(qs.shape[0], qs.shape[1], 16), 2, qs)
+    qs = qs.squeeze(-1).to(dtype)
+    return d * qs
+
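+# IQ4_XS: 256 weights per 136-byte super-block: an fp16 scale d, a uint16
+# of high scale bits, QK_K // 64 = 4 bytes of low scale bits, then
+# QK_K // 2 = 128 bytes of packed 4-bit indices into the same codebook.
+# Each of the eight 32-weight sub-blocks gets a 6-bit scale biased by -32;
+# e.g. sub-block 5 decodes its scale as
+# (((scales_l[2] >> 4) & 0xF) | (((scales_h >> 10) & 3) << 4)) - 32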
+def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None):
+    kvalues = torch.tensor(
+        [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113],
+        dtype=torch.float32, device=blocks.device
+    )
+    n_blocks = blocks.shape[0]
+    d, scales_h, scales_l, qs = split_block_dims(blocks, 2, 2, QK_K // 64)
+    d = d.view(torch.float16).to(dtype)
+    scales_h = scales_h.view(torch.int16)
+    scales_l = scales_l.reshape((n_blocks, -1, 1)) >> torch.tensor(
+        [0, 4], device=blocks.device, dtype=torch.uint8).reshape((1, 1, 2))
+    scales_h = scales_h.reshape((n_blocks, 1, -1)) >> torch.tensor(
+        [2 * i for i in range(QK_K // 32)], device=blocks.device, dtype=torch.uint8).reshape((1, -1, 1))
+    scales_l = scales_l.reshape((n_blocks, -1)) & 0x0F
+    scales_h = scales_h.reshape((n_blocks, -1)) & 0x03
+    scales = (scales_l | (scales_h << 4)) - 32
+    dl = (d * scales.to(dtype)).reshape((n_blocks, -1, 1))
+    shifts_q = torch.tensor([0, 4], device=blocks.device, dtype=torch.uint8).reshape(1, 1, 2, 1)
+    qs = qs.reshape((n_blocks, -1, 1, 16)) >> shifts_q
+    qs = (qs & 15).reshape((n_blocks, -1, 32)).to(torch.int64)
+    kvalues = kvalues.view(1, 1, 1, 16)
+    qs = qs.unsqueeze(-1)
+    qs = torch.gather(kvalues.expand(qs.shape[0], qs.shape[1], qs.shape[2], 16), 3, qs)
+    qs = qs.squeeze(-1).to(dtype)
+    return (dl * qs).reshape(n_blocks, -1)

GGML_QUANT_SIZES = gguf.GGML_QUANT_SIZES
dequantize_functions = {
+    gguf.GGMLQuantizationType.IQ4_NL: dequantize_blocks_IQ4_NL,
+    gguf.GGMLQuantizationType.IQ4_XS: dequantize_blocks_IQ4_XS,
    gguf.GGMLQuantizationType.BF16: dequantize_blocks_BF16,
    gguf.GGMLQuantizationType.Q8_0: dequantize_blocks_Q8_0,
    gguf.GGMLQuantizationType.Q5_1: dequantize_blocks_Q5_1,