From b8c5a91fded72b80c7a2dc56fad2ce3132e31f2a Mon Sep 17 00:00:00 2001 From: 410011max <410011max@gmail.com> Date: Tue, 15 Jul 2025 01:16:21 +0800 Subject: [PATCH] Fix per-token dynamic quant --- src/compressed_tensors/quantization/utils/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compressed_tensors/quantization/utils/helpers.py b/src/compressed_tensors/quantization/utils/helpers.py index b6d81009..c2b4abfa 100644 --- a/src/compressed_tensors/quantization/utils/helpers.py +++ b/src/compressed_tensors/quantization/utils/helpers.py @@ -167,7 +167,7 @@ def compute_dynamic_scales_and_zp( keep_dims = True if args.strategy == QuantizationStrategy.TOKEN: - dim = {1, 2} + dim = {0, 1} reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim) elif args.strategy == QuantizationStrategy.TENSOR: reduce_dims = None