Commit 8c97d26

minor code refactoring
Signed-off-by: Alex Kogan <[email protected]>
1 parent 6fa3e54 commit 8c97d26

File tree

2 files changed (+5, -4 lines)


csrc/ops.h

Lines changed: 4 additions & 3 deletions
@@ -298,10 +298,11 @@ void per_token_group_quant_int8(const torch::Tensor& input,
                                  torch::Tensor& output_s, int64_t group_size,
                                  double eps, double int8_min, double int8_max);
 
-torch::Tensor rtn_marlin_gemm(const torch::Tensor& a, const torch::Tensor& b_q_weight,
+torch::Tensor rtn_marlin_gemm(const torch::Tensor& a,
+                              const torch::Tensor& b_q_weight,
                               const torch::Tensor& b_scales,
-                              torch::Tensor& workspace, int64_t size_m, int64_t size_n,
-                              int64_t size_k);
+                              torch::Tensor& workspace, int64_t size_m,
+                              int64_t size_n, int64_t size_k);
 
 #endif
 
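
The hunk above only re-wraps the declaration; the signature itself is unchanged. For orientation, a minimal sketch of a hypothetical call site follows, assuming `a` is a [size_m, size_k] activation matrix, `b_q_weight` is the packed RTN-quantized weight, and the last dimension of `b_scales` is N. These shape conventions, dtypes, and the include paths are assumptions for illustration, not taken from this commit.

// Hypothetical call site for rtn_marlin_gemm; shapes, dtypes, and the
// workspace layout are assumptions for illustration, not part of this commit.
#include <torch/all.h>
#include "ops.h"

torch::Tensor run_rtn_marlin(const torch::Tensor& a,          // assumed [size_m, size_k]
                             const torch::Tensor& b_q_weight, // packed quantized weight
                             const torch::Tensor& b_scales,   // per-group scales
                             torch::Tensor& workspace) {      // scratch buffer for the kernel
  const int64_t size_m = a.size(0);
  const int64_t size_k = a.size(1);
  const int64_t size_n = b_scales.size(-1);  // assumption: last scale dim is N
  return rtn_marlin_gemm(a, b_q_weight, b_scales, workspace, size_m, size_n, size_k);
}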

csrc/quantization/rtn_marlin/rtn_marlin_cuda_kernel.cu

Lines changed: 1 addition & 1 deletion
@@ -968,7 +968,7 @@ torch::Tensor rtn_marlin_gemm(const torch::Tensor& a,
   // Switch to a Dequantize-and-GEMM path for long inputs
   // (see https://arxiv.org/pdf/2505.15909 for details)
   const int SLOW_PATH_MATMUL_HEURISTIC_CONDITION = 1024;
-  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
+  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
     auto weight = rtn_marlin_dequantize(b_q_weight, b_scales, qbits);
     return at::matmul(a, weight.t());
   }
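
For context, this hunk sits inside a heuristic dispatch: once size_m reaches the threshold, the packed weight is dequantized once and handed to a regular dense GEMM; the code comment points to the linked paper for the rationale. The sketch below restates that pattern under stated assumptions; launch_marlin_path and the exact signatures of the surrounding helpers are placeholders, not functions defined by this commit.

// Sketch of the dispatch pattern around this hunk; only rtn_marlin_dequantize
// and at::matmul appear in the diff, the rest is a placeholder illustration.
#include <torch/all.h>

// Declarations assumed from the call sites in the surrounding file.
torch::Tensor rtn_marlin_dequantize(const torch::Tensor& b_q_weight,
                                    const torch::Tensor& b_scales, int64_t qbits);
torch::Tensor launch_marlin_path(const torch::Tensor& a, const torch::Tensor& b_q_weight,
                                 const torch::Tensor& b_scales);  // placeholder fast path

torch::Tensor gemm_with_long_input_fallback(const torch::Tensor& a,
                                            const torch::Tensor& b_q_weight,
                                            const torch::Tensor& b_scales,
                                            int64_t size_m, int64_t qbits) {
  // Long inputs: dequantize once and run a dense matmul
  // (see https://arxiv.org/pdf/2505.15909 for the heuristic's motivation).
  const int SLOW_PATH_MATMUL_HEURISTIC_CONDITION = 1024;
  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
    auto weight = rtn_marlin_dequantize(b_q_weight, b_scales, qbits);
    return at::matmul(a, weight.t());
  }
  // Short inputs: stay on the fused Marlin kernel path (placeholder call).
  return launch_marlin_path(a, b_q_weight, b_scales);
}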

0 commit comments
