Commit 8c97d26

minor code refactoring
Signed-off-by: Alex Kogan <[email protected]>
1 parent 6fa3e54 commit 8c97d26

File tree

2 files changed (+5, -4 lines)


csrc/ops.h

Lines changed: 4 additions & 3 deletions
@@ -298,10 +298,11 @@ void per_token_group_quant_int8(const torch::Tensor& input,
                                  torch::Tensor& output_s, int64_t group_size,
                                  double eps, double int8_min, double int8_max);
 
-torch::Tensor rtn_marlin_gemm(const torch::Tensor& a, const torch::Tensor& b_q_weight,
+torch::Tensor rtn_marlin_gemm(const torch::Tensor& a,
+                              const torch::Tensor& b_q_weight,
                               const torch::Tensor& b_scales,
-                              torch::Tensor& workspace, int64_t size_m, int64_t size_n,
-                              int64_t size_k);
+                              torch::Tensor& workspace, int64_t size_m,
+                              int64_t size_n, int64_t size_k);
 
 #endif
 
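
The hunk above only re-wraps the declaration; the signature itself is unchanged. For orientation, a minimal sketch of a hypothetical call site follows, assuming `a` is a [size_m, size_k] activation matrix, `b_q_weight` is the packed RTN-quantized weight, and the last dimension of `b_scales` is N. These shape conventions, dtypes, and the include paths are assumptions for illustration, not taken from this commit.

// Hypothetical call site for rtn_marlin_gemm; shapes, dtypes, and the
// workspace layout are assumptions for illustration, not part of this commit.
#include <torch/all.h>
#include "ops.h"

torch::Tensor run_rtn_marlin(const torch::Tensor& a,          // assumed [size_m, size_k]
                             const torch::Tensor& b_q_weight, // packed quantized weight
                             const torch::Tensor& b_scales,   // per-group scales
                             torch::Tensor& workspace) {      // scratch buffer for the kernel
  const int64_t size_m = a.size(0);
  const int64_t size_k = a.size(1);
  const int64_t size_n = b_scales.size(-1);  // assumption: last scale dim is N
  return rtn_marlin_gemm(a, b_q_weight, b_scales, workspace, size_m, size_n, size_k);
}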

csrc/quantization/rtn_marlin/rtn_marlin_cuda_kernel.cu

Lines changed: 1 addition & 1 deletion
@@ -968,7 +968,7 @@ torch::Tensor rtn_marlin_gemm(const torch::Tensor& a,
   // Switch to a Dequantize-and-GEMM path for long inputs
   // (see https://arxiv.org/pdf/2505.15909 for details)
   const int SLOW_PATH_MATMUL_HEURISTIC_CONDITION = 1024;
-  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
+  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
     auto weight = rtn_marlin_dequantize(b_q_weight, b_scales, qbits);
     return at::matmul(a, weight.t());
   }
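
For context, this hunk sits inside a heuristic dispatch: once size_m reaches the threshold, the packed weight is dequantized once and handed to a regular dense GEMM; the code comment points to the linked paper for the rationale. The sketch below restates that pattern under stated assumptions; launch_marlin_path and the exact signatures of the surrounding helpers are placeholders, not functions defined by this commit.

// Sketch of the dispatch pattern around this hunk; only rtn_marlin_dequantize
// and at::matmul appear in the diff, the rest is a placeholder illustration.
#include <torch/all.h>

// Declarations assumed from the call sites in the surrounding file.
torch::Tensor rtn_marlin_dequantize(const torch::Tensor& b_q_weight,
                                    const torch::Tensor& b_scales, int64_t qbits);
torch::Tensor launch_marlin_path(const torch::Tensor& a, const torch::Tensor& b_q_weight,
                                 const torch::Tensor& b_scales);  // placeholder fast path

torch::Tensor gemm_with_long_input_fallback(const torch::Tensor& a,
                                            const torch::Tensor& b_q_weight,
                                            const torch::Tensor& b_scales,
                                            int64_t size_m, int64_t qbits) {
  // Long inputs: dequantize once and run a dense matmul
  // (see https://arxiv.org/pdf/2505.15909 for the heuristic's motivation).
  const int SLOW_PATH_MATMUL_HEURISTIC_CONDITION = 1024;
  if (size_m >= SLOW_PATH_MATMUL_HEURISTIC_CONDITION) {
    auto weight = rtn_marlin_dequantize(b_q_weight, b_scales, qbits);
    return at::matmul(a, weight.t());
  }
  // Short inputs: stay on the fused Marlin kernel path (placeholder call).
  return launch_marlin_path(a, b_q_weight, b_scales);
}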

0 commit comments
