We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ba6c28c commit 7ff8b94 (Copy full SHA for 7ff8b94)
vllm/model_executor/layers/quantization/modelopt.py
@@ -1286,9 +1286,14 @@ def apply(
1286
gemm1_weights=layer.gemm1_weights_fp4_shuffled.data,
1287
gemm1_weights_scale=layer.gemm1_scales_fp4_shuffled.data.view(
1288
torch.float8_e4m3fn),
1289
+ gemm1_bias=None,
1290
+ gemm1_alpha=None,
1291
+ gemm1_beta=None,
1292
+ gemm1_clamp_limit=None,
1293
gemm2_weights=layer.gemm2_weights_fp4_shuffled.data,
1294
gemm2_weights_scale=layer.gemm2_scales_fp4_shuffled.data.view(
1295
1296
+ gemm2_bias=None,
1297
output1_scale_scalar=layer.g1_scale_c.data,
1298
output1_scale_gate_scalar=layer.g1_alphas.data,
1299
output2_scale_scalar=layer.g2_alphas.data,
0 commit comments