Add: deepseekv2 smoothquant mappings (#1433)

rahul-tuli · web-flow · commit 9a11f529afbd · 2025-05-15T17:20:57.000Z
This PR adds default smoothquant mappings for Deepseekv2!

Test Plan: Checked that
`examples/quantizing_moe/deepseek_moe_w8a8_int8.py` runs with a
smoothquant modifier (w/o having to specify mappings)

Signed-off-by: Rahul Tuli <rtuli@redhat.com>
diff --git a/src/llmcompressor/modifiers/smoothquant/utils.py b/src/llmcompressor/modifiers/smoothquant/utils.py
@@ -63,6 +63,16 @@
     ),
 ]
 
+DEEPSEEK_V2_SMOOTHQUANT_MAPPINGS: List[LayerMap] = [
+    LayerMap(
+        balance_layers=["re:.*q_proj", "re:.*kv_a_proj_with_mqa"],
+        smooth_layers="re:.*input_layernorm",
+    ),
+    LayerMap(
+        balance_layers=["re:.*gate"], smooth_layers="re:.*post_attention_layernorm"
+    ),
+]
+
 
 # Registry of layer mappings for different architectures
 #   Add more mappings here
@@ -75,6 +85,7 @@
     "ChatGLMForConditionalGeneration": BLOOM_SMOOTHQUANT_MAPPINGS,
     "Phi3VForCausalLM": PHI3_VISION_SMOOTHQUANT_MAPPINGS,
     "WhisperForConditionalGeneration": WHISPER_V2_SMOOTHQUANT_MAPPINGS,
+    "DeepseekV2ForCausalLM": DEEPSEEK_V2_SMOOTHQUANT_MAPPINGS,
 }