
Commit 36e3dd5

Enable option to disable dynamic shapes in optimum-et (#114)
1 parent ab6261d commit 36e3dd5

File tree

3 files changed: +17 −3 lines changed

optimum/commands/export/executorch.py
optimum/exporters/executorch/integrations.py
optimum/exporters/executorch/tasks/causal_lm.py

optimum/commands/export/executorch.py

Lines changed: 8 additions & 0 deletions

@@ -67,6 +67,12 @@ def parse_args_executorch(parser):
         action="store_true",
         help="For decoder-only models to use custom kv cache for static cache that updates cache using custom op. Defaults to False.",
     )
+    required_group.add_argument(
+        "--disable_dynamic_shapes",
+        required=False,
+        action="store_true",
+        help="When this flag is set on decoder-only models, dynamic shapes are disabled during export.",
+    )
     required_group.add_argument(
         "--qlinear",
         type=str,
@@ -109,6 +115,8 @@ def run(self):
             kwargs["use_custom_sdpa"] = self.args.use_custom_sdpa
         if self.args.use_custom_kv_cache:
             kwargs["use_custom_kv_cache"] = self.args.use_custom_kv_cache
+        if self.args.disable_dynamic_shapes:
+            kwargs["disable_dynamic_shapes"] = self.args.disable_dynamic_shapes
         if self.args.qlinear:
             kwargs["qlinear"] = self.args.qlinear
         if self.args.qembedding:
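
With the new flag wired into the CLI, a hypothetical invocation might look like the following (the model id, recipe, and output path are illustrative, not part of this commit):

optimum-cli export executorch \
  --model HuggingFaceTB/SmolLM2-135M \
  --task text-generation \
  --recipe xnnpack \
  --disable_dynamic_shapes \
  --output_dir ./smollm2_pte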

optimum/exporters/executorch/integrations.py

Lines changed: 7 additions & 2 deletions

@@ -40,12 +40,13 @@ class CausalLMExportableModule(torch.nn.Module):
     This module ensures that the exported model is compatible with ExecuTorch.
     """

-    def __init__(self, model, use_custom_kv_cache=False, use_custom_sdpa=False):
+    def __init__(self, model, use_custom_kv_cache=False, use_custom_sdpa=False, disable_dynamic_shapes=False):
         super().__init__()
         self.model = model
         self.config = model.config
         self.use_custom_kv_cache = use_custom_kv_cache
         self.use_custom_sdpa = use_custom_sdpa
+        self.disable_dynamic_shapes = disable_dynamic_shapes
         self.metadata = save_config_to_constant_methods(model.config, model.generation_config)
         logging.info(f"Metadata to be recorded in PTE: {self.metadata}")

@@ -71,7 +72,11 @@ def _prepare_export_inputs(self):
             and not (self.use_custom_kv_cache and self.use_custom_sdpa)
         )

-        if is_transformers_version(">", "4.52.0") and not is_using_hybrid_cache_wo_custom_sdpa_kv_cache:
+        if (
+            not self.disable_dynamic_shapes
+            and is_transformers_version(">", "4.52.0")
+            and not is_using_hybrid_cache_wo_custom_sdpa_kv_cache
+        ):
             # Prepare inputs with dynamic shapes
             seq_length = 3  # Sequence length > 1 to avoid specialization issues
             example_input_ids = torch.zeros((1, seq_length), dtype=torch.long)
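
The branch above decides whether export inputs carry dynamic-shape specs. A minimal torch.export sketch of the distinction, using a toy module rather than the repo's code:

import torch
from torch.export import Dim, export

class TinyDecoder(torch.nn.Module):
    # Stand-in for a decoder-only LM; CausalLMExportableModule wraps a real one.
    def __init__(self):
        super().__init__()
        self.embed = torch.nn.Embedding(128, 16)
        self.proj = torch.nn.Linear(16, 128)

    def forward(self, input_ids):
        return self.proj(self.embed(input_ids))

model = TinyDecoder()
example_input_ids = torch.zeros((1, 3), dtype=torch.long)  # seq_length > 1

# Dynamic shapes: the sequence dimension is a symbolic Dim and may vary at runtime.
seq = Dim("seq_length", min=1, max=128)
dynamic_program = export(model, (example_input_ids,), dynamic_shapes={"input_ids": {1: seq}})

# Dynamic shapes disabled: the program specializes to the example's (1, 3) shape.
static_program = export(model, (example_input_ids,))

With the second form, feeding a different sequence length at runtime trips a shape guard; that specialization is the trade-off --disable_dynamic_shapes opts into.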

optimum/exporters/executorch/tasks/causal_lm.py

Lines changed: 2 additions & 1 deletion

@@ -53,6 +53,7 @@ def load_causal_lm_model(model_name_or_path: str, **kwargs) -> CausalLMExportableModule:
     device = "cpu"
     batch_size = 1
     dtype = kwargs.get("dtype", "float32")
+    disable_dynamic_shapes = kwargs.get("disable_dynamic_shapes", False)
     use_custom_sdpa = kwargs.get("use_custom_sdpa", False)
     use_custom_kv_cache = kwargs.get("use_custom_kv_cache", False)
     attn_implementation = kwargs.get("attn_implementation", "custom_sdpa" if use_custom_sdpa else "sdpa")
@@ -133,4 +134,4 @@ def _load_eager_pretrained(
     qembedding_config = kwargs.get("qembedding", None)
     quantize_model_(eager_model, qlinear_config=qlinear_config, qembedding_config=qembedding_config)

-    return CausalLMExportableModule(eager_model, use_custom_kv_cache, use_custom_sdpa)
+    return CausalLMExportableModule(eager_model, use_custom_kv_cache, use_custom_sdpa, disable_dynamic_shapes)
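
End to end, the kwarg travels from the CLI into the exportable module. A short usage sketch assuming the loader API shown above (the model id is illustrative):

from optimum.exporters.executorch.tasks.causal_lm import load_causal_lm_model

# Hypothetical checkpoint; any decoder-only model id would do.
module = load_causal_lm_model(
    "HuggingFaceTB/SmolLM2-135M",
    disable_dynamic_shapes=True,  # new in this commit: export with static shapes
)
assert module.disable_dynamic_shapes  # forwarded into CausalLMExportableModule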
