Modifications to the flow of hash creation and filtration of params for export

quic-dhirajku · quic-dhirajku · commit 7b9cfba246e5 · 2025-07-16T08:41:00.000Z
Signed-off-by: Dhiraj Kumar Sah <dhirajku@qti.qualcomm.com>
diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
@@ -22,8 +22,19 @@
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
+<<<<<<< HEAD
 from QEfficient.utils import constants, create_json, dump_qconfig, generate_mdp_partition_config, load_json
 from QEfficient.utils.cache import QEFF_HOME, to_hashable
+=======
+from QEfficient.utils import (
+    constants,
+    create_json,
+    dump_qconfig,
+    filter_and_create_export_hash,
+    hash_compile_params,
+)
+from QEfficient.utils.cache import QEFF_HOME
+>>>>>>> dd35ad1 (Modifications to the flow of hash creation and filtration of params for export)
 
 logger = logging.getLogger(__name__)
 
@@ -50,13 +61,12 @@ def create_model_params(self, **kwargs) -> Dict:
 
         model_params["config"] = self.model.config.to_diff_dict()
         model_params["_transform_names"] = self._transform_names()
-        # TODO: Add keywords list to filter out params that are not needed for hashing
         return model_params
 
     def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
         self.model = model
-        self.model_params = self.create_model_params(**kwargs)
+        self.hash_params = self.create_model_params(**kwargs)
 
         if hasattr(self.model.config, "architectures"):
             self.model_architecture = self.model.config.architectures[0]
@@ -123,7 +133,6 @@ def compile(self, *args, **kwargs) -> Path:
             :str: Path of the compiled ``qpc`` package.
         """
 
-    # @dump_model_params
     def _export(
         self,
         example_inputs: Dict[str, torch.Tensor],
@@ -146,8 +155,8 @@ def _export(
         """
 
         export_dir = Path(export_dir or (QEFF_HOME / self.model_architecture / self.model_name))
-        export_hash, hashed_params = filter_and_hash_export_params(
-            model_params=copy.deepcopy(self.model_params),
+        export_hash = filter_and_create_export_hash(
+            model_params=self.hash_params,
             output_names=output_names,
             dynamic_axes=dynamic_axes,
             export_kwargs=export_kwargs,
@@ -232,7 +241,7 @@ def _export(
 
         # Dump JSON file with hashed parameters
         hashed_params_export_path = export_dir / "hashed_model_params.json"
-        create_json(hashed_params_export_path, hashed_params)
+        create_json(hashed_params_export_path, self.hash_params)
         logger.info("Hashed parameters exported successfully.")
 
         self.onnx_path = onnx_path
@@ -307,6 +316,7 @@ def _compile(
                 continue
             command.append(f"{option}={value}")
 
+<<<<<<< HEAD
         # Create a dummy mdp_ts_json if mdp-load-partition-config not provided and num_devices > 1
         if mdp_ts_json_path is not None:
             mdp_ts_json = load_json(str(mdp_ts_json_path))
@@ -335,6 +345,15 @@ def _compile(
         # Check if already compiled
         compile_hash = hash_dict_params(self.compile_params)
         compile_hash = compile_hash.hexdigest()[:16]
+=======
+        compile_hash, hashed_params = hash_compile_params(
+            command=command,
+            specializations=specializations,
+            custom_io=custom_io,
+            mdp_ts_num_devices=mdp_ts_num_devices,
+            num_speculative_tokens=num_speculative_tokens,
+        )
+>>>>>>> dd35ad1 (Modifications to the flow of hash creation and filtration of params for export)
         compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
 
         qpc_path = compile_dir / "qpc"
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -73,7 +73,7 @@ def __init__(self, model: nn.Module, **kwargs) -> None:
         ):
             raise AssertionError("Please use `from_pretrained` method to load quantized models")
 
-        super().__init__(model)
+        super().__init__(model, **kwargs)
 
     def __repr__(self) -> str:
         return self.__class__.__name__ + "\n" + self.model.__repr__()
@@ -174,7 +174,7 @@ def __init__(self, model: nn.Module, pooling=None, **kwargs):
             self.model, _ = PoolingTransform.apply(self.model, pooling)
 
         self.model.base_model.config.use_cache = True
-        self.model_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_class"] = self.__class__.__name__
 
     @classmethod
     @with_replaced_quantizers
@@ -435,7 +435,7 @@ class QEffVisionEncoderForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model: nn.modules, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_vision_encoder()
-        self.model_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_class"] = self.__class__.__name__
 
     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -490,7 +490,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_language_decoder()
-        self.model_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_class"] = self.__class__.__name__
 
     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -543,8 +543,8 @@ def __init__(
             raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.")
         self.model = model
         self.config = model.config
-        self.vision_model = QEffVisionEncoderForTextImageToTextModel(model)
-        self.lang_model = QEffCausalLMForTextImageToTextModel(model)
+        self.vision_model = QEffVisionEncoderForTextImageToTextModel(model, **kwargs)
+        self.lang_model = QEffCausalLMForTextImageToTextModel(model, **kwargs)
         self.input_shapes, self.output_names = None, None
 
     @property
@@ -916,7 +916,7 @@ def __init__(
             self.model.config.vision_config.use_flash_attn = "false"
         else:
             self.model.config.text_config.use_cache = True
-        self.model_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_class"] = self.__class__.__name__
 
     @classmethod
     def from_pretrained(
@@ -940,10 +940,6 @@ def from_pretrained(
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs)
 
         return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
-        # # Bypass __call__ and manually initialize
-        # instance = object.__new__(cls)
-        # instance.__init__(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
-        # return instance
 
     def export(
         self,
@@ -1288,11 +1284,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
         return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
 
-        # # Bypass __call__ and manually initialize
-        # instance = object.__new__(cls)
-        # instance.__init__(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
-        # return instance
-
 
 MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText}
 
@@ -1376,7 +1367,11 @@ def __init__(
         self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
         self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs)
         self.is_tlm = transformed
+<<<<<<< HEAD
 
+=======
+        self.hash_params["qeff_class"] = self.__class__.__name__
+>>>>>>> dd35ad1 (Modifications to the flow of hash creation and filtration of params for export)
         # ---Sampling---
         # Note: SamplerTransform should be applied after all other transforms
         # are done. The role of the sampler is to just add nodes at the output of the
@@ -1917,7 +1912,7 @@ def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model, **kwargs)
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
-        self.model_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_class"] = self.__class__.__name__
 
     @property
     def get_model_config(self) -> dict:
diff --git a/QEfficient/utils/__init__.py b/QEfficient/utils/__init__.py
@@ -13,9 +13,12 @@
     check_and_assign_cache_dir,
     create_json,
     custom_format_warning,
-    dump_model_params,
     dump_qconfig,
+<<<<<<< HEAD
     generate_mdp_partition_config,
+=======
+    filter_and_create_export_hash,
+>>>>>>> dd35ad1 (Modifications to the flow of hash creation and filtration of params for export)
     get_num_layers_from_config,
     get_num_layers_vlm,
     get_onnx_dir_name,
@@ -24,6 +27,7 @@
     get_qpc_dir_path,
     get_sliding_window_layers,
     get_sliding_window_shapes,
+    hash_compile_params,
     hf_download,
     load_hf_processor,
     load_hf_tokenizer,