quic
diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
Lines changed: 29 additions & 26 deletions
diff --git a/QEfficient/compile/qnn_compiler.py b/QEfficient/compile/qnn_compiler.py
Lines changed: 1 addition & 1 deletion
diff --git a/QEfficient/finetune/utils/train_utils.py b/QEfficient/finetune/utils/train_utils.py
Lines changed: 12 additions & 11 deletions
diff --git a/QEfficient/peft/auto.py b/QEfficient/peft/auto.py
Lines changed: 1 addition & 1 deletion
diff --git a/QEfficient/peft/lora/auto.py b/QEfficient/peft/lora/auto.py
Lines changed: 1 addition & 1 deletion
diff --git a/QEfficient/transformers/models/llama4/modeling_llama4.py b/QEfficient/transformers/models/llama4/modeling_llama4.py
Lines changed: 0 additions & 8 deletions
@@ -5,7 +5,6 @@
 #
 # ----------------------------------------------------------------------------
 
-import hashlib
 import inspect
 import logging
 import shutil
@@ -22,8 +21,16 @@
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils import constants, create_json, dump_qconfig, generate_mdp_partition_config, load_json
-from QEfficient.utils.cache import QEFF_HOME, to_hashable
+from QEfficient.utils import (
+    constants,
+    create_json,
+    create_model_params,
+    dump_qconfig,
+    export_wrapper,
+    generate_mdp_partition_config,
+    hash_dict_params,
+    load_json,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -45,12 +52,16 @@ class QEFFBaseModel(ABC):
     def _transform_names(cls) -> List[str]:
         return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
 
-    def __init__(self, model: torch.nn.Module) -> None:
+    def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
         self.model = model
+        self.hash_params = create_model_params(self, **kwargs)
         self.onnx_path: Optional[str] = None
         self.qpc_path: Optional[str] = None
         self.qpc_session: Optional[QAICInferenceSession] = None
+        self.model_architecture = (
+            (arch := getattr(self.model.config, "architectures", None)) and len(arch) > 0 and arch[0]
+        ) or None
 
         # Apply the transformations
         any_transformed = False
@@ -67,10 +78,6 @@ def __init__(self, model: torch.nn.Module) -> None:
     @abstractmethod
     def model_name(self) -> str: ...
 
-    @property
-    @abstractmethod
-    def model_hash(self) -> str: ...
-
     @abstractmethod
     def export(self, export_dir: Optional[str] = None) -> Path:
         """
@@ -114,6 +121,7 @@ def compile(self, *args, **kwargs) -> Path:
             :str: Path of the compiled ``qpc`` package.
         """
 
+    @export_wrapper
     def _export(
         self,
         example_inputs: Dict[str, torch.Tensor],
@@ -134,8 +142,6 @@ def _export(
             :onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
             :export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
         """
-        export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
-        export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
         onnx_path = export_dir / f"{self.model_name}.onnx"
         if onnx_path.is_file():
             self.onnx_path = onnx_path
@@ -299,23 +305,16 @@ def _compile(
         else:
             mdp_ts_json = None
 
-        compile_hash = hashlib.sha256(to_hashable(command))
-
-        if specializations is not None:
-            compile_hash.update(to_hashable(specializations))
-
-        if custom_io is not None:
-            compile_hash.update(to_hashable(custom_io))
-
-        if num_speculative_tokens:
-            compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
-
-        # Hash the MDP partition config and the number of devices.
-        compile_hash.update(to_hashable(mdp_ts_json))
-        compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))
+        compile_hash_params = {
+            "command": command,
+            "specializations": specializations,
+            "custom_io": custom_io,
+            "mdp_ts_num_devices": mdp_ts_num_devices,
+            "mdp_ts_json": mdp_ts_json,
+            "num_speculative_tokens": num_speculative_tokens,
+        }
+        compile_hash = hash_dict_params(compile_hash_params)
 
-        # Check if already compiled
-        compile_hash = compile_hash.hexdigest()[:16]
         compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
         qpc_path = compile_dir / "qpc"
         qpc_path.mkdir(parents=True, exist_ok=True)
@@ -366,6 +365,10 @@ def _compile(
                     ]
                 )
             )
+        # Dump JSON file with hashed parameters
+        hashed_compile_params_path = compile_dir / "hashed_compile_params.json"
+        create_json(hashed_compile_params_path, compile_hash_params)
+        logger.info("Hashed parameters exported successfully.")
 
         self.qpc_path = qpc_path
 
 
@@ -12,12 +12,12 @@
 from typing import Dict, List, Optional
 
 from QEfficient.utils._utils import create_json, execute_command, load_json
-from QEfficient.utils.cache import to_hashable
 from QEfficient.utils.constants import QnnConstants
 from QEfficient.utils.generate_qnn_network_specialization_config import (
     generate_data_format_config,
     generate_qnn_specialization,
 )
+from QEfficient.utils.hash_utils import to_hashable
 from QEfficient.utils.logging_utils import logger
 
 
 
@@ -124,10 +124,9 @@ def train(
 
         if train_config.use_peft and train_config.from_peft_checkpoint:
             intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
+            intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
             if epoch < intermediate_epoch:
                 logger.log_rank_zero(f"Skipping epoch {epoch + 1} since fine tuning has already completed for it.")
-                # to bring the count of train_step in sync with where it left off
-                total_train_steps += len(train_dataloader)
                 continue
 
         logger.log_rank_zero(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
@@ -149,20 +148,18 @@ def train(
 
         num_dummy_samples = 0
         for step, batch in enumerate(train_dataloader):
+            # total_train_steps indicates the cumulative number of training steps completed across all epochs.
+            # When resuming fine-tuning from previously saved checkpoints, total_train_steps indicates the total number of steps trained across the earlier session and the ongoing one.
+            total_train_steps = (epoch) * len(train_dataloader) + step
             # resume training from a particular checkpoint, assuming the dataset is not shuffled
             if train_config.use_peft and train_config.from_peft_checkpoint:
-                intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
-                intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
                 # to bring the count of train_step in sync with where it left off
                 if epoch == intermediate_epoch and step == 0:
-                    total_train_steps += intermediate_step
                     logger.log_rank_zero(
                         f"Skipping first {intermediate_step} steps for epoch {epoch + 1}, since fine tuning has already completed for it."
                     )
                 if epoch == intermediate_epoch and step < intermediate_step:
-                    total_train_steps += 1
                     continue
-            total_train_steps += 1
 
             if train_config.max_train_step > 0 and total_train_steps >= train_config.max_train_step:
                 max_steps_reached = True
@@ -235,12 +232,12 @@ def train(
             else:
                 num_samples_in_cur_update = len(train_dataloader) % train_config.gradient_accumulation_steps
 
-            loss = loss / num_samples_in_cur_update
+            normalized_loss = loss / num_samples_in_cur_update
 
             if train_config.grad_scaler:
-                scaler.scale(loss).backward()  # backward pass
+                scaler.scale(normalized_loss).backward()  # backward pass
             else:
-                loss.backward()  # backward pass
+                normalized_loss.backward()  # backward pass
 
             if is_optimizer_step:
                 if train_config.grad_scaler:
@@ -358,7 +355,6 @@ def train(
         logger.log_rank_zero(
             f"Epoch {epoch + 1}: Train epoch loss: {train_epoch_loss:.4f}, Train metric: {train_epoch_metric:.4f}, Epoch time {epoch_end_time:.2f} sec"
         )
-
         # Saving the results every epoch to plot later
         if train_config.save_metrics:
             save_to_json(
@@ -377,9 +373,14 @@ def train(
 
     results["last_epoch_train_loss"] = train_epoch_loss.cpu()
     results["last_epoch_train_metric"] = train_epoch_metric.cpu()
+    results["train_step_loss"] = train_step_loss
+    results["train_step_metric"] = train_step_metric
+
     if train_config.run_validation:
         results["last_epoch_eval_loss"] = eval_epoch_loss.cpu()
         results["last_epoch_eval_metric"] = eval_epoch_metric.cpu()
+        results["eval_step_loss"] = eval_step_loss
+        results["eval_step_metric"] = eval_step_metric
     results["avg_epoch_time"] = avg_epoch_time
     results["avg_checkpoint_time"] = avg_checkpoint_time
     if train_config.save_metrics:
 
@@ -27,7 +27,7 @@
 from QEfficient.transformers.models.pytorch_transforms import CustomOpsTransform, KVCacheTransform
 from QEfficient.utils import constants
 from QEfficient.utils._utils import get_padding_shape_from_config
-from QEfficient.utils.cache import to_hashable
+from QEfficient.utils.hash_utils import to_hashable
 
 logger = logging.getLogger(__name__)
 
 
@@ -18,7 +18,7 @@
 from QEfficient import QEFFAutoModelForCausalLM
 from QEfficient.peft.lora.pytorch_transforms import LoraModelInputsTransform, TargetModulesTransform
 from QEfficient.utils import constants, get_padding_shape_from_config
-from QEfficient.utils.cache import to_hashable
+from QEfficient.utils.hash_utils import to_hashable
 from QEfficient.utils.logging_utils import logger
 
 
 
@@ -925,14 +925,6 @@ def get_specializations(
         )
         vision_size = num_features_per_tile * max_num_tiles
 
-        downsample_ratio = int(round(1.0 / (self.config.vision_config.pixel_shuffle_ratio**2)))
-        num_features_per_tile = int(
-            (img_size // self.config.vision_config.patch_size)
-            * (img_size // self.config.vision_config.patch_size)
-            // downsample_ratio
-        )
-        vision_size = num_features_per_tile * max_num_tiles
-
         vision = [
             {
                 "batch_size": batch_size,
Original file line number	Diff line number	Diff line change
`@@ -925,14 +925,6 @@ def get_specializations(`
`925`	`925`	`)`
`926`	`926`	`vision_size = num_features_per_tile * max_num_tiles`
`927`	`927`
`928`		`- downsample_ratio = int(round(1.0 / (self.config.vision_config.pixel_shuffle_ratio**2)))`
`929`		`- num_features_per_tile = int(`
`930`		`- (img_size // self.config.vision_config.patch_size)`
`931`		`- * (img_size // self.config.vision_config.patch_size)`
`932`		`- // downsample_ratio`
`933`		`- )`
`934`		`- vision_size = num_features_per_tile * max_num_tiles`
`935`		`-`
`936`	`928`	`vision = [`
`937`	`929`	`{`
`938`	`930`	`"batch_size": batch_size,`