fix safetensors loading bug and add some modifications based on orangepi (#2055)

xing-yiren · web-flow · commit 6ed1cf3c2832 · 2025-07-01T19:47:09.000+08:00
diff --git a/mindnlp/core/nn/functional.py b/mindnlp/core/nn/functional.py
@@ -14,6 +14,8 @@
 def gelu(input, approximate='none'):
     if use_pyboost():
         return mindspore.mint.nn.functional.gelu(input, approximate=approximate)
+    if ON_ORANGE_PI:  # same mint kernel call as the pyboost branch above; NOTE(review): ON_ORANGE_PI must be in scope in this module - its import is not visible in this hunk, confirm
+        return mindspore.mint.nn.functional.gelu(input, approximate=approximate)
     return ops.gelu(input, approximate)
 
 def relu(input):
diff --git a/mindnlp/core/serialization.py b/mindnlp/core/serialization.py
@@ -45,6 +45,7 @@
 
 import safetensors
 import safetensors.numpy
+from safetensors import deserialize
 
 from mindnlp.core import nn
 from mindnlp.core.nn import Parameter
@@ -1575,6 +1576,48 @@ def get_tensor(self, name):
         return self.tensors[name].get()
 
 
+def legacy_safe_load_file(filename):
+    """
+    Read the whole file into memory, parse it with `safetensors.deserialize`, and build a dictionary of Tensor objects (raw-bytes conversion first, NumPy conversion as fallback).
+    
+    Args:
+        filename (str): The path to the safetensors file to be loaded.
+    
+    Returns:
+        dict: A dictionary where keys are the tensor names found in the file and values are Tensor objects (not Parameters).
+    
+    Raises:
+        FileNotFoundError: If the specified file 'filename' does not exist.
+        ValueError: If, in the NumPy fallback, a tensor's raw bytes do not fit its declared dtype/shape (raised by np.frombuffer / reshape).
+    """
+    with open(filename, "rb") as f:
+        data = f.read()
+
+    safeview = deserialize(data)  # iterated below as (name, info) pairs; info is indexed with "dtype", "shape" and "data"
+
+    result = {}
+    try:  # first attempt: build each tensor directly from its raw bytes
+        for k, v in safeview:
+            dtype = _MS_TYPES[v["dtype"]]
+            if (not SUPPORT_BF16 and dtype != mindspore.bfloat16) or SUPPORT_BF16:  # logically the same as: SUPPORT_BF16 or dtype != mindspore.bfloat16
+                arr = Tensor.convert_bytes_to_tensor(bytes(v["data"]), tuple(v["shape"]), dtype)
+                result[k] = Tensor(arr)
+            else:
+                raise TypeError('Do not support bfloat16 on current device, use numpy as convert buffer to boost load.')  # never reaches the caller: caught by the except below to switch to the NumPy path
+        return result
+
+    except Exception as e:  # NOTE(review): any failure above (not only the bf16 TypeError) lands here; `e` is unused and the original error is not logged
+        for k, v in safeview:  # restarts from the first entry; keys already stored by the failed pass are overwritten
+            dtype = _NP_TYPES[v["dtype"]]
+            arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"])
+
+            if (not SUPPORT_BF16 and dtype != bfloat16) or SUPPORT_BF16:  # `bfloat16` is presumably a NumPy-compatible dtype imported elsewhere in the file - TODO confirm
+                result[k] = Tensor.from_numpy(arr)
+            else:
+                result[k] = Tensor.from_numpy(arr.astype(np.float16))  # bf16 unsupported: cast to float16; NOTE(review): float16 has a narrower exponent range than bfloat16 - confirm overflow is acceptable
+        return result
+
+
 def safe_load_file(filename):
     """
     This function safely loads a file containing state dictionary data and converts it into a dictionary of MindSpore Parameters.
@@ -1591,9 +1634,12 @@ def safe_load_file(filename):
     """
 
     result = {}
-    with fast_safe_open(filename, framework="np") as f:
-        for k in f.keys():
-            result[k] = f.get_tensor(k)
+    try:
+        with fast_safe_open(filename, framework="np") as f:
+            for k in f.keys():
+                result[k] = f.get_tensor(k)
+    except Exception as e:
+        result = legacy_safe_load_file(filename)
     return result
 
 
diff --git a/mindnlp/engine/trainer/base.py b/mindnlp/engine/trainer/base.py
@@ -594,7 +594,7 @@ def num_tokens(self, train_ds: 'mindspore.dataset.Dataset', max_steps: Optional[
         """
         train_tokens = 0
         try:
-            for step, batch in train_ds.create_dict_iterator():
+            for step, batch in enumerate(train_ds.create_dict_iterator()):
                 tokens = batch["input_ids"].numel()
                 if max_steps is not None:
                     return tokens * max_steps