Skip to content

[TRT-EP] Add loadModelProto APIs #25409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ struct OrtTensorRTProviderOptionsV2 {
size_t trt_onnx_bytestream_size{0}; // size of the byte stream provided as "trt_onnx_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue

const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
const void* trt_external_data_bytestream{nullptr}; // The byte stream containing the weights to override the ones provided in the ONNX model.
// can be updated using: UpdateTensorRTProviderOptionsWithValue
size_t trt_external_data_bytestream_size{0}; // size of the byte stream provided as "trt_external_data_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue
const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
int trt_load_user_initializer{0}; // Save initializers locally instead of to disk. Default 0 = false, nonzero = true
};
4 changes: 2 additions & 2 deletions onnxruntime/core/graph/graph_proto_serializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ void GraphViewerToProto(const GraphViewer& graph_view,
current_scope_initializer_set.insert(name);
auto* p_initializer = graph_proto.add_initializer();

// Do not save raw or external data into the graph, only the metadata
if (!include_initializer_data && (init->has_raw_data() || init->has_data_location())) {
// Do not save raw data into the graph, only the metadata
if (!include_initializer_data && init->has_raw_data()) {
// Set datatype
if (init->has_data_type()) {
p_initializer->set_data_type(init->data_type());
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph
make_secure_path_checks,
onnx_model_bytestream_,
onnx_model_bytestream_size_,
onnx_external_data_bytestream_,
onnx_external_data_bytestream_size_,
(*trt_engine_).get(),
false /* serialize refitted engine to disk */,
detailed_build_log_);
Expand Down Expand Up @@ -367,6 +369,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph
make_secure_path_checks,
onnx_model_bytestream_,
onnx_model_bytestream_size_,
onnx_external_data_bytestream_,
onnx_external_data_bytestream_size_,
(*trt_engine_).get(),
true /* serialize refitted engine to disk */,
detailed_build_log_);
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class TensorRTCacheModelHandler {
std::string onnx_model_folder_path,
const void* onnx_model_bytestream,
size_t onnx_model_bytestream_size,
const void* onnx_external_data_bytestream,
size_t onnx_external_data_bytestream_size,
bool detailed_build_log)
: trt_engine_(trt_engine),
trt_runtime_(trt_runtime),
Expand All @@ -63,6 +65,8 @@ class TensorRTCacheModelHandler {
onnx_model_folder_path_(onnx_model_folder_path),
onnx_model_bytestream_(onnx_model_bytestream),
onnx_model_bytestream_size_(onnx_model_bytestream_size),
onnx_external_data_bytestream_(onnx_external_data_bytestream),
onnx_external_data_bytestream_size_(onnx_external_data_bytestream_size),
detailed_build_log_(detailed_build_log) {
}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler);
Expand All @@ -80,6 +84,8 @@ class TensorRTCacheModelHandler {
std::string onnx_model_folder_path_;
const void* onnx_model_bytestream_;
size_t onnx_model_bytestream_size_;
const void* onnx_external_data_bytestream_;
size_t onnx_external_data_bytestream_size_;
bool detailed_build_log_;
}; // TRTCacheModelHandler
} // namespace onnxruntime
Loading
Loading