Skip to content

[TRT-EP] Add loadModelProto APIs #25409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ struct OrtTensorRTProviderOptionsV2 {
size_t trt_onnx_bytestream_size{0}; // size of the byte stream provided as "trt_onnx_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue

const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
const void* trt_external_data_bytestream{nullptr}; // The byte stream containing the weights to override the ones provided in the ONNX model.
// can be updated using: UpdateTensorRTProviderOptionsWithValue
size_t trt_external_data_bytestream_size{0}; // size of the byte stream provided as "trt_external_data_bytestream"
// can be updated using: UpdateTensorRTProviderOptionsWithValue
const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
int trt_load_user_initializer{0}; // Save initializers locally instead of to disk. Default 0 = false, nonzero = true
};
4 changes: 2 additions & 2 deletions onnxruntime/core/graph/graph_proto_serializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ void GraphViewerToProto(const GraphViewer& graph_view,
current_scope_initializer_set.insert(name);
auto* p_initializer = graph_proto.add_initializer();

// Do not save raw or external data into the graph, only the metadata
if (!include_initializer_data && (init->has_raw_data() || init->has_data_location())) {
// Do not save raw data into the graph, only the metadata
if (!include_initializer_data && init->has_raw_data()) {
// Set datatype
if (init->has_data_type()) {
p_initializer->set_data_type(init->data_type());
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph
make_secure_path_checks,
onnx_model_bytestream_,
onnx_model_bytestream_size_,
onnx_external_data_bytestream_,
onnx_external_data_bytestream_size_,
(*trt_engine_).get(),
false /* serialize refitted engine to disk */,
detailed_build_log_);
Expand Down Expand Up @@ -367,6 +369,8 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph
make_secure_path_checks,
onnx_model_bytestream_,
onnx_model_bytestream_size_,
onnx_external_data_bytestream_,
onnx_external_data_bytestream_size_,
(*trt_engine_).get(),
true /* serialize refitted engine to disk */,
detailed_build_log_);
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class TensorRTCacheModelHandler {
std::string onnx_model_folder_path,
const void* onnx_model_bytestream,
size_t onnx_model_bytestream_size,
const void* onnx_external_data_bytestream,
size_t onnx_external_data_bytestream_size,
bool detailed_build_log)
: trt_engine_(trt_engine),
trt_runtime_(trt_runtime),
Expand All @@ -63,6 +65,8 @@ class TensorRTCacheModelHandler {
onnx_model_folder_path_(onnx_model_folder_path),
onnx_model_bytestream_(onnx_model_bytestream),
onnx_model_bytestream_size_(onnx_model_bytestream_size),
onnx_external_data_bytestream_(onnx_external_data_bytestream),
onnx_external_data_bytestream_size_(onnx_external_data_bytestream_size),
detailed_build_log_(detailed_build_log) {
}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TensorRTCacheModelHandler);
Expand All @@ -80,6 +84,8 @@ class TensorRTCacheModelHandler {
std::string onnx_model_folder_path_;
const void* onnx_model_bytestream_;
size_t onnx_model_bytestream_size_;
const void* onnx_external_data_bytestream_;
size_t onnx_external_data_bytestream_size_;
bool detailed_build_log_;
}; // TRTCacheModelHandler
} // namespace onnxruntime
Loading
Loading