From 43f261dfe45027d7e1be39a1995ae6c492609660 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 11 Jul 2025 18:47:16 -0700 Subject: [PATCH 01/46] add support for perf test to run plugin ep --- .../test/perftest/command_args_parser.cc | 12 +++-- onnxruntime/test/perftest/main.cc | 40 +++++++++----- onnxruntime/test/perftest/ort_test_session.cc | 53 ++++++++++++++++++- onnxruntime/test/perftest/ort_test_session.h | 1 + .../test/perftest/performance_runner.h | 1 + .../test/perftest/test_configuration.h | 1 + 6 files changed, 90 insertions(+), 18 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 843875a881f0a..3411a38259ef8 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -39,8 +39,8 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " - "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' or 'webgpu'. " + "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai', 'webgpu' or plugin execution provider that provided via ep library. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" @@ -195,7 +195,7 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, /*static*/ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("m:e:r:t:p:x:y:c:d:o:u:i:f:F:S:T:C:AMPIDZvhsqznlgR:X"))) != -1) { + while ((ch = getopt(argc, argv, ORT_TSTR("m:e:r:t:p:x:y:c:d:o:u:i:f:F:S:T:C:AMPIDZvhsqznlgR:XL:"))) != -1) { switch (ch) { case 'f': { std::basic_string dim_name; @@ -273,7 +273,8 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, } else if (!CompareCString(optarg, ORT_TSTR("nvtensorrtrtx"))) { test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; } else { - return false; + // Could be plugin EP, save it first and handle later. + test_config.machine_config.provider_type_name = ToUTF8String(optarg); } break; case 'r': @@ -404,6 +405,9 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, case 'X': test_config.run_config.use_extensions = true; break; + case 'L': + test_config.plugin_ep_names_and_libs = optarg; + break; case '?': case 'h': default: diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 43bf54963cabb..42b0950a0d748 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -6,6 +6,7 @@ #include #include "command_args_parser.h" #include "performance_runner.h" +#include "strings_helper.h" #include using namespace onnxruntime; @@ -41,23 +42,36 @@ int real_main(int argc, char* argv[]) { if (failed) return -1; } - std::random_device rd; - perftest::PerformanceRunner perf_runner(env, test_config, rd); + { + std::random_device rd; + perftest::PerformanceRunner perf_runner(env, test_config, rd); - // Exit if user enabled -n option so that user can measure session creation time - if (test_config.run_config.exit_after_session_creation) { - perf_runner.LogSessionCreationTime(); - return 0; - } + // Exit if user enabled -n option so that user can measure session creation time + if (test_config.run_config.exit_after_session_creation) { + perf_runner.LogSessionCreationTime(); + return 0; + } - auto status = perf_runner.Run(); - if (!status.IsOK()) { - printf("Run failed:%s\n", status.ErrorMessage().c_str()); - return -1; - } + auto status = perf_runner.Run(); + if (!status.IsOK()) { + printf("Run failed:%s\n", status.ErrorMessage().c_str()); + return -1; + } - perf_runner.SerializeResult(); + perf_runner.SerializeResult(); + } + // unregister any plugin ep lib if it's registered + std::unordered_map ep_names_to_libs; +#ifdef _MSC_VER + std::string ep_names_and_libs_string = ToUTF8String(test_config.plugin_ep_names_and_libs); +#else + std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; +#endif + onnxruntime::perftest::ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); + for (auto& pair : ep_names_to_libs) { + env.UnregisterExecutionProviderLibrary(pair.first.c_str()); + } return 0; } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 7a210ca8482a4..a585320cb1a02 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -62,6 +62,56 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; +#ifdef _MSC_VER + std::string ep_names_and_libs_string = ToUTF8String(performance_test_config.plugin_ep_names_and_libs); +#else + std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; +#endif + std::unordered_map ep_names_to_libs; + ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); + bool is_plugin_ep_avaiable = false; + + if (ep_names_to_libs.size() > 0) { + // Register plugin EP libraries if provided via "-L" argument. + for (auto& pair : ep_names_to_libs) { + const std::filesystem::path library_path = pair.second; + const std::string registration_name = pair.first; + env.RegisterExecutionProviderLibrary(registration_name.c_str(), library_path.c_str()); + registered_plugin_ep_names_.push_back(registration_name); + } + + std::vector ep_devices = env.GetEpDevices(); + std::vector added_ep_devices; + + // All OrtEpDevice instances must be from the same execution provider. + // Find the OrtEpDevice associated with the execution provider provided via "-e" argument. + Ort::ConstEpDevice plugin_ep_device; + for (Ort::ConstEpDevice& device : ep_devices) { + if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { + plugin_ep_device = device; + added_ep_devices.push_back(plugin_ep_device); + } + } + + if (added_ep_devices.empty()) { + for (auto ep_name : registered_plugin_ep_names_) { + env.UnregisterExecutionProviderLibrary(ep_name.c_str()); + } + ORT_THROW( + "[ERROR] [plugin EP] No matching execution provider name found in EP library's factory."); + } + +#if defined(_MSC_VER) + std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string provider_option_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + std::unordered_map provider_options; + ParseSessionConfigs(provider_option_string, provider_options); + session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); + is_plugin_ep_avaiable = true; + } + provider_name_ = performance_test_config.machine_config.provider_type_name; std::unordered_map provider_options; if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { @@ -574,7 +624,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); #endif } else if (!provider_name_.empty() && provider_name_ != onnxruntime::kCpuExecutionProvider && - provider_name_ != onnxruntime::kOpenVINOExecutionProvider) { + provider_name_ != onnxruntime::kOpenVINOExecutionProvider && + !is_plugin_ep_avaiable) { ORT_THROW("This backend is not included in perf test runner.\n"); } diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h index 71f797b0d5a35..bcc324cb996c1 100644 --- a/onnxruntime/test/perftest/ort_test_session.h +++ b/onnxruntime/test/perftest/ort_test_session.h @@ -51,6 +51,7 @@ class OnnxRuntimeTestSession : public TestSession { const int input_length_; std::string provider_name_; std::string device_memory_name_; // Device memory type name to use from the list in allocator.h + std::vector registered_plugin_ep_names_; }; } // namespace perftest diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index b0a0161e7fd6c..c09af39dcb99c 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -117,6 +117,7 @@ class PerformanceRunner { std::unique_ptr test_case_; std::mutex results_mutex_; + bool unregister_ep_lib = false; }; } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 8145f5f35c3b3..95acd15368942 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -74,6 +74,7 @@ struct PerformanceTestConfig { ModelInfo model_info; MachineConfig machine_config; RunConfig run_config; + std::basic_string plugin_ep_names_and_libs; }; } // namespace perftest From 8b982ab060f92412daedfeadbad6f0311b1607fb Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 11 Jul 2025 22:50:15 -0700 Subject: [PATCH 02/46] Unregister all registered plugin EP libraries before program exit --- onnxruntime/test/perftest/main.cc | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 42b0950a0d748..654b668186655 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -42,6 +42,9 @@ int real_main(int argc, char* argv[]) { if (failed) return -1; } + + auto status = Status::OK(); + { std::random_device rd; perftest::PerformanceRunner perf_runner(env, test_config, rd); @@ -52,16 +55,22 @@ int real_main(int argc, char* argv[]) { return 0; } - auto status = perf_runner.Run(); + status = perf_runner.Run(); + if (!status.IsOK()) { printf("Run failed:%s\n", status.ErrorMessage().c_str()); - return -1; + } else { + perf_runner.SerializeResult(); } - - perf_runner.SerializeResult(); } - // unregister any plugin ep lib if it's registered + // Unregister all registered plugin EP libraries before program exits. + // + // This is necessary because unregistering the plugin EP also unregisters any associated shared allocators. + // If we don't do this first and program returns, the factories stored inside the environment will be destroyed when the environment goes out of scope. + // Later, when the shared allocator's deleter runs, it may cause a segmentation fault because it attempts to use the already-destroyed factory to call ReleaseAllocator. + // + // See "ep_device.ep_factory->ReleaseAllocator" in Environment::CreateSharedAllocatorImpl. std::unordered_map ep_names_to_libs; #ifdef _MSC_VER std::string ep_names_and_libs_string = ToUTF8String(test_config.plugin_ep_names_and_libs); @@ -72,6 +81,11 @@ int real_main(int argc, char* argv[]) { for (auto& pair : ep_names_to_libs) { env.UnregisterExecutionProviderLibrary(pair.first.c_str()); } + + if (!status.IsOK()) { + return -1; + } + return 0; } From 7fd4713446f2d94751f45e0426cce32c7959c6cb Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 11 Jul 2025 23:03:49 -0700 Subject: [PATCH 03/46] remove unnecessary code --- onnxruntime/test/perftest/performance_runner.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index c09af39dcb99c..b0a0161e7fd6c 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -117,7 +117,6 @@ class PerformanceRunner { std::unique_ptr test_case_; std::mutex results_mutex_; - bool unregister_ep_lib = false; }; } // namespace perftest } // namespace onnxruntime From de32583d2d2edca806ca47a65c81766f9e5d7082 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sun, 13 Jul 2025 10:01:28 -0700 Subject: [PATCH 04/46] fix compile error --- onnxruntime/test/perftest/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 654b668186655..044059f78d04d 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -75,7 +75,7 @@ int real_main(int argc, char* argv[]) { #ifdef _MSC_VER std::string ep_names_and_libs_string = ToUTF8String(test_config.plugin_ep_names_and_libs); #else - std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; + std::string ep_names_and_libs_string = test_config.plugin_ep_names_and_libs; #endif onnxruntime::perftest::ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); for (auto& pair : ep_names_to_libs) { From 4a9c5a3fa604bf67a1484337c16aeffa26aa6732 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 15:00:31 -0700 Subject: [PATCH 05/46] Replace getopt with cxxopts and include --list_devices and --select_devices for perf test --- cmake/deps.txt | 2 +- cmake/onnxruntime_unittests.cmake | 10 +- .../test/perftest/command_args_parser.cc | 157 +++++++++++++++++- .../test/perftest/command_args_parser.h | 1 + onnxruntime/test/perftest/main.cc | 48 ++++-- onnxruntime/test/perftest/ort_test_session.cc | 72 +++++--- onnxruntime/test/perftest/strings_helper.cc | 11 ++ onnxruntime/test/perftest/strings_helper.h | 2 + .../test/perftest/test_configuration.h | 3 + onnxruntime/test/perftest/utils.h | 15 +- onnxruntime/test/perftest/windows/utils.cc | 87 ++++++++++ 11 files changed, 361 insertions(+), 47 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 7089012a65f26..8c67e20f39293 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -11,7 +11,7 @@ # abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20250512.0.zip;3d6ff7e7ce144d9a53a53bef1f1bf79e1da4b8e1 coremltools;https://github.com/apple/coremltools/archive/refs/tags/7.1.zip;f1bab0f30966f2e217d8e01207d518f230a1641a -cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0 +cxxopts;https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.3.1.zip;B77F1CE4A03F610488BA0ED17C1BE2EFDBC15564 date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159 dlpack;https://github.com/dmlc/dlpack/archive/5c210da409e7f1e51ddf445134a4376fdbd70d7d.zip;e499c86e4e5c5268a87661d7ea39c27fae10907c # This Eigen commit id matches the eigen archive being consumed from https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index d1fb06a95f4c9..6b6e1d0624cb8 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1212,6 +1212,14 @@ endif() if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) if(NOT IOS) + onnxruntime_fetchcontent_declare( + cxxopts + URL ${DEP_URL_cxxopts} + URL_HASH SHA1=${DEP_SHA1_cxxopts} + EXCLUDE_FROM_ALL + ) + onnxruntime_fetchcontent_makeavailable(cxxopts) + #perf test runner set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest) set(onnxruntime_perf_test_src_patterns @@ -1238,7 +1246,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} - ${CMAKE_CURRENT_BINARY_DIR}) + ${CMAKE_CURRENT_BINARY_DIR} ${cxxopts_SOURCE_DIR}/include) if (WIN32) target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings}) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 3411a38259ef8..c592ca29156d8 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -4,6 +4,7 @@ // Licensed under the MIT License. #include "command_args_parser.h" +#include "utils.h" #include #include @@ -19,6 +20,8 @@ #include #endif +#include + #include #include #include @@ -173,7 +176,7 @@ static const ORTCHAR_T* overrideDelimiter = L":"; #else static const ORTCHAR_T* overrideDelimiter = ":"; #endif -static bool ParseDimensionOverride(std::basic_string& dim_identifier, int64_t& override_val) { +static bool ParseDimensionOverride(std::basic_string& dim_identifier, int64_t& override_val, const ORTCHAR_T* optarg) { std::basic_string free_dim_str(optarg); size_t delimiter_location = free_dim_str.find(overrideDelimiter); if (delimiter_location >= free_dim_str.size() - 1) { @@ -200,7 +203,7 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, case 'f': { std::basic_string dim_name; int64_t override_val; - if (!ParseDimensionOverride(dim_name, override_val)) { + if (!ParseDimensionOverride(dim_name, override_val, optarg)) { return false; } test_config.run_config.free_dim_name_overrides[dim_name] = override_val; @@ -209,7 +212,7 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, case 'F': { std::basic_string dim_denotation; int64_t override_val; - if (!ParseDimensionOverride(dim_denotation, override_val)) { + if (!ParseDimensionOverride(dim_denotation, override_val, optarg)) { return false; } test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; @@ -405,9 +408,6 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, case 'X': test_config.run_config.use_extensions = true; break; - case 'L': - test_config.plugin_ep_names_and_libs = optarg; - break; case '?': case 'h': default: @@ -435,5 +435,150 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } +bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { + try { + cxxopts::Options options("onnxruntime_perf", "ONNX Runtime Performance Test Config"); + + options.add_options()("f", "Free dimension override by name", cxxopts::value>())("F", "Free dimension override by denotation", cxxopts::value>())("m", "Test mode: duration or times", cxxopts::value())("e", "Execution provider", cxxopts::value())("r", "Repeat times", cxxopts::value())("t", "Duration in seconds", cxxopts::value())("p", "Profile output file", cxxopts::value())("x", "Intra-op threads", cxxopts::value())("y", "Inter-op threads", cxxopts::value())("c", "Concurrent session runs", cxxopts::value())("d", "cuDNN conv algo", cxxopts::value())("o", "Graph optimization level", cxxopts::value())("u", "Optimized model path", cxxopts::value())("i", "EP runtime config string", cxxopts::value())("S", "Random seed", cxxopts::value())("T", "Intra-op thread affinities", cxxopts::value())("C", "Session config entries", cxxopts::value())("R", "Custom op library path", cxxopts::value())("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true"))("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true"))("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true"))("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true"))("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true"))("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true"))("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true"))("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true"))("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true"))("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true"))("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true"))("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true"))("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true"))("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true"))("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value())("list_devices", "List all the avaiable devices with info")("select_devices", "Take a list of device index (semicolon separated)", cxxopts::value())("h,help", "Print usage"); + +#ifdef _WIN32 + auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); + auto utf8_argv = utils::ConvertArgvToUtf8CharPtrs(utf8_strings); +#else + auto utf8_argv = argv; +#endif + + auto result = options.parse(utf8_argv.size(), utf8_argv.data()); + if (result.count("help")) { + std::cout << options.help() << std::endl; + return false; + } + + if (result.count("f")) { + std::basic_string dim_name; + int64_t override_val; + auto opt_str = utils::Utf8ToWide(result["f"].as()); + if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { + return false; + } + test_config.run_config.free_dim_name_overrides[dim_name] = override_val; + } + + if (result.count("F")) { + std::basic_string dim_denotation; + int64_t override_val; + auto opt_str = utils::Utf8ToWide(result["F"].as()); + if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { + return false; + } + test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; + } + + if (result.count("m")) { + auto opt_str = utils::Utf8ToWide(result["m"].as()); + if (!CompareCString(opt_str.c_str(), ORT_TSTR("duration"))) { + test_config.run_config.test_mode = TestMode::kFixDurationMode; + } else if (!CompareCString(optarg, ORT_TSTR("times"))) { + test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; + } else { + return false; + } + } + + if (result.count("p")) test_config.run_config.profile_file = utils::Utf8ToWide(result["p"].as()); + if (result["M"].as()) test_config.run_config.enable_memory_pattern = false; + if (result["A"].as()) test_config.run_config.enable_cpu_mem_arena = false; + + if (result.count("e")) { + auto optarg = result["e"].as().c_str(); + if (!CompareCString(optarg, "cpu")) { + test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; + } else if (!CompareCString(optarg, "cuda")) { + test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; + } else if (!CompareCString(optarg, "dnnl")) { + test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; + } else if (!CompareCString(optarg, "openvino")) { + test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; + } else if (!CompareCString(optarg, "tensorrt")) { + test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; + } else if (!CompareCString(optarg, "qnn")) { + test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; + } else if (!CompareCString(optarg, "snpe")) { + test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; + } else if (!CompareCString(optarg, "nnapi")) { + test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; + } else if (!CompareCString(optarg, "vsinpu")) { + test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; + } else if (!CompareCString(optarg, "coreml")) { + test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; + } else if (!CompareCString(optarg, "dml")) { + test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; + } else if (!CompareCString(optarg, "acl")) { + test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; + } else if (!CompareCString(optarg, "armnn")) { + test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; + } else if (!CompareCString(optarg, "rocm")) { + test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; + } else if (!CompareCString(optarg, "migraphx")) { + test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; + } else if (!CompareCString(optarg, "xnnpack")) { + test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; + } else if (!CompareCString(optarg, "vitisai")) { + test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; + } else if (!CompareCString(optarg, "webgpu")) { + test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; + } else if (!CompareCString(optarg, "nvtensorrtrtx")) { + test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; + } else { + // Could be plugin EP, save it first and handle later. + test_config.machine_config.provider_type_name = optarg; + } + } + + if (result.count("r")) { + auto val = result["r"].as(); + if (val <= 0) return false; + test_config.run_config.repeated_times = static_cast(val); + test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; + } + if (result.count("t")) { + auto val = result["t"].as(); + if (val <= 0) return false; + test_config.run_config.duration_in_seconds = static_cast(val); + test_config.run_config.test_mode = TestMode::kFixDurationMode; + } + + if (result["s"].as()) test_config.run_config.f_dump_statistics = true; + + if (result.count("S")) { + auto val = result["S"].as(); + if (val <= 0) return false; + test_config.run_config.random_seed_for_input_data = val; + } + + if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToWide(result["plugin_ep_libs"].as()); + if (result.count("list_devices")) test_config.list_available_devices = true; + if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); + + // Positional arguments + std::vector positional = result.unmatched(); + if (positional.size() == 1) { + test_config.model_info.model_file_path = utils::Utf8ToWide(positional[0]); + test_config.run_config.f_dump_statistics = true; + } else if (positional.size() == 2) { + test_config.model_info.model_file_path = utils::Utf8ToWide(positional[0]); + test_config.model_info.result_file_path = utils::Utf8ToWide(positional[1]); + } else { + return false; + } + + } catch (const std::exception& ex) { + std::cerr << "Error parsing options: " << ex.what() << std::endl; + return false; + } + + return true; +} + } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 86c81072233c0..d1ed02bcf4776 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -13,6 +13,7 @@ class CommandLineParser { public: static void ShowUsage(); static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); + static bool ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; } // namespace perftest diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 044059f78d04d..888ae3069e143 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -5,6 +5,7 @@ #include #include #include "command_args_parser.h" +#include "utils.h" #include "performance_runner.h" #include "strings_helper.h" #include @@ -19,7 +20,7 @@ int real_main(int argc, char* argv[]) { #endif g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; - if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { + if (!perftest::CommandLineParser::ParseArgumentsV2(test_config, argc, argv)) { perftest::CommandLineParser::ShowUsage(); return -1; } @@ -43,9 +44,23 @@ int real_main(int argc, char* argv[]) { return -1; } + if (!test_config.plugin_ep_names_and_libs.empty()) { + perftest::utils::RegisterExecutionProviderLibrary(env, test_config); + } + + if (test_config.list_available_devices) { + perftest::utils::list_devices(env); + if (test_config.registered_plugin_eps.empty()) { + fprintf(stdout, "No plugin execution provider libraries are registered. Please specify them using \"--plugin_ep_libs\"; otherwise, only CPU may be available.\n"); + } else { + perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); + } + return 0; + } + auto status = Status::OK(); - { + try { std::random_device rd; perftest::PerformanceRunner perf_runner(env, test_config, rd); @@ -55,6 +70,8 @@ int real_main(int argc, char* argv[]) { return 0; } + throw std::runtime_error("Something went wrong"); + status = perf_runner.Run(); if (!status.IsOK()) { @@ -62,7 +79,23 @@ int real_main(int argc, char* argv[]) { } else { perf_runner.SerializeResult(); } + } catch (const std::exception& ex) { + if (!test_config.registered_plugin_eps.empty()) { + perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); + return -1; + } } + // The try/catch block above ensures the following: + // 1) Plugin EP libraries are unregistered if an exception occurs. + // 2) Objects are released in the correct order when running a plugin EP. + // + // Proper destruction order is critical to avoid use-after-free issues. The expected order of deleters is: + // session -> session allocator (accessed via EP factory) -> plugin EP -> env -> + // shared allocator (accessed via EP factory) -> plugin EP factory (owned by env) + // + // Without this order, the environment (`env`) might be destroyed first, and + // any subsequent access to the session allocator's deleter (which depends on the EP factory) + // can result in a segmentation fault because the factory has already been destroyed. // Unregister all registered plugin EP libraries before program exits. // @@ -71,15 +104,8 @@ int real_main(int argc, char* argv[]) { // Later, when the shared allocator's deleter runs, it may cause a segmentation fault because it attempts to use the already-destroyed factory to call ReleaseAllocator. // // See "ep_device.ep_factory->ReleaseAllocator" in Environment::CreateSharedAllocatorImpl. - std::unordered_map ep_names_to_libs; -#ifdef _MSC_VER - std::string ep_names_and_libs_string = ToUTF8String(test_config.plugin_ep_names_and_libs); -#else - std::string ep_names_and_libs_string = test_config.plugin_ep_names_and_libs; -#endif - onnxruntime::perftest::ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); - for (auto& pair : ep_names_to_libs) { - env.UnregisterExecutionProviderLibrary(pair.first.c_str()); + if (!test_config.registered_plugin_eps.empty()) { + perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); } if (!status.IsOK()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a585320cb1a02..4017013009e9c 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -51,6 +51,18 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); + for (size_t i = 0; i < outputs_.size(); i++) { + Ort::Value& ort_output = outputs_[i]; + const float* output_data = ort_output.GetTensorData(); + gsl::span output_span(output_data, 6); + std::cout << output_span[0] << std::endl; + std::cout << output_span[1] << std::endl; + std::cout << output_span[2] << std::endl; + std::cout << output_span[3] << std::endl; + std::cout << output_span[4] << std::endl; + std::cout << output_span[5] << std::endl; + } + auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; return duration_seconds; @@ -62,34 +74,44 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; -#ifdef _MSC_VER - std::string ep_names_and_libs_string = ToUTF8String(performance_test_config.plugin_ep_names_and_libs); -#else - std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; -#endif - std::unordered_map ep_names_to_libs; - ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); bool is_plugin_ep_avaiable = false; - if (ep_names_to_libs.size() > 0) { - // Register plugin EP libraries if provided via "-L" argument. - for (auto& pair : ep_names_to_libs) { - const std::filesystem::path library_path = pair.second; - const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), library_path.c_str()); - registered_plugin_ep_names_.push_back(registration_name); - } - + // Add devices created from plugin EP + if (!performance_test_config.registered_plugin_eps.empty()) { std::vector ep_devices = env.GetEpDevices(); std::vector added_ep_devices; + std::unordered_set added_ep_device_index_set; + + // Select devices by provided device index + if (!performance_test_config.selected_devices.empty()) { + std::vector device_list; + device_list.reserve(performance_test_config.selected_devices.size()); + ParseDeviceList(performance_test_config.selected_devices, device_list); + for (auto index : device_list) { + if (index > (ep_devices.size() - 1)) { + fprintf(stderr, "The device index provided is not correct. Will skip this device id."); + } + + Ort::ConstEpDevice& device = ep_devices[index]; - // All OrtEpDevice instances must be from the same execution provider. - // Find the OrtEpDevice associated with the execution provider provided via "-e" argument. - Ort::ConstEpDevice plugin_ep_device; - for (Ort::ConstEpDevice& device : ep_devices) { - if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { - plugin_ep_device = device; - added_ep_devices.push_back(plugin_ep_device); + if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { + if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { + added_ep_devices.push_back(device); + added_ep_device_index_set.insert(index); + } + } else { + std::string err_msg = "[WARNING]: The device index and its corresponding OrtEpDevice is not created from " + + performance_test_config.machine_config.provider_type_name + ". Will skip adding this device.\n"; + fprintf(stderr, err_msg.c_str()); + } + } + } else { + // All OrtEpDevice instances must be from the same execution provider. + // Find and select the OrtEpDevice associated with the execution provider provided via "-e" argument. + for (Ort::ConstEpDevice& device : ep_devices) { + if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { + added_ep_devices.push_back(device); + } } } @@ -101,11 +123,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device "[ERROR] [plugin EP] No matching execution provider name found in EP library's factory."); } -#if defined(_MSC_VER) std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string provider_option_string = performance_test_config.run_config.ep_runtime_config_string; -#endif std::unordered_map provider_options; ParseSessionConfigs(provider_option_string, provider_options); session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index 9fd49da1d0486..fe70da1b0d609 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -53,5 +53,16 @@ void ParseSessionConfigs(const std::string& configs_string, session_configs.insert(std::make_pair(std::move(key), std::move(value))); } } + +void ParseDeviceList(const std::string& input, std::vector& result) { + std::stringstream ss(input); + std::string item; + + while (std::getline(ss, item, ';')) { + if (!item.empty()) { + result.push_back(std::stoi(item)); + } + } +} } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/strings_helper.h b/onnxruntime/test/perftest/strings_helper.h index 0d6c56709fde6..f205c8ed39cfb 100644 --- a/onnxruntime/test/perftest/strings_helper.h +++ b/onnxruntime/test/perftest/strings_helper.h @@ -12,5 +12,7 @@ namespace perftest { void ParseSessionConfigs(const std::string& configs_string, std::unordered_map& session_configs, const std::unordered_set& available_keys = {}); + +void ParseDeviceList(const std::string& input, std::vector& result); } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 95acd15368942..404e6ca30eadd 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -75,6 +75,9 @@ struct PerformanceTestConfig { MachineConfig machine_config; RunConfig run_config; std::basic_string plugin_ep_names_and_libs; + std::vector registered_plugin_eps; + std::string selected_devices; + bool list_available_devices = false; }; } // namespace perftest diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index f22abc04fa99e..f642419943695 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -2,7 +2,8 @@ // Licensed under the MIT License. #pragma once - +#include "test/perftest/test_configuration.h" +#include #include namespace onnxruntime { @@ -22,6 +23,18 @@ class ICPUUsage { std::unique_ptr CreateICPUUsage(); +std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); + +std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args); + +std::wstring Utf8ToWide(const std::string& utf8_str); + +bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); + +bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); + +void list_devices(Ort::Env& env); + } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 9a1846a1c0901..3df143772d120 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -2,11 +2,14 @@ // Licensed under the MIT License. #include "test/perftest/utils.h" +#include "test/perftest/strings_helper.h" +#include #include #include #include +#include namespace onnxruntime { namespace perftest { @@ -75,6 +78,90 @@ std::unique_ptr CreateICPUUsage() { return std::make_unique(); } +#ifdef _WIN32 +std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { + std::vector utf8_args; + utf8_args.reserve(argc); + for (int i = 0; i < argc; ++i) { + utf8_args.push_back(ToUTF8String(argv[i])); + } + return utf8_args; +} + +std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args) { + std::vector utf8_argv; + utf8_argv.reserve(utf8_args.size()); + for (auto& str : utf8_args) { + utf8_argv.push_back(&str[0]); // safe since std::string is mutable + } + return utf8_argv; +} +#endif + +std::basic_string Utf8ToWide(const std::string& utf8_str) { + // ORTCHAR_T == char -> just convert to std::basic_string + if constexpr (std::is_same_v) { + return std::basic_string(utf8_str.begin(), utf8_str.end()); + } + + if (utf8_str.empty()) return std::basic_string(); + + int size_needed = MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, nullptr, 0); + if (size_needed <= 0) return std::basic_string(); + + std::basic_string wide_str(size_needed, 0); + MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, &wide_str[0], size_needed); + wide_str.pop_back(); // Remove null terminator added by API + + return wide_str; +} + +void list_devices(Ort::Env& env) { + std::vector ep_devices = env.GetEpDevices(); + + for (size_t i = 0; i < ep_devices.size(); ++i) { + auto device = ep_devices[i]; + std::string device_info_msg = "===== device id " + std::to_string(i) + " ======\n"; + device_info_msg += "name: " + std::string(device.EpName()) + "\n"; + device_info_msg += "vendor: " + std::string(device.EpVendor()) + "\n"; + + auto metadata = device.EpMetadata(); + std::unordered_map metadata_entries = metadata.GetKeyValuePairs(); + if (!metadata_entries.empty()) { + device_info_msg += "metadata:\n"; + } + + for (auto& entry : metadata_entries) { + device_info_msg += " " + entry.first + ": " + entry.second + "\n"; + } + device_info_msg += "\n"; + fprintf(stdout, device_info_msg.c_str()); + } +} + +bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { + if (!test_config.plugin_ep_names_and_libs.empty()) { + std::unordered_map ep_names_to_libs; + ParseSessionConfigs(ToUTF8String(test_config.plugin_ep_names_and_libs), ep_names_to_libs); + if (ep_names_to_libs.size() > 0) { + for (auto& pair : ep_names_to_libs) { + const std::filesystem::path library_path = pair.second; + const std::string registration_name = pair.first; + env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToWide(library_path.string())); + test_config.registered_plugin_eps.push_back(registration_name); + } + } + } + return true; +} + +bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { + for (auto& registration_name : test_config.registered_plugin_eps) { + env.UnregisterExecutionProviderLibrary(registration_name.c_str()); + } + return true; +} + } // namespace utils } // namespace perftest } // namespace onnxruntime From 6cbc3ea6c08bb24a3e4dd0f3700b176b4e1a7d82 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 16:48:46 -0700 Subject: [PATCH 06/46] fix build errors on Linux --- .../test/perftest/command_args_parser.cc | 10 +-- onnxruntime/test/perftest/common_utils.cc | 64 +++++++++++++++++++ onnxruntime/test/perftest/ort_test_session.cc | 2 +- onnxruntime/test/perftest/posix/utils.cc | 5 ++ onnxruntime/test/perftest/windows/utils.cc | 2 +- 5 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 onnxruntime/test/perftest/common_utils.cc diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index c592ca29156d8..380af6ffc016e 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -444,11 +444,11 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); auto utf8_argv = utils::ConvertArgvToUtf8CharPtrs(utf8_strings); + auto result = options.parse(utf8_argv.size(), utf8_argv.data()); #else - auto utf8_argv = argv; + auto result = options.parse(argc, argv); #endif - auto result = options.parse(utf8_argv.size(), utf8_argv.data()); if (result.count("help")) { std::cout << options.help() << std::endl; return false; @@ -457,7 +457,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("f")) { std::basic_string dim_name; int64_t override_val; - auto opt_str = utils::Utf8ToWide(result["f"].as()); + std::basic_string opt_str = utils::Utf8ToWide(result["f"].as()); if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { return false; } @@ -467,7 +467,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("F")) { std::basic_string dim_denotation; int64_t override_val; - auto opt_str = utils::Utf8ToWide(result["F"].as()); + std::basic_string opt_str = utils::Utf8ToWide(result["F"].as()); if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { return false; } @@ -475,7 +475,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } if (result.count("m")) { - auto opt_str = utils::Utf8ToWide(result["m"].as()); + std::basic_string opt_str = utils::Utf8ToWide(result["m"].as()); if (!CompareCString(opt_str.c_str(), ORT_TSTR("duration"))) { test_config.run_config.test_mode = TestMode::kFixDurationMode; } else if (!CompareCString(optarg, ORT_TSTR("times"))) { diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc new file mode 100644 index 0000000000000..d6acd64b05061 --- /dev/null +++ b/onnxruntime/test/perftest/common_utils.cc @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "test/perftest/utils.h" +#include "test/perftest/strings_helper.h" +#include + +#include + +#include + +namespace onnxruntime { +namespace perftest { +namespace utils { + +void list_devices(Ort::Env& env) { + std::vector ep_devices = env.GetEpDevices(); + + for (size_t i = 0; i < ep_devices.size(); ++i) { + auto device = ep_devices[i]; + std::string device_info_msg = "===== device id " + std::to_string(i) + " ======\n"; + device_info_msg += "name: " + std::string(device.EpName()) + "\n"; + device_info_msg += "vendor: " + std::string(device.EpVendor()) + "\n"; + + auto metadata = device.EpMetadata(); + std::unordered_map metadata_entries = metadata.GetKeyValuePairs(); + if (!metadata_entries.empty()) { + device_info_msg += "metadata:\n"; + } + + for (auto& entry : metadata_entries) { + device_info_msg += " " + entry.first + ": " + entry.second + "\n"; + } + device_info_msg += "\n"; + fprintf(stdout, device_info_msg.c_str()); + } +} + +bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { + if (!test_config.plugin_ep_names_and_libs.empty()) { + std::unordered_map ep_names_to_libs; + ParseSessionConfigs(ToUTF8String(test_config.plugin_ep_names_and_libs), ep_names_to_libs); + if (ep_names_to_libs.size() > 0) { + for (auto& pair : ep_names_to_libs) { + const std::filesystem::path library_path = pair.second; + const std::string registration_name = pair.first; + env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToWide(library_path.string())); + test_config.registered_plugin_eps.push_back(registration_name); + } + } + } + return true; +} + +bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { + for (auto& registration_name : test_config.registered_plugin_eps) { + env.UnregisterExecutionProviderLibrary(registration_name.c_str()); + } + return true; +} + +} // namespace utils +} // namespace perftest +} // namespace onnxruntime diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 4017013009e9c..732144d9824ce 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -88,7 +88,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device device_list.reserve(performance_test_config.selected_devices.size()); ParseDeviceList(performance_test_config.selected_devices, device_list); for (auto index : device_list) { - if (index > (ep_devices.size() - 1)) { + if (static_cast(index) > (ep_devices.size() - 1)) { fprintf(stderr, "The device index provided is not correct. Will skip this device id."); } diff --git a/onnxruntime/test/perftest/posix/utils.cc b/onnxruntime/test/perftest/posix/utils.cc index 9bf029d8dff35..c3c08c21e45b4 100644 --- a/onnxruntime/test/perftest/posix/utils.cc +++ b/onnxruntime/test/perftest/posix/utils.cc @@ -58,6 +58,11 @@ std::unique_ptr CreateICPUUsage() { return std::make_unique(); } +std::basic_string Utf8ToOrtString(const std::string& utf8_str) { + // ORTCHAR_T == char -> just convert to std::basic_string + return std::basic_string(utf8_str.begin(), utf8_str.end()); +} + } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 3df143772d120..436ccadff0318 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -98,7 +98,7 @@ std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf } #endif -std::basic_string Utf8ToWide(const std::string& utf8_str) { +std::basic_string Utf8ToOrtString(const std::string& utf8_str) { // ORTCHAR_T == char -> just convert to std::basic_string if constexpr (std::is_same_v) { return std::basic_string(utf8_str.begin(), utf8_str.end()); From eb2220e50ff7c7fbe616040f562aa07d242660fe Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 17:16:55 -0700 Subject: [PATCH 07/46] address build erros/warnings --- .../test/perftest/command_args_parser.cc | 267 +----------------- .../test/perftest/command_args_parser.h | 2 +- onnxruntime/test/perftest/common_utils.cc | 2 +- onnxruntime/test/perftest/main.cc | 2 +- onnxruntime/test/perftest/strings_helper.cc | 1 + onnxruntime/test/perftest/strings_helper.h | 1 + onnxruntime/test/perftest/utils.h | 4 +- onnxruntime/test/perftest/windows/utils.cc | 47 --- 8 files changed, 18 insertions(+), 308 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 380af6ffc016e..b51c5d68435f2 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -12,14 +12,6 @@ #include #include -// Windows Specific -#ifdef _WIN32 -#include "getopt.h" -#include "windows.h" -#else -#include -#endif - #include #include @@ -196,245 +188,6 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } -/*static*/ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { - int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("m:e:r:t:p:x:y:c:d:o:u:i:f:F:S:T:C:AMPIDZvhsqznlgR:XL:"))) != -1) { - switch (ch) { - case 'f': { - std::basic_string dim_name; - int64_t override_val; - if (!ParseDimensionOverride(dim_name, override_val, optarg)) { - return false; - } - test_config.run_config.free_dim_name_overrides[dim_name] = override_val; - break; - } - case 'F': { - std::basic_string dim_denotation; - int64_t override_val; - if (!ParseDimensionOverride(dim_denotation, override_val, optarg)) { - return false; - } - test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; - break; - } - case 'm': - if (!CompareCString(optarg, ORT_TSTR("duration"))) { - test_config.run_config.test_mode = TestMode::kFixDurationMode; - } else if (!CompareCString(optarg, ORT_TSTR("times"))) { - test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; - } else { - return false; - } - break; - case 'p': - test_config.run_config.profile_file = optarg; - break; - case 'M': - test_config.run_config.enable_memory_pattern = false; - break; - case 'A': - test_config.run_config.enable_cpu_mem_arena = false; - break; - case 'e': - if (!CompareCString(optarg, ORT_TSTR("cpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("dnnl"))) { - test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("openvino"))) { - test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("tensorrt"))) { - test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("qnn"))) { - test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("snpe"))) { - test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("nnapi"))) { - test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("vsinpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("coreml"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("dml"))) { - test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("acl"))) { - test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("armnn"))) { - test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("rocm"))) { - test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("migraphx"))) { - test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("xnnpack"))) { - test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("vitisai"))) { - test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("webgpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("nvtensorrtrtx"))) { - test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; - } else { - // Could be plugin EP, save it first and handle later. - test_config.machine_config.provider_type_name = ToUTF8String(optarg); - } - break; - case 'r': - test_config.run_config.repeated_times = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.repeated_times <= 0) { - return false; - } - test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; - break; - case 't': - test_config.run_config.duration_in_seconds = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.repeated_times <= 0) { - return false; - } - test_config.run_config.test_mode = TestMode::kFixDurationMode; - break; - case 's': - test_config.run_config.f_dump_statistics = true; - break; - case 'S': - test_config.run_config.random_seed_for_input_data = static_cast( - OrtStrtol(optarg, nullptr)); - break; - case 'v': - test_config.run_config.f_verbose = true; - break; - case 'x': - test_config.run_config.intra_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.intra_op_num_threads < 0) { - return false; - } - break; - case 'y': - test_config.run_config.inter_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.inter_op_num_threads < 0) { - return false; - } - break; - case 'P': - test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; - break; - case 'c': - test_config.run_config.concurrent_session_runs = - static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.concurrent_session_runs <= 0) { - return false; - } - break; - case 'o': { - int tmp = static_cast(OrtStrtol(optarg, nullptr)); - switch (tmp) { - case ORT_DISABLE_ALL: - test_config.run_config.optimization_level = ORT_DISABLE_ALL; - break; - case ORT_ENABLE_BASIC: - test_config.run_config.optimization_level = ORT_ENABLE_BASIC; - break; - case ORT_ENABLE_EXTENDED: - test_config.run_config.optimization_level = ORT_ENABLE_EXTENDED; - break; - case ORT_ENABLE_LAYOUT: - test_config.run_config.optimization_level = ORT_ENABLE_LAYOUT; - break; - case ORT_ENABLE_ALL: - test_config.run_config.optimization_level = ORT_ENABLE_ALL; - break; - default: { - if (tmp > ORT_ENABLE_ALL) { // relax constraint - test_config.run_config.optimization_level = ORT_ENABLE_ALL; - } else { - return false; - } - } - } - break; - } - case 'u': - test_config.run_config.optimized_model_path = optarg; - break; - case 'I': - test_config.run_config.generate_model_input_binding = true; - break; - case 'd': - test_config.run_config.cudnn_conv_algo = static_cast(OrtStrtol(optarg, nullptr)); - break; - case 'q': - test_config.run_config.do_cuda_copy_in_separate_stream = true; - break; - case 'z': - test_config.run_config.set_denormal_as_zero = true; - break; - case 'i': - test_config.run_config.ep_runtime_config_string = optarg; - break; - case 'T': - test_config.run_config.intra_op_thread_affinities = ToUTF8String(optarg); - break; - case 'C': { - ORT_TRY { - ParseSessionConfigs(ToUTF8String(optarg), test_config.run_config.session_config_entries); - } - ORT_CATCH(const std::exception& ex) { - ORT_HANDLE_EXCEPTION([&]() { - fprintf(stderr, "Error parsing session configuration entries: %s\n", ex.what()); - }); - return false; - } - break; - } - case 'D': - test_config.run_config.disable_spinning = true; - break; - case 'Z': - test_config.run_config.disable_spinning_between_run = true; - break; - case 'n': - test_config.run_config.exit_after_session_creation = true; - break; - case 'l': - test_config.model_info.load_via_path = true; - break; - case 'R': - test_config.run_config.register_custom_op_path = optarg; - break; - case 'g': - test_config.run_config.enable_cuda_io_binding = true; - break; - case 'X': - test_config.run_config.use_extensions = true; - break; - case '?': - case 'h': - default: - return false; - } - } - - // parse model_path and result_file_path - argc -= optind; - argv += optind; - - switch (argc) { - case 2: - test_config.model_info.result_file_path = argv[1]; - break; - case 1: - test_config.run_config.f_dump_statistics = true; - break; - default: - return false; - } - - test_config.model_info.model_file_path = argv[0]; - - return true; -} - bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { try { cxxopts::Options options("onnxruntime_perf", "ONNX Runtime Performance Test Config"); @@ -444,7 +197,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); auto utf8_argv = utils::ConvertArgvToUtf8CharPtrs(utf8_strings); - auto result = options.parse(utf8_argv.size(), utf8_argv.data()); + auto result = options.parse(static_cast(utf8_argv.size()), utf8_argv.data()); #else auto result = options.parse(argc, argv); #endif @@ -457,7 +210,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("f")) { std::basic_string dim_name; int64_t override_val; - std::basic_string opt_str = utils::Utf8ToWide(result["f"].as()); + std::basic_string opt_str = utils::Utf8ToOrtString(result["f"].as()); if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { return false; } @@ -467,7 +220,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("F")) { std::basic_string dim_denotation; int64_t override_val; - std::basic_string opt_str = utils::Utf8ToWide(result["F"].as()); + std::basic_string opt_str = utils::Utf8ToOrtString(result["F"].as()); if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { return false; } @@ -475,17 +228,17 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } if (result.count("m")) { - std::basic_string opt_str = utils::Utf8ToWide(result["m"].as()); + std::basic_string opt_str = utils::Utf8ToOrtString(result["m"].as()); if (!CompareCString(opt_str.c_str(), ORT_TSTR("duration"))) { test_config.run_config.test_mode = TestMode::kFixDurationMode; - } else if (!CompareCString(optarg, ORT_TSTR("times"))) { + } else if (!CompareCString(opt_str.c_str(), ORT_TSTR("times"))) { test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } else { return false; } } - if (result.count("p")) test_config.run_config.profile_file = utils::Utf8ToWide(result["p"].as()); + if (result.count("p")) test_config.run_config.profile_file = utils::Utf8ToOrtString(result["p"].as()); if (result["M"].as()) test_config.run_config.enable_memory_pattern = false; if (result["A"].as()) test_config.run_config.enable_cpu_mem_arena = false; @@ -556,18 +309,18 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int test_config.run_config.random_seed_for_input_data = val; } - if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToWide(result["plugin_ep_libs"].as()); + if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToOrtString(result["plugin_ep_libs"].as()); if (result.count("list_devices")) test_config.list_available_devices = true; if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); // Positional arguments std::vector positional = result.unmatched(); if (positional.size() == 1) { - test_config.model_info.model_file_path = utils::Utf8ToWide(positional[0]); + test_config.model_info.model_file_path = utils::Utf8ToOrtString(positional[0]); test_config.run_config.f_dump_statistics = true; } else if (positional.size() == 2) { - test_config.model_info.model_file_path = utils::Utf8ToWide(positional[0]); - test_config.model_info.result_file_path = utils::Utf8ToWide(positional[1]); + test_config.model_info.model_file_path = utils::Utf8ToOrtString(positional[0]); + test_config.model_info.result_file_path = utils::Utf8ToOrtString(positional[1]); } else { return false; } diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index d1ed02bcf4776..61a7e9aa9f52a 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -12,7 +12,7 @@ struct PerformanceTestConfig; class CommandLineParser { public: static void ShowUsage(); - static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); + //static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); static bool ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index d6acd64b05061..33d2fbf49db8f 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -44,7 +44,7 @@ bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test for (auto& pair : ep_names_to_libs) { const std::filesystem::path library_path = pair.second; const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToWide(library_path.string())); + env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToOrtString(library_path.string())); test_config.registered_plugin_eps.push_back(registration_name); } } diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 888ae3069e143..77aba4e5aa8d8 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -79,7 +79,7 @@ int real_main(int argc, char* argv[]) { } else { perf_runner.SerializeResult(); } - } catch (const std::exception& ex) { + } catch (const std::exception&) { if (!test_config.registered_plugin_eps.empty()) { perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); return -1; diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index fe70da1b0d609..e57530cfa2761 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -5,6 +5,7 @@ #include #include +#include #include "strings_helper.h" #include "core/common/common.h" diff --git a/onnxruntime/test/perftest/strings_helper.h b/onnxruntime/test/perftest/strings_helper.h index f205c8ed39cfb..241a9c29ab6f0 100644 --- a/onnxruntime/test/perftest/strings_helper.h +++ b/onnxruntime/test/perftest/strings_helper.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace onnxruntime { namespace perftest { diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index f642419943695..788c18b276352 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -23,11 +23,13 @@ class ICPUUsage { std::unique_ptr CreateICPUUsage(); +#ifdef _WIN32 std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args); +#endif -std::wstring Utf8ToWide(const std::string& utf8_str); +std::basic_string Utf8ToOrtString(const std::string& utf8_str); bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 436ccadff0318..8e45763ae2fcf 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -115,53 +115,6 @@ std::basic_string Utf8ToOrtString(const std::string& utf8_str) { return wide_str; } - -void list_devices(Ort::Env& env) { - std::vector ep_devices = env.GetEpDevices(); - - for (size_t i = 0; i < ep_devices.size(); ++i) { - auto device = ep_devices[i]; - std::string device_info_msg = "===== device id " + std::to_string(i) + " ======\n"; - device_info_msg += "name: " + std::string(device.EpName()) + "\n"; - device_info_msg += "vendor: " + std::string(device.EpVendor()) + "\n"; - - auto metadata = device.EpMetadata(); - std::unordered_map metadata_entries = metadata.GetKeyValuePairs(); - if (!metadata_entries.empty()) { - device_info_msg += "metadata:\n"; - } - - for (auto& entry : metadata_entries) { - device_info_msg += " " + entry.first + ": " + entry.second + "\n"; - } - device_info_msg += "\n"; - fprintf(stdout, device_info_msg.c_str()); - } -} - -bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { - if (!test_config.plugin_ep_names_and_libs.empty()) { - std::unordered_map ep_names_to_libs; - ParseSessionConfigs(ToUTF8String(test_config.plugin_ep_names_and_libs), ep_names_to_libs); - if (ep_names_to_libs.size() > 0) { - for (auto& pair : ep_names_to_libs) { - const std::filesystem::path library_path = pair.second; - const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToWide(library_path.string())); - test_config.registered_plugin_eps.push_back(registration_name); - } - } - } - return true; -} - -bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { - for (auto& registration_name : test_config.registered_plugin_eps) { - env.UnregisterExecutionProviderLibrary(registration_name.c_str()); - } - return true; -} - } // namespace utils } // namespace perftest } // namespace onnxruntime From 3d3dacb9881006b6825bb01032ba8763a05f7dfe Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 17:34:44 -0700 Subject: [PATCH 08/46] remove test code --- onnxruntime/test/perftest/main.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 77aba4e5aa8d8..8dfeecb25f232 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -70,8 +70,6 @@ int real_main(int argc, char* argv[]) { return 0; } - throw std::runtime_error("Something went wrong"); - status = perf_runner.Run(); if (!status.IsOK()) { From aa085cb0b7513cae0a3658743a92b190819add78 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 18:21:29 -0700 Subject: [PATCH 09/46] continue the rest of converting getopt to cxxopts --- .../test/perftest/command_args_parser.cc | 148 +++++++++++++++++- onnxruntime/test/perftest/main.cc | 5 +- 2 files changed, 145 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index b51c5d68435f2..2e1b01e3dabcb 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -192,7 +192,43 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int try { cxxopts::Options options("onnxruntime_perf", "ONNX Runtime Performance Test Config"); - options.add_options()("f", "Free dimension override by name", cxxopts::value>())("F", "Free dimension override by denotation", cxxopts::value>())("m", "Test mode: duration or times", cxxopts::value())("e", "Execution provider", cxxopts::value())("r", "Repeat times", cxxopts::value())("t", "Duration in seconds", cxxopts::value())("p", "Profile output file", cxxopts::value())("x", "Intra-op threads", cxxopts::value())("y", "Inter-op threads", cxxopts::value())("c", "Concurrent session runs", cxxopts::value())("d", "cuDNN conv algo", cxxopts::value())("o", "Graph optimization level", cxxopts::value())("u", "Optimized model path", cxxopts::value())("i", "EP runtime config string", cxxopts::value())("S", "Random seed", cxxopts::value())("T", "Intra-op thread affinities", cxxopts::value())("C", "Session config entries", cxxopts::value())("R", "Custom op library path", cxxopts::value())("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true"))("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true"))("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true"))("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true"))("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true"))("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true"))("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true"))("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true"))("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true"))("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true"))("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true"))("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true"))("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true"))("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true"))("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value())("list_devices", "List all the avaiable devices with info")("select_devices", "Take a list of device index (semicolon separated)", cxxopts::value())("h,help", "Print usage"); + options.add_options() + ("f", "Free dimension override by name", cxxopts::value>()) + ("F", "Free dimension override by denotation", cxxopts::value>()) + ("m", "Test mode: duration or times", cxxopts::value()) + ("e", "Execution provider", cxxopts::value()) + ("r", "Repeat times", cxxopts::value()) + ("t", "Duration in seconds", cxxopts::value()) + ("p", "Profile output file", cxxopts::value()) + ("x", "Intra-op threads", cxxopts::value()) + ("y", "Inter-op threads", cxxopts::value()) + ("c", "Concurrent session runs", cxxopts::value()) + ("d", "cuDNN conv algo", cxxopts::value()) + ("o", "Graph optimization level", cxxopts::value()) + ("u", "Optimized model path", cxxopts::value()) + ("i", "EP runtime config string", cxxopts::value()) + ("S", "Random seed", cxxopts::value()) + ("T", "Intra-op thread affinities", cxxopts::value()) + ("C", "Session config entries", cxxopts::value()) + ("R", "Custom op library path", cxxopts::value()) + ("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true")) + ("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true")) + ("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true")) + ("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true")) + ("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true")) + ("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true")) + ("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true")) + ("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true")) + ("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true")) + ("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true")) + ("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true")) + ("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true")) + ("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true")) + ("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true")) + ("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value()) + ("list_devices", "List all the avaiable devices with info") + ("select_devices", "Take a list of device index (semicolon separated)", cxxopts::value()) + ("h,help", "Print usage"); #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); @@ -289,15 +325,16 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } if (result.count("r")) { - auto val = result["r"].as(); + auto val = result["r"].as(); if (val <= 0) return false; - test_config.run_config.repeated_times = static_cast(val); + test_config.run_config.repeated_times = val; test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } + if (result.count("t")) { - auto val = result["t"].as(); + auto val = result["t"].as(); if (val <= 0) return false; - test_config.run_config.duration_in_seconds = static_cast(val); + test_config.run_config.duration_in_seconds = val; test_config.run_config.test_mode = TestMode::kFixDurationMode; } @@ -305,14 +342,113 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("S")) { auto val = result["S"].as(); - if (val <= 0) return false; test_config.run_config.random_seed_for_input_data = val; } + if (result["v"].as()) test_config.run_config.f_verbose = true; + + if (result.count("x")) { + auto val = result["x"].as(); + if (val < 0) return false; + test_config.run_config.intra_op_num_threads = val; + } + + if (result.count("y")) { + auto val = result["y"].as(); + if (val < 0) return false; + test_config.run_config.inter_op_num_threads = val; + } + + if (result.count("P")) { + test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; + } + + if (result.count("c")) { + auto val = result["c"].as(); + if (static_cast(val) <= 0) return false; + test_config.run_config.concurrent_session_runs = val; + } + + if (result.count("o")) { + auto val = result["o"].as(); + switch (val) { + case ORT_DISABLE_ALL: + test_config.run_config.optimization_level = ORT_DISABLE_ALL; + break; + case ORT_ENABLE_BASIC: + test_config.run_config.optimization_level = ORT_ENABLE_BASIC; + break; + case ORT_ENABLE_EXTENDED: + test_config.run_config.optimization_level = ORT_ENABLE_EXTENDED; + break; + case ORT_ENABLE_LAYOUT: + test_config.run_config.optimization_level = ORT_ENABLE_LAYOUT; + break; + case ORT_ENABLE_ALL: + test_config.run_config.optimization_level = ORT_ENABLE_ALL; + break; + default: { + if (val > ORT_ENABLE_ALL) { // relax constraint + test_config.run_config.optimization_level = ORT_ENABLE_ALL; + } else { + return false; + } + } + } + } + + if (result.count("u")) test_config.run_config.optimized_model_path = utils::Utf8ToOrtString(result["u"].as()); + + if (result.count("I")) test_config.run_config.generate_model_input_binding = true; + + if (result.count("d")) { + auto val = result["d"].as(); + if (val < 0) return false; + test_config.run_config.cudnn_conv_algo = val; + } + + if (result.count("q")) test_config.run_config.do_cuda_copy_in_separate_stream = true; + + if (result.count("z")) test_config.run_config.set_denormal_as_zero = true; + + if (result.count("i")) test_config.run_config.ep_runtime_config_string = utils::Utf8ToOrtString(result["i"].as()); + + if (result.count("T")) test_config.run_config.intra_op_thread_affinities = result["T"].as(); + + if (result.count("C")) { + ORT_TRY { + ParseSessionConfigs(result["C"].as(), test_config.run_config.session_config_entries); + } + ORT_CATCH(const std::exception& ex) { + ORT_HANDLE_EXCEPTION([&]() { + fprintf(stderr, "Error parsing session configuration entries: %s\n", ex.what()); + }); + return false; + } + } + + if (result.count("D")) test_config.run_config.disable_spinning = true; + + if (result.count("Z")) test_config.run_config.disable_spinning_between_run = true; + + if (result.count("n")) test_config.run_config.exit_after_session_creation = true; + + if (result.count("l")) test_config.model_info.load_via_path = true; + + if (result.count("R")) test_config.run_config.register_custom_op_path = utils::Utf8ToOrtString(result["R"].as()); + + if (result.count("g")) test_config.run_config.enable_cuda_io_binding = true; + + if (result.count("X")) test_config.run_config.use_extensions = true; + if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToOrtString(result["plugin_ep_libs"].as()); if (result.count("list_devices")) test_config.list_available_devices = true; if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); + if (result.count("h")) { + perftest::CommandLineParser::ShowUsage(); + } + // Positional arguments std::vector positional = result.unmatched(); if (positional.size() == 1) { diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 8dfeecb25f232..da06632cf180a 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -77,11 +77,12 @@ int real_main(int argc, char* argv[]) { } else { perf_runner.SerializeResult(); } - } catch (const std::exception&) { + } catch (const std::exception& ex) { + std::cerr << ex.what() << std::endl; if (!test_config.registered_plugin_eps.empty()) { perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); - return -1; } + return -1; } // The try/catch block above ensures the following: // 1) Plugin EP libraries are unregistered if an exception occurs. From c5fd68c812c49181d9fe506eba81ae60aaaa3b74 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 22:10:21 -0700 Subject: [PATCH 10/46] address compile warning --- onnxruntime/test/perftest/common_utils.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 33d2fbf49db8f..4f7bbc9ee2a8b 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -32,7 +32,7 @@ void list_devices(Ort::Env& env) { device_info_msg += " " + entry.first + ": " + entry.second + "\n"; } device_info_msg += "\n"; - fprintf(stdout, device_info_msg.c_str()); + fprintf(stdout, "%s", device_info_msg.c_str()); } } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 732144d9824ce..967313a0e9210 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -89,7 +89,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ParseDeviceList(performance_test_config.selected_devices, device_list); for (auto index : device_list) { if (static_cast(index) > (ep_devices.size() - 1)) { - fprintf(stderr, "The device index provided is not correct. Will skip this device id."); + fprintf(stderr, "%s", "The device index provided is not correct. Will skip this device id."); } Ort::ConstEpDevice& device = ep_devices[index]; @@ -102,7 +102,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { std::string err_msg = "[WARNING]: The device index and its corresponding OrtEpDevice is not created from " + performance_test_config.machine_config.provider_type_name + ". Will skip adding this device.\n"; - fprintf(stderr, err_msg.c_str()); + fprintf(stderr, "%s", err_msg.c_str()); } } } else { From 5fb5693547fc8ec044bad1f43e5fb26cf0905d6c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 23:13:58 -0700 Subject: [PATCH 11/46] update usage explanation --- .../test/perftest/command_args_parser.cc | 17 ++++++++++++----- onnxruntime/test/perftest/ort_test_session.cc | 9 ++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 2e1b01e3dabcb..341713f94b156 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -34,7 +34,7 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "\t-e,--ep [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai', 'webgpu' or plugin execution provider that provided via ep library. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" @@ -161,6 +161,10 @@ namespace perftest { "\t-X [Enable onnxruntime-extensions custom ops]: Registers custom ops from onnxruntime-extensions. " "onnxruntime-extensions must have been built in to onnxruntime. This can be done with the build.py " "'--use_extensions' option.\n" + "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider(EP) registration names and their corresponding shared libraries to register.\n" + "\t [Usage]: --plugin_ep_libs 'plugin_ep_1|plugin_ep_2.dll plugin_ep_2|plugin_ep_2.dll'\n" + "\t--list_devices Prints all available device indices and their properties (including metadata).\n" + "\t--select_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" "\t-h: help\n"); } #ifdef _WIN32 @@ -190,13 +194,13 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { try { - cxxopts::Options options("onnxruntime_perf", "ONNX Runtime Performance Test Config"); + cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); options.add_options() ("f", "Free dimension override by name", cxxopts::value>()) ("F", "Free dimension override by denotation", cxxopts::value>()) ("m", "Test mode: duration or times", cxxopts::value()) - ("e", "Execution provider", cxxopts::value()) + ("e,ep", "Execution provider", cxxopts::value()) ("r", "Repeat times", cxxopts::value()) ("t", "Duration in seconds", cxxopts::value()) ("p", "Profile output file", cxxopts::value()) @@ -226,8 +230,8 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int ("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true")) ("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true")) ("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value()) - ("list_devices", "List all the avaiable devices with info") - ("select_devices", "Take a list of device index (semicolon separated)", cxxopts::value()) + ("list_devices", "Prints all available device indices and their properties (including metadata)") + ("select_devices", "A semicolon-separated list of device indices to add to the session and run with", cxxopts::value()) ("h,help", "Print usage"); #ifdef _WIN32 @@ -238,10 +242,12 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int auto result = options.parse(argc, argv); #endif + /* if (result.count("help")) { std::cout << options.help() << std::endl; return false; } + */ if (result.count("f")) { std::basic_string dim_name; @@ -447,6 +453,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("h")) { perftest::CommandLineParser::ShowUsage(); + return false; } // Positional arguments diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 967313a0e9210..117bf97a47f70 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -98,9 +98,10 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { added_ep_devices.push_back(device); added_ep_device_index_set.insert(index); + fprintf(stdout, "Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); } } else { - std::string err_msg = "[WARNING]: The device index and its corresponding OrtEpDevice is not created from " + + std::string err_msg = "[WARNING] [plugin EP]: The device index and its corresponding OrtEpDevice is not created from " + performance_test_config.machine_config.provider_type_name + ". Will skip adding this device.\n"; fprintf(stderr, "%s", err_msg.c_str()); } @@ -108,9 +109,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { // All OrtEpDevice instances must be from the same execution provider. // Find and select the OrtEpDevice associated with the execution provider provided via "-e" argument. - for (Ort::ConstEpDevice& device : ep_devices) { + for (int index = 0; static_cast(index) < ep_devices.size(); ++index) { + Ort::ConstEpDevice& device = ep_devices[index]; if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { added_ep_devices.push_back(device); + fprintf(stdout, "Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); } } } @@ -120,7 +123,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device env.UnregisterExecutionProviderLibrary(ep_name.c_str()); } ORT_THROW( - "[ERROR] [plugin EP] No matching execution provider name found in EP library's factory."); + "[ERROR] [plugin EP]: No matching devices found."); } std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); From f673e0e1b3f62c53c734dedeef691829a868e12b Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 18 Jul 2025 23:14:38 -0700 Subject: [PATCH 12/46] remove test code --- onnxruntime/test/perftest/ort_test_session.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 117bf97a47f70..18d19742937fc 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -51,18 +51,6 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); - for (size_t i = 0; i < outputs_.size(); i++) { - Ort::Value& ort_output = outputs_[i]; - const float* output_data = ort_output.GetTensorData(); - gsl::span output_span(output_data, 6); - std::cout << output_span[0] << std::endl; - std::cout << output_span[1] << std::endl; - std::cout << output_span[2] << std::endl; - std::cout << output_span[3] << std::endl; - std::cout << output_span[4] << std::endl; - std::cout << output_span[5] << std::endl; - } - auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; return duration_seconds; From 3607d68561846262782b920767f75af2acdcce4c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 10:00:15 -0700 Subject: [PATCH 13/46] add -DCXXOPTS_NO_RTTI for minimal build --- cmake/onnxruntime_unittests.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 6b6e1d0624cb8..b339baa842cc6 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1255,6 +1255,10 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() endif() + if(onnxruntime_MINIMAL_BUILD) + add_definitions(-DCXXOPTS_NO_RTTI) + endif() + if (onnxruntime_BUILD_SHARED_LIB) #It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here. #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. From e03d00b2aeed249bd1afc76ccffc9394cd07687e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 10:55:08 -0700 Subject: [PATCH 14/46] address lintrunner issue --- .../test/perftest/command_args_parser.cc | 82 ++++++++++--------- .../test/perftest/command_args_parser.h | 3 +- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 341713f94b156..f706c1963544b 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -192,47 +192,46 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } -bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { +bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { try { cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); - options.add_options() - ("f", "Free dimension override by name", cxxopts::value>()) - ("F", "Free dimension override by denotation", cxxopts::value>()) - ("m", "Test mode: duration or times", cxxopts::value()) - ("e,ep", "Execution provider", cxxopts::value()) - ("r", "Repeat times", cxxopts::value()) - ("t", "Duration in seconds", cxxopts::value()) - ("p", "Profile output file", cxxopts::value()) - ("x", "Intra-op threads", cxxopts::value()) - ("y", "Inter-op threads", cxxopts::value()) - ("c", "Concurrent session runs", cxxopts::value()) - ("d", "cuDNN conv algo", cxxopts::value()) - ("o", "Graph optimization level", cxxopts::value()) - ("u", "Optimized model path", cxxopts::value()) - ("i", "EP runtime config string", cxxopts::value()) - ("S", "Random seed", cxxopts::value()) - ("T", "Intra-op thread affinities", cxxopts::value()) - ("C", "Session config entries", cxxopts::value()) - ("R", "Custom op library path", cxxopts::value()) - ("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true")) - ("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true")) - ("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true")) - ("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true")) - ("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true")) - ("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true")) - ("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true")) - ("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true")) - ("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true")) - ("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true")) - ("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true")) - ("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true")) - ("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true")) - ("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true")) - ("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value()) - ("list_devices", "Prints all available device indices and their properties (including metadata)") - ("select_devices", "A semicolon-separated list of device indices to add to the session and run with", cxxopts::value()) - ("h,help", "Print usage"); + options.add_options()("f", "Free dimension override by name", cxxopts::value>()); + options.add_options()("F", "Free dimension override by denotation", cxxopts::value>()); + options.add_options()("m", "Test mode: duration or times", cxxopts::value()); + options.add_options()("e,ep", "Execution provider", cxxopts::value()); + options.add_options()("r", "Repeat times", cxxopts::value()); + options.add_options()("t", "Duration in seconds", cxxopts::value()); + options.add_options()("p", "Profile output file", cxxopts::value()); + options.add_options()("x", "Intra-op threads", cxxopts::value()); + options.add_options()("y", "Inter-op threads", cxxopts::value()); + options.add_options()("c", "Concurrent session runs", cxxopts::value()); + options.add_options()("d", "cuDNN conv algo", cxxopts::value()); + options.add_options()("o", "Graph optimization level", cxxopts::value()); + options.add_options()("u", "Optimized model path", cxxopts::value()); + options.add_options()("i", "EP runtime config string", cxxopts::value()); + options.add_options()("S", "Random seed", cxxopts::value()); + options.add_options()("T", "Intra-op thread affinities", cxxopts::value()); + options.add_options()("C", "Session config entries", cxxopts::value()); + options.add_options()("R", "Custom op library path", cxxopts::value()); + options.add_options()("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value()); + options.add_options()("list_devices", "Prints all available device indices and their properties (including metadata)"); + options.add_options()("select_devices", "A semicolon-separated list of device indices to add to the session and run with", cxxopts::value()); + options.add_options()("h,help", "Print usage"); #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); @@ -448,7 +447,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("X")) test_config.run_config.use_extensions = true; if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToOrtString(result["plugin_ep_libs"].as()); - if (result.count("list_devices")) test_config.list_available_devices = true; + if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); if (result.count("h")) { @@ -456,6 +455,11 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int return false; } + if (result.count("list_devices")) { + test_config.list_available_devices = true; + return true; + } + // Positional arguments std::vector positional = result.unmatched(); if (positional.size() == 1) { diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 61a7e9aa9f52a..86c81072233c0 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -12,8 +12,7 @@ struct PerformanceTestConfig; class CommandLineParser { public: static void ShowUsage(); - //static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); - static bool ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); + static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; } // namespace perftest From 07362002478db675904fa7f30e9f27b5fa8288dd Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 11:11:52 -0700 Subject: [PATCH 15/46] fix type --- onnxruntime/test/perftest/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index da06632cf180a..ac8a0bf6c7e77 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -20,7 +20,7 @@ int real_main(int argc, char* argv[]) { #endif g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; - if (!perftest::CommandLineParser::ParseArgumentsV2(test_config, argc, argv)) { + if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { perftest::CommandLineParser::ShowUsage(); return -1; } From 0396a27ca0b9aeaf566fd6280a1259e1a97da863 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 11:46:16 -0700 Subject: [PATCH 16/46] fix typos --- onnxruntime/test/perftest/ort_test_session.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 18d19742937fc..2abcdf4c6e96d 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -62,7 +62,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; - bool is_plugin_ep_avaiable = false; + bool is_plugin_ep_available = false; // Add devices created from plugin EP if (!performance_test_config.registered_plugin_eps.empty()) { @@ -86,10 +86,10 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { added_ep_devices.push_back(device); added_ep_device_index_set.insert(index); - fprintf(stdout, "Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); + fprintf(stdout, "[Plugin EP] Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); } } else { - std::string err_msg = "[WARNING] [plugin EP]: The device index and its corresponding OrtEpDevice is not created from " + + std::string err_msg = "[Plugin EP] [WARNING] : The device index and its corresponding OrtEpDevice is not created from " + performance_test_config.machine_config.provider_type_name + ". Will skip adding this device.\n"; fprintf(stderr, "%s", err_msg.c_str()); } @@ -118,7 +118,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::unordered_map provider_options; ParseSessionConfigs(provider_option_string, provider_options); session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); - is_plugin_ep_avaiable = true; + is_plugin_ep_available = true; } provider_name_ = performance_test_config.machine_config.provider_type_name; @@ -634,7 +634,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else if (!provider_name_.empty() && provider_name_ != onnxruntime::kCpuExecutionProvider && provider_name_ != onnxruntime::kOpenVINOExecutionProvider && - !is_plugin_ep_avaiable) { + !is_plugin_ep_available) { ORT_THROW("This backend is not included in perf test runner.\n"); } From fe9e1de5ef95ed42aed2d9489ff4e06d8e6e0abf Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 15:47:41 -0700 Subject: [PATCH 17/46] add back getopts for the build which disables exceptions --- .../test/perftest/command_args_parser.cc | 276 +++++++++++++++++- .../test/perftest/command_args_parser.h | 1 + onnxruntime/test/perftest/main.cc | 17 +- 3 files changed, 277 insertions(+), 17 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index f706c1963544b..d6cc2914617a1 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -12,7 +12,19 @@ #include #include +#ifdef DISABLE_EXCEPTIONS + +// Windows Specific +#ifdef _WIN32 +#include "getopt.h" +#include "windows.h" +#else +#include +#endif + +#else #include +#endif #include #include @@ -34,7 +46,7 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e,--ep [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai', 'webgpu' or plugin execution provider that provided via ep library. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" @@ -161,10 +173,12 @@ namespace perftest { "\t-X [Enable onnxruntime-extensions custom ops]: Registers custom ops from onnxruntime-extensions. " "onnxruntime-extensions must have been built in to onnxruntime. This can be done with the build.py " "'--use_extensions' option.\n" +#ifndef DISABLE_EXCEPTIONS "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider(EP) registration names and their corresponding shared libraries to register.\n" "\t [Usage]: --plugin_ep_libs 'plugin_ep_1|plugin_ep_2.dll plugin_ep_2|plugin_ep_2.dll'\n" "\t--list_devices Prints all available device indices and their properties (including metadata).\n" "\t--select_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" +#endif "\t-h: help\n"); } #ifdef _WIN32 @@ -172,8 +186,8 @@ static const ORTCHAR_T* overrideDelimiter = L":"; #else static const ORTCHAR_T* overrideDelimiter = ":"; #endif -static bool ParseDimensionOverride(std::basic_string& dim_identifier, int64_t& override_val, const ORTCHAR_T* optarg) { - std::basic_string free_dim_str(optarg); +static bool ParseDimensionOverride(std::basic_string& dim_identifier, int64_t& override_val, const ORTCHAR_T* option) { + std::basic_string free_dim_str(option); size_t delimiter_location = free_dim_str.find(overrideDelimiter); if (delimiter_location >= free_dim_str.size() - 1) { return false; @@ -192,12 +206,251 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } -bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { - try { +#ifdef DISABLE_EXCEPTIONS +/*static*/ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { + int ch; + while ((ch = getopt(argc, argv, ORT_TSTR("m:e:r:t:p:x:y:c:d:o:u:i:f:F:S:T:C:AMPIDZvhsqznlgR:X"))) != -1) { + switch (ch) { + case 'f': { + std::basic_string dim_name; + int64_t override_val; + if (!ParseDimensionOverride(dim_name, override_val, optarg)) { + return false; + } + test_config.run_config.free_dim_name_overrides[dim_name] = override_val; + break; + } + case 'F': { + std::basic_string dim_denotation; + int64_t override_val; + if (!ParseDimensionOverride(dim_denotation, override_val, optarg)) { + return false; + } + test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; + break; + } + case 'm': + if (!CompareCString(optarg, ORT_TSTR("duration"))) { + test_config.run_config.test_mode = TestMode::kFixDurationMode; + } else if (!CompareCString(optarg, ORT_TSTR("times"))) { + test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; + } else { + return false; + } + break; + case 'p': + test_config.run_config.profile_file = optarg; + break; + case 'M': + test_config.run_config.enable_memory_pattern = false; + break; + case 'A': + test_config.run_config.enable_cpu_mem_arena = false; + break; + case 'e': + if (!CompareCString(optarg, ORT_TSTR("cpu"))) { + test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) { + test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("dnnl"))) { + test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("openvino"))) { + test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("tensorrt"))) { + test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("qnn"))) { + test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("snpe"))) { + test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("nnapi"))) { + test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("vsinpu"))) { + test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("coreml"))) { + test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("dml"))) { + test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("acl"))) { + test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("armnn"))) { + test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("rocm"))) { + test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("migraphx"))) { + test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("xnnpack"))) { + test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("vitisai"))) { + test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("webgpu"))) { + test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("nvtensorrtrtx"))) { + test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; + } else { + return false; + } + break; + case 'r': + test_config.run_config.repeated_times = static_cast(OrtStrtol(optarg, nullptr)); + if (test_config.run_config.repeated_times <= 0) { + return false; + } + test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; + break; + case 't': + test_config.run_config.duration_in_seconds = static_cast(OrtStrtol(optarg, nullptr)); + if (test_config.run_config.repeated_times <= 0) { + return false; + } + test_config.run_config.test_mode = TestMode::kFixDurationMode; + break; + case 's': + test_config.run_config.f_dump_statistics = true; + break; + case 'S': + test_config.run_config.random_seed_for_input_data = static_cast( + OrtStrtol(optarg, nullptr)); + break; + case 'v': + test_config.run_config.f_verbose = true; + break; + case 'x': + test_config.run_config.intra_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); + if (test_config.run_config.intra_op_num_threads < 0) { + return false; + } + break; + case 'y': + test_config.run_config.inter_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); + if (test_config.run_config.inter_op_num_threads < 0) { + return false; + } + break; + case 'P': + test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; + break; + case 'c': + test_config.run_config.concurrent_session_runs = + static_cast(OrtStrtol(optarg, nullptr)); + if (test_config.run_config.concurrent_session_runs <= 0) { + return false; + } + break; + case 'o': { + int tmp = static_cast(OrtStrtol(optarg, nullptr)); + switch (tmp) { + case ORT_DISABLE_ALL: + test_config.run_config.optimization_level = ORT_DISABLE_ALL; + break; + case ORT_ENABLE_BASIC: + test_config.run_config.optimization_level = ORT_ENABLE_BASIC; + break; + case ORT_ENABLE_EXTENDED: + test_config.run_config.optimization_level = ORT_ENABLE_EXTENDED; + break; + case ORT_ENABLE_LAYOUT: + test_config.run_config.optimization_level = ORT_ENABLE_LAYOUT; + break; + case ORT_ENABLE_ALL: + test_config.run_config.optimization_level = ORT_ENABLE_ALL; + break; + default: { + if (tmp > ORT_ENABLE_ALL) { // relax constraint + test_config.run_config.optimization_level = ORT_ENABLE_ALL; + } else { + return false; + } + } + } + break; + } + case 'u': + test_config.run_config.optimized_model_path = optarg; + break; + case 'I': + test_config.run_config.generate_model_input_binding = true; + break; + case 'd': + test_config.run_config.cudnn_conv_algo = static_cast(OrtStrtol(optarg, nullptr)); + break; + case 'q': + test_config.run_config.do_cuda_copy_in_separate_stream = true; + break; + case 'z': + test_config.run_config.set_denormal_as_zero = true; + break; + case 'i': + test_config.run_config.ep_runtime_config_string = optarg; + break; + case 'T': + test_config.run_config.intra_op_thread_affinities = ToUTF8String(optarg); + break; + case 'C': { + ORT_TRY { + ParseSessionConfigs(ToUTF8String(optarg), test_config.run_config.session_config_entries); + } + ORT_CATCH(const std::exception& ex) { + ORT_HANDLE_EXCEPTION([&]() { + fprintf(stderr, "Error parsing session configuration entries: %s\n", ex.what()); + }); + return false; + } + break; + } + case 'D': + test_config.run_config.disable_spinning = true; + break; + case 'Z': + test_config.run_config.disable_spinning_between_run = true; + break; + case 'n': + test_config.run_config.exit_after_session_creation = true; + break; + case 'l': + test_config.model_info.load_via_path = true; + break; + case 'R': + test_config.run_config.register_custom_op_path = optarg; + break; + case 'g': + test_config.run_config.enable_cuda_io_binding = true; + break; + case 'X': + test_config.run_config.use_extensions = true; + break; + case '?': + case 'h': + default: + return false; + } + } + + // parse model_path and result_file_path + argc -= optind; + argv += optind; + + switch (argc) { + case 2: + test_config.model_info.result_file_path = argv[1]; + break; + case 1: + test_config.run_config.f_dump_statistics = true; + break; + default: + return false; + } + + test_config.model_info.model_file_path = argv[0]; + + return true; +} +#else +bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { + ORT_TRY { cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); - options.add_options()("f", "Free dimension override by name", cxxopts::value>()); - options.add_options()("F", "Free dimension override by denotation", cxxopts::value>()); + options.add_options()("f", "Free dimension override by name", cxxopts::value >()); + options.add_options()("F", "Free dimension override by denotation", cxxopts::value >()); options.add_options()("m", "Test mode: duration or times", cxxopts::value()); options.add_options()("e,ep", "Execution provider", cxxopts::value()); options.add_options()("r", "Repeat times", cxxopts::value()); @@ -471,14 +724,17 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } else { return false; } - - } catch (const std::exception& ex) { - std::cerr << "Error parsing options: " << ex.what() << std::endl; + } + ORT_CATCH(const std::exception& ex) { + ORT_HANDLE_EXCEPTION([&]() { + fprintf(stderr, "Error parsing options: %s\n", ex.what()); + }); return false; } return true; } +#endif } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 86c81072233c0..d1ed02bcf4776 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -13,6 +13,7 @@ class CommandLineParser { public: static void ShowUsage(); static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); + static bool ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; } // namespace perftest diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index ac8a0bf6c7e77..1592ec6dc78ce 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -20,7 +20,11 @@ int real_main(int argc, char* argv[]) { #endif g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; +#ifdef DISABLE_EXCEPTIONS if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { +#else + if (!perftest::CommandLineParser::ParseArgumentsV2(test_config, argc, argv)) { +#endif perftest::CommandLineParser::ShowUsage(); return -1; } @@ -60,7 +64,7 @@ int real_main(int argc, char* argv[]) { auto status = Status::OK(); - try { + ORT_TRY { std::random_device rd; perftest::PerformanceRunner perf_runner(env, test_config, rd); @@ -77,12 +81,11 @@ int real_main(int argc, char* argv[]) { } else { perf_runner.SerializeResult(); } - } catch (const std::exception& ex) { - std::cerr << ex.what() << std::endl; - if (!test_config.registered_plugin_eps.empty()) { - perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); - } - return -1; + } + ORT_CATCH(const std::exception& ex) { + ORT_HANDLE_EXCEPTION([&]() { + fprintf(stderr, "%s\n", ex.what()); + }); } // The try/catch block above ensures the following: // 1) Plugin EP libraries are unregistered if an exception occurs. From 8af32b3a73a4366dc674be61c9ed932801cfdba8 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 21 Jul 2025 16:20:34 -0700 Subject: [PATCH 18/46] add define for DISABLE_EXCEPTIONS --- cmake/onnxruntime_unittests.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index b339baa842cc6..6d033e894febc 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1259,6 +1259,10 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) add_definitions(-DCXXOPTS_NO_RTTI) endif() + if(onnxruntime_DISABLE_EXCEPTIONS) + add_definitions(-DDISABLE_EXCEPTIONS=1) + endif() + if (onnxruntime_BUILD_SHARED_LIB) #It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here. #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. From 51b3412600a3d86cb81b2c90e57dd02b1242d885 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 23 Jul 2025 14:53:05 -0700 Subject: [PATCH 19/46] address reviewer's comments --- cmake/onnxruntime_unittests.cmake | 2 +- .../test/perftest/command_args_parser.cc | 274 +----------------- onnxruntime/test/perftest/common_utils.cc | 6 +- onnxruntime/test/perftest/main.cc | 12 +- onnxruntime/test/perftest/ort_test_session.cc | 7 +- onnxruntime/test/perftest/posix/utils.cc | 6 - onnxruntime/test/perftest/utils.h | 4 +- onnxruntime/test/perftest/windows/utils.cc | 22 +- 8 files changed, 26 insertions(+), 307 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 637d59fadb912..7eec0beb27707 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1258,7 +1258,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() if(onnxruntime_DISABLE_EXCEPTIONS) - add_definitions(-DDISABLE_EXCEPTIONS=1) + add_definitions(-DCXXOPTS_NO_EXCEPTIONS) endif() if (onnxruntime_BUILD_SHARED_LIB) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index d6cc2914617a1..18bc4648a8477 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -12,19 +12,7 @@ #include #include -#ifdef DISABLE_EXCEPTIONS - -// Windows Specific -#ifdef _WIN32 -#include "getopt.h" -#include "windows.h" -#else -#include -#endif - -#else #include -#endif #include #include @@ -206,245 +194,6 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } -#ifdef DISABLE_EXCEPTIONS -/*static*/ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { - int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("m:e:r:t:p:x:y:c:d:o:u:i:f:F:S:T:C:AMPIDZvhsqznlgR:X"))) != -1) { - switch (ch) { - case 'f': { - std::basic_string dim_name; - int64_t override_val; - if (!ParseDimensionOverride(dim_name, override_val, optarg)) { - return false; - } - test_config.run_config.free_dim_name_overrides[dim_name] = override_val; - break; - } - case 'F': { - std::basic_string dim_denotation; - int64_t override_val; - if (!ParseDimensionOverride(dim_denotation, override_val, optarg)) { - return false; - } - test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; - break; - } - case 'm': - if (!CompareCString(optarg, ORT_TSTR("duration"))) { - test_config.run_config.test_mode = TestMode::kFixDurationMode; - } else if (!CompareCString(optarg, ORT_TSTR("times"))) { - test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; - } else { - return false; - } - break; - case 'p': - test_config.run_config.profile_file = optarg; - break; - case 'M': - test_config.run_config.enable_memory_pattern = false; - break; - case 'A': - test_config.run_config.enable_cpu_mem_arena = false; - break; - case 'e': - if (!CompareCString(optarg, ORT_TSTR("cpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("dnnl"))) { - test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("openvino"))) { - test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("tensorrt"))) { - test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("qnn"))) { - test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("snpe"))) { - test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("nnapi"))) { - test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("vsinpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("coreml"))) { - test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("dml"))) { - test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("acl"))) { - test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("armnn"))) { - test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("rocm"))) { - test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("migraphx"))) { - test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("xnnpack"))) { - test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("vitisai"))) { - test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("webgpu"))) { - test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; - } else if (!CompareCString(optarg, ORT_TSTR("nvtensorrtrtx"))) { - test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; - } else { - return false; - } - break; - case 'r': - test_config.run_config.repeated_times = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.repeated_times <= 0) { - return false; - } - test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; - break; - case 't': - test_config.run_config.duration_in_seconds = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.repeated_times <= 0) { - return false; - } - test_config.run_config.test_mode = TestMode::kFixDurationMode; - break; - case 's': - test_config.run_config.f_dump_statistics = true; - break; - case 'S': - test_config.run_config.random_seed_for_input_data = static_cast( - OrtStrtol(optarg, nullptr)); - break; - case 'v': - test_config.run_config.f_verbose = true; - break; - case 'x': - test_config.run_config.intra_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.intra_op_num_threads < 0) { - return false; - } - break; - case 'y': - test_config.run_config.inter_op_num_threads = static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.inter_op_num_threads < 0) { - return false; - } - break; - case 'P': - test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; - break; - case 'c': - test_config.run_config.concurrent_session_runs = - static_cast(OrtStrtol(optarg, nullptr)); - if (test_config.run_config.concurrent_session_runs <= 0) { - return false; - } - break; - case 'o': { - int tmp = static_cast(OrtStrtol(optarg, nullptr)); - switch (tmp) { - case ORT_DISABLE_ALL: - test_config.run_config.optimization_level = ORT_DISABLE_ALL; - break; - case ORT_ENABLE_BASIC: - test_config.run_config.optimization_level = ORT_ENABLE_BASIC; - break; - case ORT_ENABLE_EXTENDED: - test_config.run_config.optimization_level = ORT_ENABLE_EXTENDED; - break; - case ORT_ENABLE_LAYOUT: - test_config.run_config.optimization_level = ORT_ENABLE_LAYOUT; - break; - case ORT_ENABLE_ALL: - test_config.run_config.optimization_level = ORT_ENABLE_ALL; - break; - default: { - if (tmp > ORT_ENABLE_ALL) { // relax constraint - test_config.run_config.optimization_level = ORT_ENABLE_ALL; - } else { - return false; - } - } - } - break; - } - case 'u': - test_config.run_config.optimized_model_path = optarg; - break; - case 'I': - test_config.run_config.generate_model_input_binding = true; - break; - case 'd': - test_config.run_config.cudnn_conv_algo = static_cast(OrtStrtol(optarg, nullptr)); - break; - case 'q': - test_config.run_config.do_cuda_copy_in_separate_stream = true; - break; - case 'z': - test_config.run_config.set_denormal_as_zero = true; - break; - case 'i': - test_config.run_config.ep_runtime_config_string = optarg; - break; - case 'T': - test_config.run_config.intra_op_thread_affinities = ToUTF8String(optarg); - break; - case 'C': { - ORT_TRY { - ParseSessionConfigs(ToUTF8String(optarg), test_config.run_config.session_config_entries); - } - ORT_CATCH(const std::exception& ex) { - ORT_HANDLE_EXCEPTION([&]() { - fprintf(stderr, "Error parsing session configuration entries: %s\n", ex.what()); - }); - return false; - } - break; - } - case 'D': - test_config.run_config.disable_spinning = true; - break; - case 'Z': - test_config.run_config.disable_spinning_between_run = true; - break; - case 'n': - test_config.run_config.exit_after_session_creation = true; - break; - case 'l': - test_config.model_info.load_via_path = true; - break; - case 'R': - test_config.run_config.register_custom_op_path = optarg; - break; - case 'g': - test_config.run_config.enable_cuda_io_binding = true; - break; - case 'X': - test_config.run_config.use_extensions = true; - break; - case '?': - case 'h': - default: - return false; - } - } - - // parse model_path and result_file_path - argc -= optind; - argv += optind; - - switch (argc) { - case 2: - test_config.model_info.result_file_path = argv[1]; - break; - case 1: - test_config.run_config.f_dump_statistics = true; - break; - default: - return false; - } - - test_config.model_info.model_file_path = argv[0]; - - return true; -} -#else bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { ORT_TRY { cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); @@ -504,7 +253,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("f")) { std::basic_string dim_name; int64_t override_val; - std::basic_string opt_str = utils::Utf8ToOrtString(result["f"].as()); + std::basic_string opt_str = ToPathString(result["f"].as()); if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { return false; } @@ -514,7 +263,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("F")) { std::basic_string dim_denotation; int64_t override_val; - std::basic_string opt_str = utils::Utf8ToOrtString(result["F"].as()); + std::basic_string opt_str = ToPathString(result["F"].as()); if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { return false; } @@ -522,7 +271,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } if (result.count("m")) { - std::basic_string opt_str = utils::Utf8ToOrtString(result["m"].as()); + std::basic_string opt_str = ToPathString(result["m"].as()); if (!CompareCString(opt_str.c_str(), ORT_TSTR("duration"))) { test_config.run_config.test_mode = TestMode::kFixDurationMode; } else if (!CompareCString(opt_str.c_str(), ORT_TSTR("times"))) { @@ -532,7 +281,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } } - if (result.count("p")) test_config.run_config.profile_file = utils::Utf8ToOrtString(result["p"].as()); + if (result.count("p")) test_config.run_config.profile_file = ToPathString(result["p"].as()); if (result["M"].as()) test_config.run_config.enable_memory_pattern = false; if (result["A"].as()) test_config.run_config.enable_cpu_mem_arena = false; @@ -655,7 +404,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int } } - if (result.count("u")) test_config.run_config.optimized_model_path = utils::Utf8ToOrtString(result["u"].as()); + if (result.count("u")) test_config.run_config.optimized_model_path = ToPathString(result["u"].as()); if (result.count("I")) test_config.run_config.generate_model_input_binding = true; @@ -669,7 +418,7 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("z")) test_config.run_config.set_denormal_as_zero = true; - if (result.count("i")) test_config.run_config.ep_runtime_config_string = utils::Utf8ToOrtString(result["i"].as()); + if (result.count("i")) test_config.run_config.ep_runtime_config_string = ToPathString(result["i"].as()); if (result.count("T")) test_config.run_config.intra_op_thread_affinities = result["T"].as(); @@ -693,13 +442,13 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("l")) test_config.model_info.load_via_path = true; - if (result.count("R")) test_config.run_config.register_custom_op_path = utils::Utf8ToOrtString(result["R"].as()); + if (result.count("R")) test_config.run_config.register_custom_op_path = ToPathString(result["R"].as()); if (result.count("g")) test_config.run_config.enable_cuda_io_binding = true; if (result.count("X")) test_config.run_config.use_extensions = true; - if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = utils::Utf8ToOrtString(result["plugin_ep_libs"].as()); + if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = ToPathString(result["plugin_ep_libs"].as()); if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); @@ -716,11 +465,11 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int // Positional arguments std::vector positional = result.unmatched(); if (positional.size() == 1) { - test_config.model_info.model_file_path = utils::Utf8ToOrtString(positional[0]); + test_config.model_info.model_file_path = ToPathString(positional[0]); test_config.run_config.f_dump_statistics = true; } else if (positional.size() == 2) { - test_config.model_info.model_file_path = utils::Utf8ToOrtString(positional[0]); - test_config.model_info.result_file_path = utils::Utf8ToOrtString(positional[1]); + test_config.model_info.model_file_path = ToPathString(positional[0]); + test_config.model_info.result_file_path = ToPathString(positional[1]); } else { return false; } @@ -734,7 +483,6 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int return true; } -#endif } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 4f7bbc9ee2a8b..53e06dc40d80c 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -13,7 +13,7 @@ namespace onnxruntime { namespace perftest { namespace utils { -void list_devices(Ort::Env& env) { +void ListDevices(const Ort::Env& env) { std::vector ep_devices = env.GetEpDevices(); for (size_t i = 0; i < ep_devices.size(); ++i) { @@ -44,7 +44,7 @@ bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test for (auto& pair : ep_names_to_libs) { const std::filesystem::path library_path = pair.second; const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), Utf8ToOrtString(library_path.string())); + env.RegisterExecutionProviderLibrary(registration_name.c_str(), ToPathString(library_path.string())); test_config.registered_plugin_eps.push_back(registration_name); } } @@ -54,7 +54,7 @@ bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { for (auto& registration_name : test_config.registered_plugin_eps) { - env.UnregisterExecutionProviderLibrary(registration_name.c_str()); + auto status = Ort::GetApi().UnregisterExecutionProviderLibrary(env, registration_name.c_str()); } return true; } diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 1592ec6dc78ce..956ccbece081d 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -20,11 +20,7 @@ int real_main(int argc, char* argv[]) { #endif g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; -#ifdef DISABLE_EXCEPTIONS - if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { -#else if (!perftest::CommandLineParser::ParseArgumentsV2(test_config, argc, argv)) { -#endif perftest::CommandLineParser::ShowUsage(); return -1; } @@ -52,8 +48,14 @@ int real_main(int argc, char* argv[]) { perftest::utils::RegisterExecutionProviderLibrary(env, test_config); } + auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() { + if (!test_config.registered_plugin_eps.empty()) { + perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); // TODO ensure that this won't throw since it is called from the gsl::final_action destructor. + } + }); + if (test_config.list_available_devices) { - perftest::utils::list_devices(env); + perftest::utils::ListDevices(env); if (test_config.registered_plugin_eps.empty()) { fprintf(stdout, "No plugin execution provider libraries are registered. Please specify them using \"--plugin_ep_libs\"; otherwise, only CPU may be available.\n"); } else { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 2abcdf4c6e96d..c9f90eab5ea67 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -78,6 +78,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device for (auto index : device_list) { if (static_cast(index) > (ep_devices.size() - 1)) { fprintf(stderr, "%s", "The device index provided is not correct. Will skip this device id."); + continue; } Ort::ConstEpDevice& device = ep_devices[index]; @@ -107,11 +108,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } if (added_ep_devices.empty()) { - for (auto ep_name : registered_plugin_ep_names_) { - env.UnregisterExecutionProviderLibrary(ep_name.c_str()); - } - ORT_THROW( - "[ERROR] [plugin EP]: No matching devices found."); + ORT_THROW("[ERROR] [plugin EP]: No matching devices found."); } std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); diff --git a/onnxruntime/test/perftest/posix/utils.cc b/onnxruntime/test/perftest/posix/utils.cc index c3c08c21e45b4..d44cbcea22734 100644 --- a/onnxruntime/test/perftest/posix/utils.cc +++ b/onnxruntime/test/perftest/posix/utils.cc @@ -57,12 +57,6 @@ class CPUUsage : public ICPUUsage { std::unique_ptr CreateICPUUsage() { return std::make_unique(); } - -std::basic_string Utf8ToOrtString(const std::string& utf8_str) { - // ORTCHAR_T == char -> just convert to std::basic_string - return std::basic_string(utf8_str.begin(), utf8_str.end()); -} - } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 788c18b276352..7051dc6b356a3 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -29,13 +29,11 @@ std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args); #endif -std::basic_string Utf8ToOrtString(const std::string& utf8_str); - bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); -void list_devices(Ort::Env& env); +void ListDevices(const Ort::Env& env); } // namespace utils } // namespace perftest diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 8e45763ae2fcf..53e34c1ed3f14 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -78,7 +78,6 @@ std::unique_ptr CreateICPUUsage() { return std::make_unique(); } -#ifdef _WIN32 std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { std::vector utf8_args; utf8_args.reserve(argc); @@ -92,29 +91,10 @@ std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf std::vector utf8_argv; utf8_argv.reserve(utf8_args.size()); for (auto& str : utf8_args) { - utf8_argv.push_back(&str[0]); // safe since std::string is mutable + utf8_argv.push_back(&str[0]); } return utf8_argv; } -#endif - -std::basic_string Utf8ToOrtString(const std::string& utf8_str) { - // ORTCHAR_T == char -> just convert to std::basic_string - if constexpr (std::is_same_v) { - return std::basic_string(utf8_str.begin(), utf8_str.end()); - } - - if (utf8_str.empty()) return std::basic_string(); - - int size_needed = MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, nullptr, 0); - if (size_needed <= 0) return std::basic_string(); - - std::basic_string wide_str(size_needed, 0); - MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, &wide_str[0], size_needed); - wide_str.pop_back(); // Remove null terminator added by API - - return wide_str; -} } // namespace utils } // namespace perftest } // namespace onnxruntime From e281ca523ad5b2ff60b1801116f7e75509ea07b9 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 23 Jul 2025 15:33:06 -0700 Subject: [PATCH 20/46] revert code back --- onnxruntime/test/perftest/common_utils.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 53e06dc40d80c..011db27271ced 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -54,7 +54,8 @@ bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { for (auto& registration_name : test_config.registered_plugin_eps) { - auto status = Ort::GetApi().UnregisterExecutionProviderLibrary(env, registration_name.c_str()); + //auto status = Ort::GetApi().UnregisterExecutionProviderLibrary(env, registration_name.c_str()); + env.UnregisterExecutionProviderLibrary(registration_name.c_str()); } return true; } From bdfd3f55c14af2e35712439420983b68d38a8061 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 23 Jul 2025 17:47:13 -0700 Subject: [PATCH 21/46] address reviewer's comments --- onnxruntime/test/perftest/common_utils.cc | 20 +++--- onnxruntime/test/perftest/main.cc | 64 +++++-------------- onnxruntime/test/perftest/ort_test_session.cc | 13 ++++ onnxruntime/test/perftest/utils.h | 4 +- 4 files changed, 43 insertions(+), 58 deletions(-) diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 011db27271ced..6739530ffedc4 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -36,7 +36,7 @@ void ListDevices(const Ort::Env& env) { } } -bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { +void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { if (!test_config.plugin_ep_names_and_libs.empty()) { std::unordered_map ep_names_to_libs; ParseSessionConfigs(ToUTF8String(test_config.plugin_ep_names_and_libs), ep_names_to_libs); @@ -44,20 +44,24 @@ bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test for (auto& pair : ep_names_to_libs) { const std::filesystem::path library_path = pair.second; const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), ToPathString(library_path.string())); - test_config.registered_plugin_eps.push_back(registration_name); + Ort::Status status(Ort::GetApi().RegisterExecutionProviderLibrary(env, registration_name.c_str(), ToPathString(library_path.string()).c_str())); + if (status.IsOK()) { + test_config.registered_plugin_eps.push_back(registration_name); + } else { + fprintf(stderr, "Can't register %s plugin library: %s", registration_name.c_str(), status.GetErrorMessage().c_str()); + } } } } - return true; } -bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { +void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config) { for (auto& registration_name : test_config.registered_plugin_eps) { - //auto status = Ort::GetApi().UnregisterExecutionProviderLibrary(env, registration_name.c_str()); - env.UnregisterExecutionProviderLibrary(registration_name.c_str()); + Ort::Status status(Ort::GetApi().UnregisterExecutionProviderLibrary(env, registration_name.c_str())); + if (!status.IsOK()) { + fprintf(stderr, "%s", status.GetErrorMessage().c_str()); + } } - return true; } } // namespace utils diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 956ccbece081d..573c9fdb0e636 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -48,9 +48,14 @@ int real_main(int argc, char* argv[]) { perftest::utils::RegisterExecutionProviderLibrary(env, test_config); } + // Unregister all registered plugin EP libraries before program exits. + // This is necessary because unregistering the plugin EP also unregisters any associated shared allocators. + // If we don't do this and program returns, the factories stored inside the environment will be destroyed when the environment goes out of scope. + // Later, when the shared allocator's deleter runs, it may cause a segmentation fault because it attempts to use the already-destroyed factory to call ReleaseAllocator. + // See "ep_device.ep_factory->ReleaseAllocator" in Environment::CreateSharedAllocatorImpl. auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() { if (!test_config.registered_plugin_eps.empty()) { - perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); // TODO ensure that this won't throw since it is called from the gsl::final_action destructor. + perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); // this won't throw } }); @@ -58,64 +63,27 @@ int real_main(int argc, char* argv[]) { perftest::utils::ListDevices(env); if (test_config.registered_plugin_eps.empty()) { fprintf(stdout, "No plugin execution provider libraries are registered. Please specify them using \"--plugin_ep_libs\"; otherwise, only CPU may be available.\n"); - } else { - perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); } return 0; } - auto status = Status::OK(); - - ORT_TRY { - std::random_device rd; - perftest::PerformanceRunner perf_runner(env, test_config, rd); - - // Exit if user enabled -n option so that user can measure session creation time - if (test_config.run_config.exit_after_session_creation) { - perf_runner.LogSessionCreationTime(); - return 0; - } - - status = perf_runner.Run(); + std::random_device rd; + perftest::PerformanceRunner perf_runner(env, test_config, rd); - if (!status.IsOK()) { - printf("Run failed:%s\n", status.ErrorMessage().c_str()); - } else { - perf_runner.SerializeResult(); - } - } - ORT_CATCH(const std::exception& ex) { - ORT_HANDLE_EXCEPTION([&]() { - fprintf(stderr, "%s\n", ex.what()); - }); - } - // The try/catch block above ensures the following: - // 1) Plugin EP libraries are unregistered if an exception occurs. - // 2) Objects are released in the correct order when running a plugin EP. - // - // Proper destruction order is critical to avoid use-after-free issues. The expected order of deleters is: - // session -> session allocator (accessed via EP factory) -> plugin EP -> env -> - // shared allocator (accessed via EP factory) -> plugin EP factory (owned by env) - // - // Without this order, the environment (`env`) might be destroyed first, and - // any subsequent access to the session allocator's deleter (which depends on the EP factory) - // can result in a segmentation fault because the factory has already been destroyed. - - // Unregister all registered plugin EP libraries before program exits. - // - // This is necessary because unregistering the plugin EP also unregisters any associated shared allocators. - // If we don't do this first and program returns, the factories stored inside the environment will be destroyed when the environment goes out of scope. - // Later, when the shared allocator's deleter runs, it may cause a segmentation fault because it attempts to use the already-destroyed factory to call ReleaseAllocator. - // - // See "ep_device.ep_factory->ReleaseAllocator" in Environment::CreateSharedAllocatorImpl. - if (!test_config.registered_plugin_eps.empty()) { - perftest::utils::UnregisterExecutionProviderLibrary(env, test_config); + // Exit if user enabled -n option so that user can measure session creation time + if (test_config.run_config.exit_after_session_creation) { + perf_runner.LogSessionCreationTime(); + return 0; } + auto status = perf_runner.Run(); if (!status.IsOK()) { + printf("Run failed:%s\n", status.ErrorMessage().c_str()); return -1; } + perf_runner.SerializeResult(); + return 0; } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index c9f90eab5ea67..d2ab12752a876 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -53,6 +53,19 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; + + for (size_t i = 0; i < outputs_.size(); i++) { + Ort::Value& ort_output = outputs_[i]; + const float* output_data = ort_output.GetTensorData(); + gsl::span output_span(output_data, 6); + std::cout << output_span[0] << std::endl; + std::cout << output_span[1] << std::endl; + std::cout << output_span[2] << std::endl; + std::cout << output_span[3] << std::endl; + std::cout << output_span[4] << std::endl; + std::cout << output_span[5] << std::endl; + } + return duration_seconds; } diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 7051dc6b356a3..5ad140f6bbedb 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -29,9 +29,9 @@ std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args); #endif -bool RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); +void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); -bool UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); +void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); void ListDevices(const Ort::Env& env); From 22d3b80721e24b71b3c54b07c2dcb2fdfd44f46f Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 23 Jul 2025 22:08:09 -0700 Subject: [PATCH 22/46] address reviewer's comments --- .../test/perftest/command_args_parser.cc | 98 +++++++++---------- .../test/perftest/command_args_parser.h | 1 - onnxruntime/test/perftest/main.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 19 +--- onnxruntime/test/perftest/ort_test_session.h | 1 - onnxruntime/test/perftest/strings_helper.cc | 5 +- onnxruntime/test/perftest/utils.h | 2 +- onnxruntime/test/perftest/windows/utils.cc | 2 +- 8 files changed, 54 insertions(+), 76 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 18bc4648a8477..56270c17f83af 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -34,7 +34,7 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "\t-e or --ep [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai', 'webgpu' or plugin execution provider that provided via ep library. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" @@ -161,12 +161,10 @@ namespace perftest { "\t-X [Enable onnxruntime-extensions custom ops]: Registers custom ops from onnxruntime-extensions. " "onnxruntime-extensions must have been built in to onnxruntime. This can be done with the build.py " "'--use_extensions' option.\n" -#ifndef DISABLE_EXCEPTIONS "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider(EP) registration names and their corresponding shared libraries to register.\n" "\t [Usage]: --plugin_ep_libs 'plugin_ep_1|plugin_ep_2.dll plugin_ep_2|plugin_ep_2.dll'\n" - "\t--list_devices Prints all available device indices and their properties (including metadata).\n" - "\t--select_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" -#endif + "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" + "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" "\t-h: help\n"); } #ifdef _WIN32 @@ -194,62 +192,56 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, return true; } -bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { +bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { ORT_TRY { cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); - - options.add_options()("f", "Free dimension override by name", cxxopts::value >()); - options.add_options()("F", "Free dimension override by denotation", cxxopts::value >()); - options.add_options()("m", "Test mode: duration or times", cxxopts::value()); - options.add_options()("e,ep", "Execution provider", cxxopts::value()); - options.add_options()("r", "Repeat times", cxxopts::value()); - options.add_options()("t", "Duration in seconds", cxxopts::value()); - options.add_options()("p", "Profile output file", cxxopts::value()); - options.add_options()("x", "Intra-op threads", cxxopts::value()); - options.add_options()("y", "Inter-op threads", cxxopts::value()); - options.add_options()("c", "Concurrent session runs", cxxopts::value()); - options.add_options()("d", "cuDNN conv algo", cxxopts::value()); - options.add_options()("o", "Graph optimization level", cxxopts::value()); - options.add_options()("u", "Optimized model path", cxxopts::value()); - options.add_options()("i", "EP runtime config string", cxxopts::value()); - options.add_options()("S", "Random seed", cxxopts::value()); - options.add_options()("T", "Intra-op thread affinities", cxxopts::value()); - options.add_options()("C", "Session config entries", cxxopts::value()); - options.add_options()("R", "Custom op library path", cxxopts::value()); - options.add_options()("A", "Disable CPU mem arena", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("M", "Disable memory pattern", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("s", "Dump statistics", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("v", "Verbose", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("I", "Generate model input binding", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("P", "Use ORT_PARALLEL mode", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("q", "CUDA copy in separate stream", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("z", "Set denormal as zero", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("D", "Disable spinning", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("Z", "Disable spinning between runs", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("n", "Exit after session creation", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("l", "Load model via path", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("g", "Enable CUDA IO binding", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("X", "Use extensions", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("plugin_ep_libs", "Plugin EP names and libs", cxxopts::value()); - options.add_options()("list_devices", "Prints all available device indices and their properties (including metadata)"); - options.add_options()("select_devices", "A semicolon-separated list of device indices to add to the session and run with", cxxopts::value()); - options.add_options()("h,help", "Print usage"); + + // See ShowUsage() for detailed option descriptions. + options.add_options()("f", "", cxxopts::value >()); + options.add_options()("F", "", cxxopts::value >()); + options.add_options()("m", "", cxxopts::value()); + options.add_options()("e,ep", "", cxxopts::value()); + options.add_options()("r", "", cxxopts::value()); + options.add_options()("t", "", cxxopts::value()); + options.add_options()("p", "", cxxopts::value()); + options.add_options()("x", "", cxxopts::value()); + options.add_options()("y", "", cxxopts::value()); + options.add_options()("c", "", cxxopts::value()); + options.add_options()("d", "", cxxopts::value()); + options.add_options()("o", "", cxxopts::value()); + options.add_options()("u", "", cxxopts::value()); + options.add_options()("i", "", cxxopts::value()); + options.add_options()("S", "", cxxopts::value()); + options.add_options()("T", "", cxxopts::value()); + options.add_options()("C", "", cxxopts::value()); + options.add_options()("R", "", cxxopts::value()); + options.add_options()("A", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("M", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("s", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("v", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("I", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("P", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("q", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("z", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("D", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("Z", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("n", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("l", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("g", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("X", "", cxxopts::value()->default_value("false")->implicit_value("true")); + options.add_options()("plugin_ep_libs", "", cxxopts::value()); + options.add_options()("list_ep_devices", ""); + options.add_options()("select_ep_devices", "", cxxopts::value()); + options.add_options()("h,help", ""); #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); - auto utf8_argv = utils::ConvertArgvToUtf8CharPtrs(utf8_strings); + auto utf8_argv = utils::CStringsFromStrings(utf8_strings); auto result = options.parse(static_cast(utf8_argv.size()), utf8_argv.data()); #else auto result = options.parse(argc, argv); #endif - /* - if (result.count("help")) { - std::cout << options.help() << std::endl; - return false; - } - */ - if (result.count("f")) { std::basic_string dim_name; int64_t override_val; @@ -450,14 +442,14 @@ bool CommandLineParser::ParseArgumentsV2(PerformanceTestConfig& test_config, int if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = ToPathString(result["plugin_ep_libs"].as()); - if (result.count("select_devices")) test_config.selected_devices = result["select_devices"].as(); + if (result.count("select_ep_devices")) test_config.selected_devices = result["select_ep_devices"].as(); if (result.count("h")) { perftest::CommandLineParser::ShowUsage(); return false; } - if (result.count("list_devices")) { + if (result.count("list_ep_devices")) { test_config.list_available_devices = true; return true; } diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index d1ed02bcf4776..86c81072233c0 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -13,7 +13,6 @@ class CommandLineParser { public: static void ShowUsage(); static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); - static bool ParseArgumentsV2(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; } // namespace perftest diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 573c9fdb0e636..090cfd7747736 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -20,7 +20,7 @@ int real_main(int argc, char* argv[]) { #endif g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; - if (!perftest::CommandLineParser::ParseArgumentsV2(test_config, argc, argv)) { + if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { perftest::CommandLineParser::ShowUsage(); return -1; } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index d2ab12752a876..ec26ddf65d9f4 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -53,19 +53,6 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; - - for (size_t i = 0; i < outputs_.size(); i++) { - Ort::Value& ort_output = outputs_[i]; - const float* output_data = ort_output.GetTensorData(); - gsl::span output_span(output_data, 6); - std::cout << output_span[0] << std::endl; - std::cout << output_span[1] << std::endl; - std::cout << output_span[2] << std::endl; - std::cout << output_span[3] << std::endl; - std::cout << output_span[4] << std::endl; - std::cout << output_span[5] << std::endl; - } - return duration_seconds; } @@ -100,10 +87,10 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { added_ep_devices.push_back(device); added_ep_device_index_set.insert(index); - fprintf(stdout, "[Plugin EP] Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); + fprintf(stdout, "[Plugin EP] EP Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); } } else { - std::string err_msg = "[Plugin EP] [WARNING] : The device index and its corresponding OrtEpDevice is not created from " + + std::string err_msg = "[Plugin EP] [WARNING] : The EP device index and its corresponding OrtEpDevice is not created from " + performance_test_config.machine_config.provider_type_name + ". Will skip adding this device.\n"; fprintf(stderr, "%s", err_msg.c_str()); } @@ -115,7 +102,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device Ort::ConstEpDevice& device = ep_devices[index]; if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { added_ep_devices.push_back(device); - fprintf(stdout, "Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); + fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); } } } diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h index bcc324cb996c1..71f797b0d5a35 100644 --- a/onnxruntime/test/perftest/ort_test_session.h +++ b/onnxruntime/test/perftest/ort_test_session.h @@ -51,7 +51,6 @@ class OnnxRuntimeTestSession : public TestSession { const int input_length_; std::string provider_name_; std::string device_memory_name_; // Device memory type name to use from the list in allocator.h - std::vector registered_plugin_ep_names_; }; } // namespace perftest diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index e57530cfa2761..c9638dd4290b9 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -5,10 +5,10 @@ #include #include -#include #include "strings_helper.h" #include "core/common/common.h" +#include "core/common/parse_string.h" namespace onnxruntime { namespace perftest { @@ -61,7 +61,8 @@ void ParseDeviceList(const std::string& input, std::vector& result) { while (std::getline(ss, item, ';')) { if (!item.empty()) { - result.push_back(std::stoi(item)); + int value = ParseStringWithClassicLocale(item); + result.push_back(value); } } } diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 5ad140f6bbedb..d00db824c5bd0 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -26,7 +26,7 @@ std::unique_ptr CreateICPUUsage(); #ifdef _WIN32 std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); -std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args); +std::vector CStringsFromStrings(const std::vector& utf8_args); #endif void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 53e34c1ed3f14..7f610cc8bccd1 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -87,7 +87,7 @@ std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { return utf8_args; } -std::vector ConvertArgvToUtf8CharPtrs(std::vector& utf8_args) { +std::vector CStringsFromStrings(const std::vector& utf8_args) { std::vector utf8_argv; utf8_argv.reserve(utf8_args.size()); for (auto& str : utf8_args) { From 2cd8a30519320e0685c86ec6e9e54cd2b00a17e3 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 24 Jul 2025 07:59:03 -0700 Subject: [PATCH 23/46] address lint issue --- onnxruntime/test/perftest/command_args_parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 56270c17f83af..60ee6feeeba72 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -195,7 +195,7 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { ORT_TRY { cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); - + // See ShowUsage() for detailed option descriptions. options.add_options()("f", "", cxxopts::value >()); options.add_options()("F", "", cxxopts::value >()); From ad4d8a83ee89e833d75beab6baf5c234aff91321 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 24 Jul 2025 10:13:34 -0700 Subject: [PATCH 24/46] add '\n' for fprintf --- onnxruntime/test/perftest/ort_test_session.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index ec26ddf65d9f4..8304d9ae6531e 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -87,7 +87,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { added_ep_devices.push_back(device); added_ep_device_index_set.insert(index); - fprintf(stdout, "[Plugin EP] EP Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); + fprintf(stdout, "[Plugin EP] EP Device [Index: %d, Name: %s] has been added to session.\n", index, device.EpName()); } } else { std::string err_msg = "[Plugin EP] [WARNING] : The EP device index and its corresponding OrtEpDevice is not created from " + @@ -102,7 +102,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device Ort::ConstEpDevice& device = ep_devices[index]; if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { added_ep_devices.push_back(device); - fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.", index, device.EpName()); + fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.\n", index, device.EpName()); } } } From 3e92522b2f010ffcbd87aca30126e8457bbd9009 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 25 Jul 2025 12:00:07 -0700 Subject: [PATCH 25/46] address reviewers' comments --- cmake/onnxruntime_unittests.cmake | 4 +- .../test/perftest/command_args_parser.cc | 31 ++++++++--- onnxruntime/test/perftest/common_utils.cc | 2 +- onnxruntime/test/perftest/ort_test_session.cc | 55 +++++++++++-------- onnxruntime/test/perftest/strings_helper.cc | 52 +++++++++++++++++- onnxruntime/test/perftest/strings_helper.h | 6 +- .../test/perftest/test_configuration.h | 2 + 7 files changed, 116 insertions(+), 36 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 7eec0beb27707..952a85385fda2 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1254,11 +1254,11 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() if(onnxruntime_MINIMAL_BUILD) - add_definitions(-DCXXOPTS_NO_RTTI) + target_compile_definitions(onnxruntime_perf_test PRIVATE CXXOPTS_NO_RTTI) endif() if(onnxruntime_DISABLE_EXCEPTIONS) - add_definitions(-DCXXOPTS_NO_EXCEPTIONS) + target_compile_definitions(onnxruntime_perf_test PRIVATE CXXOPTS_NO_EXCEPTIONS) endif() if (onnxruntime_BUILD_SHARED_LIB) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 60ee6feeeba72..e757b2ff26531 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -34,8 +34,8 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e or --ep [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " - "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai', 'webgpu' or plugin execution provider that provided via ep library. " + "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' and 'webgpu'. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" @@ -161,10 +161,18 @@ namespace perftest { "\t-X [Enable onnxruntime-extensions custom ops]: Registers custom ops from onnxruntime-extensions. " "onnxruntime-extensions must have been built in to onnxruntime. This can be done with the build.py " "'--use_extensions' option.\n" - "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider(EP) registration names and their corresponding shared libraries to register.\n" - "\t [Usage]: --plugin_ep_libs 'plugin_ep_1|plugin_ep_2.dll plugin_ep_2|plugin_ep_2.dll'\n" - "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" - "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" + "\n" + "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider (EP) registration names and their corresponding shared libraries to register.\n" + "\t [Usage]: --plugin_ep_libs \"plugin_ep_name_1|plugin_ep_1.dll plugin_ep_name_2|plugin_ep_2.dll ... \"\n" + "\n" + "\t--plugin_eps [Plugin EPs] Specifies a semicolon-separated list of plugin execution providers (EPs) to use.\n" + "\t [Usage]: --plugin_eps \"plugin_ep_1;plugin_ep_2;... \"\n" + "\n" + "\t--plugin_ep_options [EP options] Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" + "\t [Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \"\n" + "\n" + "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" + "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" "\t-h: help\n"); } #ifdef _WIN32 @@ -200,7 +208,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a options.add_options()("f", "", cxxopts::value >()); options.add_options()("F", "", cxxopts::value >()); options.add_options()("m", "", cxxopts::value()); - options.add_options()("e,ep", "", cxxopts::value()); + options.add_options()("e", "", cxxopts::value()); options.add_options()("r", "", cxxopts::value()); options.add_options()("t", "", cxxopts::value()); options.add_options()("p", "", cxxopts::value()); @@ -230,6 +238,8 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a options.add_options()("g", "", cxxopts::value()->default_value("false")->implicit_value("true")); options.add_options()("X", "", cxxopts::value()->default_value("false")->implicit_value("true")); options.add_options()("plugin_ep_libs", "", cxxopts::value()); + options.add_options()("plugin_eps", "", cxxopts::value()); + options.add_options()("plugin_ep_options", "", cxxopts::value()); options.add_options()("list_ep_devices", ""); options.add_options()("select_ep_devices", "", cxxopts::value()); options.add_options()("h,help", ""); @@ -318,8 +328,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } else if (!CompareCString(optarg, "nvtensorrtrtx")) { test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; } else { - // Could be plugin EP, save it first and handle later. - test_config.machine_config.provider_type_name = optarg; + return false; } } @@ -442,6 +451,10 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = ToPathString(result["plugin_ep_libs"].as()); + if (result.count("plugin_eps")) ParseEpList(result["plugin_eps"].as(), test_config.machine_config.plugin_provider_type_list); + + if (result.count("plugin_ep_options")) test_config.run_config.ep_runtime_config_string = ToPathString(result["plugin_ep_options"].as()); + if (result.count("select_ep_devices")) test_config.selected_devices = result["select_ep_devices"].as(); if (result.count("h")) { diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 6739530ffedc4..c377618b187a7 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -48,7 +48,7 @@ void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test if (status.IsOK()) { test_config.registered_plugin_eps.push_back(registration_name); } else { - fprintf(stderr, "Can't register %s plugin library: %s", registration_name.c_str(), status.GetErrorMessage().c_str()); + fprintf(stderr, "Can't register %s plugin library: %s\n", registration_name.c_str(), status.GetErrorMessage().c_str()); } } } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 8304d9ae6531e..9c36e83931198 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -62,19 +62,21 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; - bool is_plugin_ep_available = false; - - // Add devices created from plugin EP + // Add EP devices if any (created by plugin EP) if (!performance_test_config.registered_plugin_eps.empty()) { std::vector ep_devices = env.GetEpDevices(); - std::vector added_ep_devices; + // EP -> associated EP devices (All OrtEpDevice instances must be from the same execution provider) + std::unordered_map> added_ep_devices; std::unordered_set added_ep_device_index_set; - // Select devices by provided device index + auto& ep_list = performance_test_config.machine_config.plugin_provider_type_list; + std::unordered_set ep_set(ep_list.begin(), ep_list.end()); + + // Select EP devices by provided device index if (!performance_test_config.selected_devices.empty()) { std::vector device_list; device_list.reserve(performance_test_config.selected_devices.size()); - ParseDeviceList(performance_test_config.selected_devices, device_list); + ParseEpDeviceList(performance_test_config.selected_devices, device_list); for (auto index : device_list) { if (static_cast(index) > (ep_devices.size() - 1)) { fprintf(stderr, "%s", "The device index provided is not correct. Will skip this device id."); @@ -82,10 +84,9 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } Ort::ConstEpDevice& device = ep_devices[index]; - - if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { + if (ep_set.find(std::string(device.EpName())) != ep_set.end()) { if (added_ep_device_index_set.find(index) == added_ep_device_index_set.end()) { - added_ep_devices.push_back(device); + added_ep_devices[device.EpName()].push_back(device); added_ep_device_index_set.insert(index); fprintf(stdout, "[Plugin EP] EP Device [Index: %d, Name: %s] has been added to session.\n", index, device.EpName()); } @@ -96,26 +97,37 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } } } else { - // All OrtEpDevice instances must be from the same execution provider. - // Find and select the OrtEpDevice associated with the execution provider provided via "-e" argument. - for (int index = 0; static_cast(index) < ep_devices.size(); ++index) { + // Find and select the OrtEpDevice associated with the EP in "--plugin_eps". + for (size_t index = 0; index < ep_devices.size(); ++index) { Ort::ConstEpDevice& device = ep_devices[index]; - if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { - added_ep_devices.push_back(device); + if (ep_set.find(std::string(device.EpName())) != ep_set.end()) { + added_ep_devices[device.EpName()].push_back(device); fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.\n", index, device.EpName()); } } } if (added_ep_devices.empty()) { - ORT_THROW("[ERROR] [plugin EP]: No matching devices found."); + ORT_THROW("[ERROR] [Plugin EP]: No matching EP devices found."); + } + + std::string ep_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); + + // A list of EP's associated options + std::vector> ep_options; + ParseEpOptions(ep_option_string, ep_options); + + // EP -> associated EP options + std::unordered_map> ep_options_map; + for (size_t i = 0; i < ep_list.size(); ++i) { + ep_options_map.emplace(ep_list[i], ep_options[i]); } - std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); - std::unordered_map provider_options; - ParseSessionConfigs(provider_option_string, provider_options); - session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); - is_plugin_ep_available = true; + for (auto& ep_and_devices : added_ep_devices) { + auto& ep = ep_and_devices.first; + auto& devices = ep_and_devices.second; + session_options.AppendExecutionProvider_V2(env, devices, ep_options_map[ep]); + } } provider_name_ = performance_test_config.machine_config.provider_type_name; @@ -630,8 +642,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); #endif } else if (!provider_name_.empty() && provider_name_ != onnxruntime::kCpuExecutionProvider && - provider_name_ != onnxruntime::kOpenVINOExecutionProvider && - !is_plugin_ep_available) { + provider_name_ != onnxruntime::kOpenVINOExecutionProvider) { ORT_THROW("This backend is not included in perf test runner.\n"); } diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index c9638dd4290b9..e4256d3a5517f 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -55,7 +55,57 @@ void ParseSessionConfigs(const std::string& configs_string, } } -void ParseDeviceList(const std::string& input, std::vector& result) { +/** + * @brief Splits a string by a given delimiter while preserving empty tokens. + * + * This function splits the input string into substrings separated by the specified delimiter. + * Unlike std::getline, it preserves empty tokens that result from leading, trailing, or consecutive delimiters. + * + * @param input The input string to split. + * @param delim The delimiter character to split on. + * @param out The output vector to store the resulting substrings. It will be appended to, not cleared. + * + * @example + * std::vector tokens; + * SplitAndHandleEmptyTokens(";a|b;;x|y;", ';', tokens); + * // tokens = ["", "a|b", "", "x|y", ""] + */ +void SplitAndHandleEmptyTokens(const std::string& input, char delim, std::vector& out) { + std::string::size_type start = 0; + auto end = input.find(delim); + while (end != std::string::npos) { + out.emplace_back(input.substr(start, end - start)); // preserves empty + start = end + 1; + end = input.find(delim, start); + } + out.emplace_back(input.substr(start)); // last token +} + +void ParseEpOptions(const std::string& input, std::vector>& result) { + std::vector tokens; + SplitAndHandleEmptyTokens(input, ';', tokens); + + for (const auto& token : tokens) { + result.emplace_back(); // Adds a new empty map + if (!token.empty()) { + ParseSessionConfigs(token, result.back()); // only parse non-empty + } + // if token is empty, we still get an empty map in `result` + } +} + +void ParseEpList(const std::string& input, std::vector& result) { + std::stringstream ss(input); + std::string token; + + while (std::getline(ss, token, ';')) { + if (!token.empty()) { + result.push_back(token); + } + } +} + +void ParseEpDeviceList(const std::string& input, std::vector& result) { std::stringstream ss(input); std::string item; diff --git a/onnxruntime/test/perftest/strings_helper.h b/onnxruntime/test/perftest/strings_helper.h index 241a9c29ab6f0..d54b274807815 100644 --- a/onnxruntime/test/perftest/strings_helper.h +++ b/onnxruntime/test/perftest/strings_helper.h @@ -14,6 +14,10 @@ void ParseSessionConfigs(const std::string& configs_string, std::unordered_map& session_configs, const std::unordered_set& available_keys = {}); -void ParseDeviceList(const std::string& input, std::vector& result); +void ParseEpList(const std::string& input, std::vector& result); + +void ParseEpOptions(const std::string& input, std::vector>& result); + +void ParseEpDeviceList(const std::string& input, std::vector& result); } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 404e6ca30eadd..1eb5c1a527426 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "core/graph/constants.h" #include "core/framework/session_options.h" @@ -35,6 +36,7 @@ struct ModelInfo { struct MachineConfig { Platform platform{Platform::kWindows}; std::string provider_type_name{onnxruntime::kCpuExecutionProvider}; + std::vector plugin_provider_type_list; }; struct RunConfig { From d2fb3359fb38d248402e138274726e8a2eee3ce6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 25 Jul 2025 13:25:16 -0700 Subject: [PATCH 26/46] fix some issues --- onnxruntime/test/perftest/ort_test_session.cc | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 9c36e83931198..16dd36efd057e 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -102,7 +102,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device Ort::ConstEpDevice& device = ep_devices[index]; if (ep_set.find(std::string(device.EpName())) != ep_set.end()) { added_ep_devices[device.EpName()].push_back(device); - fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.\n", index, device.EpName()); + fprintf(stdout, "EP Device [Index: %d, Name: %s] has been added to session.\n", static_cast(index), device.EpName()); } } } @@ -113,14 +113,22 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::string ep_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); - // A list of EP's associated options - std::vector> ep_options; - ParseEpOptions(ep_option_string, ep_options); + // A list of EP's associated provider options + std::vector> ep_options_list; + ParseEpOptions(ep_option_string, ep_options_list); - // EP -> associated EP options + // If user only provide the EPs' provider options for the first several EPs, + // add empty options for the rest EPs. + if (ep_options_list.size() < ep_list.size()) { + for (size_t i = ep_options_list.size(); i < ep_list.size(); ++i) { + ep_options_list.emplace_back(); // Adds a new empty map + } + } + + // EP -> associated provider options std::unordered_map> ep_options_map; for (size_t i = 0; i < ep_list.size(); ++i) { - ep_options_map.emplace(ep_list[i], ep_options[i]); + ep_options_map.emplace(ep_list[i], ep_options_list[i]); } for (auto& ep_and_devices : added_ep_devices) { From ad0ac64d48e67ca0fd526a36ad5e28c671862047 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 25 Jul 2025 13:44:02 -0700 Subject: [PATCH 27/46] update option usage example --- onnxruntime/test/perftest/command_args_parser.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index e757b2ff26531..d4796da0d9f4b 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -169,7 +169,9 @@ namespace perftest { "\t [Usage]: --plugin_eps \"plugin_ep_1;plugin_ep_2;... \"\n" "\n" "\t--plugin_ep_options [EP options] Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" - "\t [Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \"\n" + "\t [Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "\t --plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "\t --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \" or \n" "\n" "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" From ec5e973d6ca01793e03e209077cf0273cea402c6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 25 Jul 2025 14:00:09 -0700 Subject: [PATCH 28/46] revert usage description --- onnxruntime/test/perftest/command_args_parser.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index d4796da0d9f4b..34f1fa1ac63b3 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -34,8 +34,8 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu|plugin_ep]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " - "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' and 'webgpu'. " + "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' or 'webgpu'. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. Default:ort\n" "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" @@ -171,7 +171,7 @@ namespace perftest { "\t--plugin_ep_options [EP options] Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" "\t [Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" "\t --plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" - "\t --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \" or \n" + "\t --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \" \n" "\n" "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" From 2396d2c29f1c28254e170415d22c4eecfba52865 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 30 Jul 2025 12:34:38 -0700 Subject: [PATCH 29/46] Use abseil (ABSL Flags) instead of cxxopts --- cmake/onnxruntime_unittests.cmake | 20 +- .../test/perftest/command_args_parser.cc | 344 ++++++++++-------- .../test/perftest/test_configuration.h | 4 +- onnxruntime/test/perftest/utils.h | 2 +- onnxruntime/test/perftest/windows/utils.cc | 4 +- 5 files changed, 203 insertions(+), 171 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 952a85385fda2..31eef22534d81 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1210,14 +1210,6 @@ endif() if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) if(NOT IOS) - onnxruntime_fetchcontent_declare( - cxxopts - URL ${DEP_URL_cxxopts} - URL_HASH SHA1=${DEP_SHA1_cxxopts} - EXCLUDE_FROM_ALL - ) - onnxruntime_fetchcontent_makeavailable(cxxopts) - #perf test runner set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest) set(onnxruntime_perf_test_src_patterns @@ -1244,7 +1236,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} - ${CMAKE_CURRENT_BINARY_DIR} ${cxxopts_SOURCE_DIR}/include) + ${CMAKE_CURRENT_BINARY_DIR} if (WIN32) target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings}) @@ -1253,14 +1245,6 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() endif() - if(onnxruntime_MINIMAL_BUILD) - target_compile_definitions(onnxruntime_perf_test PRIVATE CXXOPTS_NO_RTTI) - endif() - - if(onnxruntime_DISABLE_EXCEPTIONS) - target_compile_definitions(onnxruntime_perf_test PRIVATE CXXOPTS_NO_EXCEPTIONS) - endif() - if (onnxruntime_BUILD_SHARED_LIB) #It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here. #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. @@ -1268,7 +1252,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) onnx_test_runner_common onnxruntime_test_utils onnxruntime_common onnxruntime onnxruntime_flatbuffers onnx_test_data_proto ${onnxruntime_EXTERNAL_LIBRARIES} - ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) + absl::flags absl::flags_parse ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) if(NOT WIN32) if(onnxruntime_USE_SNPE) list(APPEND onnxruntime_perf_test_libs onnxruntime_providers_snpe) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 34f1fa1ac63b3..18548cd94156d 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -12,7 +12,8 @@ #include #include -#include +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" #include #include @@ -21,6 +22,45 @@ #include "test_configuration.h" #include "strings_helper.h" +// Declare flags. See ShowUsage() for detailed option descriptions +ABSL_FLAG(std::string, f, "", ""); +ABSL_FLAG(std::string, F, "", ""); +ABSL_FLAG(std::string, m, "", ""); +ABSL_FLAG(std::string, e, "", ""); +ABSL_FLAG(size_t, r, 1000, ""); +ABSL_FLAG(size_t, t, 600, ""); +ABSL_FLAG(std::string, p, "", ""); +ABSL_FLAG(int, x, 0, ""); +ABSL_FLAG(int, y, 0, ""); +ABSL_FLAG(size_t, c, 1, ""); +ABSL_FLAG(int, d, 0, ""); +ABSL_FLAG(int, o, 99, ""); +ABSL_FLAG(std::string, u, "", ""); +ABSL_FLAG(std::string, i, "", ""); +ABSL_FLAG(int, S, -1, ""); +ABSL_FLAG(std::string, T, "", ""); +ABSL_FLAG(std::string, C, "", ""); +ABSL_FLAG(std::string, R, "", ""); +ABSL_FLAG(bool, A, true, ""); +ABSL_FLAG(bool, M, true, ""); +ABSL_FLAG(bool, s, false, ""); +ABSL_FLAG(bool, v, false, ""); +ABSL_FLAG(bool, I, false, ""); +ABSL_FLAG(bool, P, false, ""); +ABSL_FLAG(bool, q, false, ""); +ABSL_FLAG(bool, z, false, ""); +ABSL_FLAG(bool, D, false, ""); +ABSL_FLAG(bool, Z, false, ""); +ABSL_FLAG(bool, n, false, ""); +ABSL_FLAG(bool, l, false, ""); +ABSL_FLAG(bool, g, false, ""); +ABSL_FLAG(bool, X, false, ""); +ABSL_FLAG(std::string, plugin_ep_libs, "", ""); +ABSL_FLAG(std::string, plugin_eps, "", ""); +ABSL_FLAG(std::string, plugin_ep_options, "", ""); +ABSL_FLAG(bool, list_ep_devices, false, ""); +ABSL_FLAG(std::string, select_ep_devices, "", ""); + namespace onnxruntime { namespace perftest { @@ -177,19 +217,15 @@ namespace perftest { "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" "\t-h: help\n"); } -#ifdef _WIN32 -static const ORTCHAR_T* overrideDelimiter = L":"; -#else -static const ORTCHAR_T* overrideDelimiter = ":"; -#endif -static bool ParseDimensionOverride(std::basic_string& dim_identifier, int64_t& override_val, const ORTCHAR_T* option) { - std::basic_string free_dim_str(option); - size_t delimiter_location = free_dim_str.find(overrideDelimiter); + +static bool ParseDimensionOverride(std::string& dim_identifier, int64_t& override_val, const char* option) { + std::basic_string free_dim_str(option); + size_t delimiter_location = free_dim_str.find(":"); if (delimiter_location >= free_dim_str.size() - 1) { return false; } dim_identifier = free_dim_str.substr(0, delimiter_location); - std::basic_string override_val_str = free_dim_str.substr(delimiter_location + 1, std::wstring::npos); + std::string override_val_str = free_dim_str.substr(delimiter_location + 1, std::string::npos); ORT_TRY { override_val = std::stoll(override_val_str.c_str()); if (override_val <= 0) { @@ -204,184 +240,164 @@ static bool ParseDimensionOverride(std::basic_string& dim_identifier, bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { ORT_TRY { - cxxopts::Options options("onnxruntime_perf_test", "perf_test [options...] model_path [result_file]"); - - // See ShowUsage() for detailed option descriptions. - options.add_options()("f", "", cxxopts::value >()); - options.add_options()("F", "", cxxopts::value >()); - options.add_options()("m", "", cxxopts::value()); - options.add_options()("e", "", cxxopts::value()); - options.add_options()("r", "", cxxopts::value()); - options.add_options()("t", "", cxxopts::value()); - options.add_options()("p", "", cxxopts::value()); - options.add_options()("x", "", cxxopts::value()); - options.add_options()("y", "", cxxopts::value()); - options.add_options()("c", "", cxxopts::value()); - options.add_options()("d", "", cxxopts::value()); - options.add_options()("o", "", cxxopts::value()); - options.add_options()("u", "", cxxopts::value()); - options.add_options()("i", "", cxxopts::value()); - options.add_options()("S", "", cxxopts::value()); - options.add_options()("T", "", cxxopts::value()); - options.add_options()("C", "", cxxopts::value()); - options.add_options()("R", "", cxxopts::value()); - options.add_options()("A", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("M", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("s", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("v", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("I", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("P", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("q", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("z", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("D", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("Z", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("n", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("l", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("g", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("X", "", cxxopts::value()->default_value("false")->implicit_value("true")); - options.add_options()("plugin_ep_libs", "", cxxopts::value()); - options.add_options()("plugin_eps", "", cxxopts::value()); - options.add_options()("plugin_ep_options", "", cxxopts::value()); - options.add_options()("list_ep_devices", ""); - options.add_options()("select_ep_devices", "", cxxopts::value()); - options.add_options()("h,help", ""); - #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); auto utf8_argv = utils::CStringsFromStrings(utf8_strings); - auto result = options.parse(static_cast(utf8_argv.size()), utf8_argv.data()); + + auto positional = absl::ParseCommandLine(static_cast(utf8_argv.size()), utf8_argv.data()); #else - auto result = options.parse(argc, argv); + auto positional = absl::ParseCommandLine(argc, argv); #endif - if (result.count("f")) { - std::basic_string dim_name; + // -f + std::string opt_str = absl::GetFlag(FLAGS_f); + if (!opt_str.empty()) { + std::string dim_name; int64_t override_val; - std::basic_string opt_str = ToPathString(result["f"].as()); if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { return false; } test_config.run_config.free_dim_name_overrides[dim_name] = override_val; } - if (result.count("F")) { - std::basic_string dim_denotation; + // -F + opt_str = absl::GetFlag(FLAGS_F); + if (!opt_str.empty()) { + std::string dim_denotation; int64_t override_val; - std::basic_string opt_str = ToPathString(result["F"].as()); if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { return false; } test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; } - if (result.count("m")) { - std::basic_string opt_str = ToPathString(result["m"].as()); - if (!CompareCString(opt_str.c_str(), ORT_TSTR("duration"))) { + // -m + opt_str = absl::GetFlag(FLAGS_m); + if (!opt_str.empty()) { + if (opt_str == "duration") { test_config.run_config.test_mode = TestMode::kFixDurationMode; - } else if (!CompareCString(opt_str.c_str(), ORT_TSTR("times"))) { + } else if (opt_str == "times") { test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } else { return false; } } - if (result.count("p")) test_config.run_config.profile_file = ToPathString(result["p"].as()); - if (result["M"].as()) test_config.run_config.enable_memory_pattern = false; - if (result["A"].as()) test_config.run_config.enable_cpu_mem_arena = false; + // -p + std::basic_string opt_w_str = ToPathString(absl::GetFlag(FLAGS_p)); + if (!opt_w_str.empty()) test_config.run_config.profile_file = opt_w_str; + + // -M + bool opt_bool = absl::GetFlag(FLAGS_M); + if (opt_bool != true) test_config.run_config.enable_memory_pattern = false; - if (result.count("e")) { - auto optarg = result["e"].as().c_str(); - if (!CompareCString(optarg, "cpu")) { + // -A + opt_bool = absl::GetFlag(FLAGS_A); + if (opt_bool != true) test_config.run_config.enable_cpu_mem_arena = false; + + // -e + opt_str = absl::GetFlag(FLAGS_e); + if (!opt_str.empty()) { + if (opt_str == "cpu") { test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; - } else if (!CompareCString(optarg, "cuda")) { + } else if (opt_str == "cuda") { test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; - } else if (!CompareCString(optarg, "dnnl")) { + } else if (opt_str == "dnnl") { test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; - } else if (!CompareCString(optarg, "openvino")) { + } else if (opt_str == "openvino") { test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; - } else if (!CompareCString(optarg, "tensorrt")) { + } else if (opt_str == "tensorrt") { test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; - } else if (!CompareCString(optarg, "qnn")) { + } else if (opt_str == "qnn") { test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; - } else if (!CompareCString(optarg, "snpe")) { + } else if (opt_str == "snpe") { test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; - } else if (!CompareCString(optarg, "nnapi")) { + } else if (opt_str == "nnapi") { test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; - } else if (!CompareCString(optarg, "vsinpu")) { + } else if (opt_str == "vsinpu") { test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; - } else if (!CompareCString(optarg, "coreml")) { + } else if (opt_str == "coreml") { test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; - } else if (!CompareCString(optarg, "dml")) { + } else if (opt_str == "dml") { test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; - } else if (!CompareCString(optarg, "acl")) { + } else if (opt_str == "acl") { test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; - } else if (!CompareCString(optarg, "armnn")) { + } else if (opt_str == "armnn") { test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; - } else if (!CompareCString(optarg, "rocm")) { + } else if (opt_str == "rocm") { test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; - } else if (!CompareCString(optarg, "migraphx")) { + } else if (opt_str == "migraphx") { test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; - } else if (!CompareCString(optarg, "xnnpack")) { + } else if (opt_str == "xnnpack") { test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; - } else if (!CompareCString(optarg, "vitisai")) { + } else if (opt_str == "vitisai") { test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; - } else if (!CompareCString(optarg, "webgpu")) { + } else if (opt_str == "webgpu") { test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; - } else if (!CompareCString(optarg, "nvtensorrtrtx")) { + } else if (opt_str == "nvtensorrtrtx") { test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; } else { return false; } } - if (result.count("r")) { - auto val = result["r"].as(); - if (val <= 0) return false; + // -r + size_t val = absl::GetFlag(FLAGS_r); + if (val != static_cast(1000) /* default value for absl flag -r */) { + if (val <= static_cast(0)) return false; test_config.run_config.repeated_times = val; test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } - if (result.count("t")) { - auto val = result["t"].as(); - if (val <= 0) return false; + // -t + val = absl::GetFlag(FLAGS_t); + if (val != static_cast(600) /* default value for absl flag -t */) { + if (val <= static_cast(0)) return false; test_config.run_config.duration_in_seconds = val; test_config.run_config.test_mode = TestMode::kFixDurationMode; } - if (result["s"].as()) test_config.run_config.f_dump_statistics = true; + // -s + opt_bool = absl::GetFlag(FLAGS_s); + if (opt_bool) test_config.run_config.f_dump_statistics = true; - if (result.count("S")) { - auto val = result["S"].as(); - test_config.run_config.random_seed_for_input_data = val; - } + // -S + int val_int = absl::GetFlag(FLAGS_S); + if (val_int != -1) test_config.run_config.random_seed_for_input_data = val_int; - if (result["v"].as()) test_config.run_config.f_verbose = true; + // -v + opt_bool = absl::GetFlag(FLAGS_v); + if (opt_bool) test_config.run_config.f_verbose = true; - if (result.count("x")) { - auto val = result["x"].as(); - if (val < 0) return false; - test_config.run_config.intra_op_num_threads = val; + // -x + val_int = absl::GetFlag(FLAGS_x); + if (val_int != 0 /* default value for absl flag -x */) { + if (val_int < 0) return false; + test_config.run_config.intra_op_num_threads = val_int; } - if (result.count("y")) { - auto val = result["y"].as(); - if (val < 0) return false; - test_config.run_config.inter_op_num_threads = val; + // -y + val_int = absl::GetFlag(FLAGS_y); + if (val_int != 0 /* default value for absl flag -y */) { + if (val_int < 0) return false; + test_config.run_config.inter_op_num_threads = val_int; } - if (result.count("P")) { - test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; - } + // -P + opt_bool = absl::GetFlag(FLAGS_P); + if (opt_bool) test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; - if (result.count("c")) { - auto val = result["c"].as(); + // -c + val = absl::GetFlag(FLAGS_c); + if (val != static_cast(1) /* default value for absl flag -c */) { if (static_cast(val) <= 0) return false; test_config.run_config.concurrent_session_runs = val; } - if (result.count("o")) { - auto val = result["o"].as(); - switch (val) { + // -o + val_int = absl::GetFlag(FLAGS_o); + if (val_int != 99 /* default value for absl flag -o */) { + switch (val_int) { case ORT_DISABLE_ALL: test_config.run_config.optimization_level = ORT_DISABLE_ALL; break; @@ -407,27 +423,42 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } } - if (result.count("u")) test_config.run_config.optimized_model_path = ToPathString(result["u"].as()); + // -u + opt_w_str = ToPathString(absl::GetFlag(FLAGS_u)); + if (!opt_str.empty()) test_config.run_config.optimized_model_path = opt_w_str; - if (result.count("I")) test_config.run_config.generate_model_input_binding = true; + // -I + opt_bool = absl::GetFlag(FLAGS_I); + if (opt_bool) test_config.run_config.generate_model_input_binding = true; - if (result.count("d")) { - auto val = result["d"].as(); - if (val < 0) return false; - test_config.run_config.cudnn_conv_algo = val; + // -d + val_int = absl::GetFlag(FLAGS_d); + if (val_int != 0 /* default value for absl flag -d */) { + if (val_int < 0) return false; + test_config.run_config.cudnn_conv_algo = val_int; } - if (result.count("q")) test_config.run_config.do_cuda_copy_in_separate_stream = true; + // -q + opt_bool = absl::GetFlag(FLAGS_q); + if (opt_bool) test_config.run_config.do_cuda_copy_in_separate_stream = true; - if (result.count("z")) test_config.run_config.set_denormal_as_zero = true; + // -z + opt_bool = absl::GetFlag(FLAGS_z); + if (opt_bool) test_config.run_config.set_denormal_as_zero = true; - if (result.count("i")) test_config.run_config.ep_runtime_config_string = ToPathString(result["i"].as()); + // -i + opt_w_str = ToPathString(absl::GetFlag(FLAGS_i)); + if (!opt_w_str.empty()) test_config.run_config.ep_runtime_config_string = opt_w_str; - if (result.count("T")) test_config.run_config.intra_op_thread_affinities = result["T"].as(); + // -T + opt_str = absl::GetFlag(FLAGS_T); + if (!opt_str.empty()) test_config.run_config.intra_op_thread_affinities = opt_str; - if (result.count("C")) { + // -C + opt_str = absl::GetFlag(FLAGS_C); + if (!opt_str.empty()) { ORT_TRY { - ParseSessionConfigs(result["C"].as(), test_config.run_config.session_config_entries); + ParseSessionConfigs(opt_str, test_config.run_config.session_config_entries); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -437,40 +468,57 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } } - if (result.count("D")) test_config.run_config.disable_spinning = true; - - if (result.count("Z")) test_config.run_config.disable_spinning_between_run = true; + // -D + opt_bool = absl::GetFlag(FLAGS_D); + if (opt_bool) test_config.run_config.disable_spinning = true; - if (result.count("n")) test_config.run_config.exit_after_session_creation = true; + // -Z + opt_bool = absl::GetFlag(FLAGS_Z); + if (opt_bool) test_config.run_config.disable_spinning_between_run = true; - if (result.count("l")) test_config.model_info.load_via_path = true; + // -n + opt_bool = absl::GetFlag(FLAGS_n); + if (opt_bool) test_config.run_config.exit_after_session_creation = true; - if (result.count("R")) test_config.run_config.register_custom_op_path = ToPathString(result["R"].as()); + // -l + opt_bool = absl::GetFlag(FLAGS_l); + if (opt_bool) test_config.model_info.load_via_path = true; - if (result.count("g")) test_config.run_config.enable_cuda_io_binding = true; + // -R + opt_w_str = ToPathString(absl::GetFlag(FLAGS_R)); + if (!opt_w_str.empty()) test_config.run_config.register_custom_op_path = opt_w_str; - if (result.count("X")) test_config.run_config.use_extensions = true; + // -g + opt_bool = absl::GetFlag(FLAGS_g); + if (opt_bool) test_config.run_config.enable_cuda_io_binding = true; - if (result.count("plugin_ep_libs")) test_config.plugin_ep_names_and_libs = ToPathString(result["plugin_ep_libs"].as()); + // -X + opt_bool = absl::GetFlag(FLAGS_X); + if (opt_bool) test_config.run_config.use_extensions = true; - if (result.count("plugin_eps")) ParseEpList(result["plugin_eps"].as(), test_config.machine_config.plugin_provider_type_list); + // --plugin_ep_libs + opt_w_str = ToPathString(absl::GetFlag(FLAGS_plugin_ep_libs)); + if (!opt_w_str.empty()) test_config.plugin_ep_names_and_libs = opt_w_str; - if (result.count("plugin_ep_options")) test_config.run_config.ep_runtime_config_string = ToPathString(result["plugin_ep_options"].as()); + // --plugin_eps + opt_str = absl::GetFlag(FLAGS_plugin_eps); + if (!opt_str.empty()) ParseEpList(opt_str, test_config.machine_config.plugin_provider_type_list); - if (result.count("select_ep_devices")) test_config.selected_devices = result["select_ep_devices"].as(); + // --plugin_ep_options + opt_w_str = ToPathString(absl::GetFlag(FLAGS_plugin_ep_options)); + if (!opt_w_str.empty()) test_config.run_config.ep_runtime_config_string = opt_w_str; - if (result.count("h")) { - perftest::CommandLineParser::ShowUsage(); - return false; - } - - if (result.count("list_ep_devices")) { + // --list_ep_devices + opt_bool = absl::GetFlag(FLAGS_list_ep_devices); + if (opt_bool) { test_config.list_available_devices = true; return true; } - // Positional arguments - std::vector positional = result.unmatched(); + // --select_ep_devices + opt_str = absl::GetFlag(FLAGS_select_ep_devices); + if (!opt_str.empty()) test_config.selected_devices = opt_str; + if (positional.size() == 1) { test_config.model_info.model_file_path = ToPathString(positional[0]); test_config.run_config.f_dump_statistics = true; diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 1eb5c1a527426..ae81dc6b7ef40 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -61,8 +61,8 @@ struct RunConfig { bool set_denormal_as_zero{false}; std::basic_string ep_runtime_config_string; std::unordered_map session_config_entries; - std::map, int64_t> free_dim_name_overrides; - std::map, int64_t> free_dim_denotation_overrides; + std::map free_dim_name_overrides; + std::map free_dim_denotation_overrides; std::string intra_op_thread_affinities; bool disable_spinning = false; bool disable_spinning_between_run = false; diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index d00db824c5bd0..180b2d5e3f08e 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -26,7 +26,7 @@ std::unique_ptr CreateICPUUsage(); #ifdef _WIN32 std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); -std::vector CStringsFromStrings(const std::vector& utf8_args); +std::vector CStringsFromStrings(std::vector& utf8_args); #endif void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 7f610cc8bccd1..d30f1e417bafd 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -87,8 +87,8 @@ std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { return utf8_args; } -std::vector CStringsFromStrings(const std::vector& utf8_args) { - std::vector utf8_argv; +std::vector CStringsFromStrings(std::vector& utf8_args) { + std::vector utf8_argv; utf8_argv.reserve(utf8_args.size()); for (auto& str : utf8_args) { utf8_argv.push_back(&str[0]); From fdd95eae09d51c527d20ed520ebaced4bec53154 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 30 Jul 2025 13:32:35 -0700 Subject: [PATCH 30/46] update cmake file --- cmake/onnxruntime_unittests.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 31eef22534d81..6c5c64e1d3ab4 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1236,7 +1236,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} - ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}) if (WIN32) target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings}) From 313102436e67f0e3f3c540d0d9206eb1117af577 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 30 Jul 2025 14:07:09 -0700 Subject: [PATCH 31/46] get correct positional options --- onnxruntime/test/perftest/command_args_parser.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 18548cd94156d..9fa8f4f31f984 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -519,12 +519,12 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a opt_str = absl::GetFlag(FLAGS_select_ep_devices); if (!opt_str.empty()) test_config.selected_devices = opt_str; - if (positional.size() == 1) { - test_config.model_info.model_file_path = ToPathString(positional[0]); + if (positional.size() == 2) { + test_config.model_info.model_file_path = ToPathString(positional[1]); test_config.run_config.f_dump_statistics = true; - } else if (positional.size() == 2) { - test_config.model_info.model_file_path = ToPathString(positional[0]); - test_config.model_info.result_file_path = ToPathString(positional[1]); + } else if (positional.size() == 3) { + test_config.model_info.model_file_path = ToPathString(positional[1]); + test_config.model_info.result_file_path = ToPathString(positional[2]); } else { return false; } From 17eaccf3d2ad9e6aa6d40b2c0ed40515d8357c45 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 30 Jul 2025 14:18:36 -0700 Subject: [PATCH 32/46] revert deps.txt --- cmake/deps.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 8c67e20f39293..7089012a65f26 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -11,7 +11,7 @@ # abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20250512.0.zip;3d6ff7e7ce144d9a53a53bef1f1bf79e1da4b8e1 coremltools;https://github.com/apple/coremltools/archive/refs/tags/7.1.zip;f1bab0f30966f2e217d8e01207d518f230a1641a -cxxopts;https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.3.1.zip;B77F1CE4A03F610488BA0ED17C1BE2EFDBC15564 +cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0 date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159 dlpack;https://github.com/dmlc/dlpack/archive/5c210da409e7f1e51ddf445134a4376fdbd70d7d.zip;e499c86e4e5c5268a87661d7ea39c27fae10907c # This Eigen commit id matches the eigen archive being consumed from https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip From 0550d6c0561d30231c3eb020fc993e5049b19dee Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 30 Jul 2025 14:26:31 -0700 Subject: [PATCH 33/46] use PathString typedef --- onnxruntime/test/perftest/command_args_parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 9fa8f4f31f984..7a4bece88905f 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -284,7 +284,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } // -p - std::basic_string opt_w_str = ToPathString(absl::GetFlag(FLAGS_p)); + PathString opt_w_str = ToPathString(absl::GetFlag(FLAGS_p)); if (!opt_w_str.empty()) test_config.run_config.profile_file = opt_w_str; // -M From 6cbb20f02ef842165e69626bdaa857c0c3843d96 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 31 Jul 2025 23:01:25 -0700 Subject: [PATCH 34/46] address some of the reviewer's comments --- .../test/perftest/command_args_parser.cc | 331 +++-- .../test/perftest/command_args_parser.h | 3 + onnxruntime/test/perftest/ort_test_session.cc | 14 + .../test/perftest/ort_test_session.ccc | 1109 +++++++++++++++++ onnxruntime/test/perftest/utils.h | 2 + onnxruntime/test/perftest/windows/utils.cc | 9 + 6 files changed, 1360 insertions(+), 108 deletions(-) create mode 100644 onnxruntime/test/perftest/ort_test_session.ccc diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 7a4bece88905f..047542bd09337 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -12,9 +12,6 @@ #include #include -#include "absl/flags/flag.h" -#include "absl/flags/parse.h" - #include #include #include @@ -22,44 +19,148 @@ #include "test_configuration.h" #include "strings_helper.h" -// Declare flags. See ShowUsage() for detailed option descriptions -ABSL_FLAG(std::string, f, "", ""); -ABSL_FLAG(std::string, F, "", ""); -ABSL_FLAG(std::string, m, "", ""); -ABSL_FLAG(std::string, e, "", ""); -ABSL_FLAG(size_t, r, 1000, ""); -ABSL_FLAG(size_t, t, 600, ""); -ABSL_FLAG(std::string, p, "", ""); -ABSL_FLAG(int, x, 0, ""); -ABSL_FLAG(int, y, 0, ""); -ABSL_FLAG(size_t, c, 1, ""); -ABSL_FLAG(int, d, 0, ""); -ABSL_FLAG(int, o, 99, ""); -ABSL_FLAG(std::string, u, "", ""); -ABSL_FLAG(std::string, i, "", ""); -ABSL_FLAG(int, S, -1, ""); -ABSL_FLAG(std::string, T, "", ""); -ABSL_FLAG(std::string, C, "", ""); -ABSL_FLAG(std::string, R, "", ""); -ABSL_FLAG(bool, A, true, ""); -ABSL_FLAG(bool, M, true, ""); -ABSL_FLAG(bool, s, false, ""); -ABSL_FLAG(bool, v, false, ""); -ABSL_FLAG(bool, I, false, ""); -ABSL_FLAG(bool, P, false, ""); -ABSL_FLAG(bool, q, false, ""); -ABSL_FLAG(bool, z, false, ""); -ABSL_FLAG(bool, D, false, ""); -ABSL_FLAG(bool, Z, false, ""); -ABSL_FLAG(bool, n, false, ""); -ABSL_FLAG(bool, l, false, ""); -ABSL_FLAG(bool, g, false, ""); -ABSL_FLAG(bool, X, false, ""); -ABSL_FLAG(std::string, plugin_ep_libs, "", ""); -ABSL_FLAG(std::string, plugin_eps, "", ""); -ABSL_FLAG(std::string, plugin_ep_options, "", ""); -ABSL_FLAG(bool, list_ep_devices, false, ""); -ABSL_FLAG(std::string, select_ep_devices, "", ""); +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/flags/usage.h" +#include "absl/flags/usage_config.h" +#include "absl/flags/reflection.h" + +static const onnxruntime::perftest::PerformanceTestConfig& DefaultPerformanceTestConfig() { + static onnxruntime::perftest::PerformanceTestConfig default_config{}; + return default_config; +} + +ABSL_FLAG(std::string, f, "", "Specifies a free dimension by name to override to a specific value for performance optimization."); +ABSL_FLAG(std::string, F, "", "Specifies a free dimension by denotation to override to a specific value for performance optimization."); +ABSL_FLAG(std::string, m, "duration", "Specifies the test mode. Value could be 'duration' or 'times'."); +ABSL_FLAG(std::string, e, "cpu", "Specifies the provider 'cpu','cuda','dnnl','tensorrt', 'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' or 'webgpu'."); +ABSL_FLAG(size_t, r, DefaultPerformanceTestConfig().run_config.repeated_times, "Specifies the repeated times if running in 'times' test mode."); +ABSL_FLAG(size_t, t, DefaultPerformanceTestConfig().run_config.duration_in_seconds, "Specifies the seconds to run for 'duration' mode."); +ABSL_FLAG(std::string, p, "", "Specifies the profile name to enable profiling and dump the profile data to the file."); +ABSL_FLAG(int, x, DefaultPerformanceTestConfig().run_config.intra_op_num_threads, "Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0."); +ABSL_FLAG(int, y, DefaultPerformanceTestConfig().run_config.inter_op_num_threads, "Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0."); +ABSL_FLAG(size_t, c, DefaultPerformanceTestConfig().run_config.concurrent_session_runs, "Specifies the (max) number of runs to invoke simultaneously."); +ABSL_FLAG(int, d, DefaultPerformanceTestConfig().run_config.cudnn_conv_algo, "Specifies CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default)."); +ABSL_FLAG(int, o, DefaultPerformanceTestConfig().run_config.optimization_level, "Specifies graph optimization level. Default is 99 (all). Valid values are 0 (disable), 1 (basic), 2 (extended), 3 (layout), 99 (all)."); +ABSL_FLAG(std::string, u, "", "Specifies the optimized model path for saving."); +ABSL_FLAG(std::string, i, "", + "Specifies EP specific runtime options as key-value pairs.\n Different runtime options available are: \n" + " [Usage]: -e -i '| |'\n" + "\n" + " [ACL only] [enable_fast_math]: Options: 'true', 'false', default: 'false', \n" + "\n" + " [DML only] [performance_preference]: DML device performance preference, options: 'default', 'minimum_power', 'high_performance', \n" + " [DML only] [device_filter]: DML device filter, options: 'any', 'gpu', 'npu', \n" + " [DML only] [disable_metacommands]: Options: 'true', 'false', \n" + " [DML only] [enable_graph_capture]: Options: 'true', 'false', \n" + " [DML only] [enable_graph_serialization]: Options: 'true', 'false', \n" + "\n" + " [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n" + " [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n" + " [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" + " [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" + " [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" + " [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" + "\n" + " [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" + " [QNN only] [backend_path]: QNN backend path. E.g., '/folderpath/libQnnHtp.so', '/winfolderpath/QnnHtp.dll'. Mutually exclusive with 'backend_type'.\n" + " [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" + " [QNN only] [profiling_file_path] : QNN profiling file path if ETW not enabled.\n" + " [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" + " [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n" + " [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n" + " 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" + " [QNN only] [op_packages]: QNN UDO package, allowed format: \n" + " op_packages|::[:],::[:]. \n" + " [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n" + " [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n" + " [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n" + " '0', '1', '2', '3', default is '0'.\n" + " [QNN only] [soc_model]: The SoC Model number. Refer to QNN SDK documentation for specific values. Defaults to '0' (unknown). \n" + " [QNN only] [htp_arch]: The minimum HTP architecture. The driver will use ops compatible with this architecture. \n" + " Options are '0', '68', '69', '73', '75'. Defaults to '0' (none). \n" + " [QNN only] [device_id]: The ID of the device to use when setting 'htp_arch'. Defaults to '0' (for single device). \n" + " [QNN only] [enable_htp_fp16_precision]: Enable the HTP_FP16 precision so that the float32 model will be inferenced with fp16 precision. \n" + " Otherwise, it will be fp32 precision. Works for float32 model for HTP backend. Defaults to '1' (with FP16 precision.). \n" + " [QNN only] [offload_graph_io_quantization]: Offload graph input quantization and graph output dequantization to another EP (typically CPU EP). \n" + " Defaults to '0' (QNN EP handles the graph I/O quantization and dequantization). \n" + " [QNN only] [enable_htp_spill_fill_buffer]: Enable HTP spill fill buffer, used while generating QNN context binary.\n" + " [QNN only] [enable_htp_shared_memory_allocator]: Enable the QNN HTP shared memory allocator and use it for inputs and outputs. Requires libcdsprpc.so/dll to be available.\n" + " Defaults to '0' (disabled).\n" + " [Example] [For QNN EP] -e qnn -i \"backend_type|cpu\" \n" + "\n" + " [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n" + " [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n" + " [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n" + " [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n" + " [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n" + " [TensorRT only] [trt_int8_calibration_table_name]: Specify INT8 calibration table name.\n" + " [TensorRT only] [trt_int8_use_native_calibration_table]: Use Native TensorRT calibration table.\n" + " [TensorRT only] [trt_dla_enable]: Enable DLA in Jetson device.\n" + " [TensorRT only] [trt_dla_core]: DLA core number.\n" + " [TensorRT only] [trt_dump_subgraphs]: Dump TRT subgraph to onnx model.\n" + " [TensorRT only] [trt_engine_cache_enable]: Enable engine caching.\n" + " [TensorRT only] [trt_engine_cache_path]: Specify engine cache path.\n" + " [TensorRT only] [trt_engine_cache_prefix]: Customize engine cache prefix when trt_engine_cache_enable is true.\n" + " [TensorRT only] [trt_engine_hw_compatible]: Enable hardware compatibility. Engines ending with '_sm80+' can be re-used across all Ampere+ GPU (a hardware-compatible engine may have lower throughput and/or higher latency than its non-hardware-compatible counterpart).\n" + " [TensorRT only] [trt_weight_stripped_engine_enable]: Enable weight-stripped engine build.\n" + " [TensorRT only] [trt_onnx_model_folder_path]: Folder path for the ONNX model with weights.\n" + " [TensorRT only] [trt_force_sequential_engine_build]: Force TensorRT engines to be built sequentially.\n" + " [TensorRT only] [trt_context_memory_sharing_enable]: Enable TensorRT context memory sharing between subgraphs.\n" + " [TensorRT only] [trt_layer_norm_fp32_fallback]: Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow.\n" + " [Example] [For TensorRT EP] -e tensorrt -i 'trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false trt_force_sequential_engine_build|false'\n" + "\n" + " [NNAPI only] [NNAPI_FLAG_USE_FP16]: Use fp16 relaxation in NNAPI EP..\n" + " [NNAPI only] [NNAPI_FLAG_USE_NCHW]: Use the NCHW layout in NNAPI EP.\n" + " [NNAPI only] [NNAPI_FLAG_CPU_DISABLED]: Prevent NNAPI from using CPU devices.\n" + " [NNAPI only] [NNAPI_FLAG_CPU_ONLY]: Using CPU only in NNAPI EP.\n" + " [Example] [For NNAPI EP] -e nnapi -i \"NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED\"\n" + "\n" + " [CoreML only] [ModelFormat]:[MLProgram, NeuralNetwork] Create an ML Program model or Neural Network. Default is NeuralNetwork.\n" + " [CoreML only] [MLComputeUnits]:[CPUAndNeuralEngine CPUAndGPU ALL CPUOnly] Specify to limit the backend device used to run the model.\n" + " [CoreML only] [AllowStaticInputShapes]:[0 1].\n" + " [CoreML only] [EnableOnSubgraphs]:[0 1].\n" + " [CoreML only] [SpecializationStrategy]:[Default FastPrediction].\n" + " [CoreML only] [ProfileComputePlan]:[0 1].\n" + " [CoreML only] [AllowLowPrecisionAccumulationOnGPU]:[0 1].\n" + " [CoreML only] [ModelCacheDirectory]:[path../a/b/c].\n" + " [Example] [For CoreML EP] -e coreml -i \"ModelFormat|MLProgram MLComputeUnits|CPUAndGPU\"\n" + "\n" + " [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n" + " [SNPE only] [priority]: execution priority, options: 'low', 'normal'. \n" + " [SNPE only] [buffer_type]: options: 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. default: ITENSOR'. \n" + " [SNPE only] [enable_init_cache]: enable SNPE init caching feature, set to 1 to enabled it. Disabled by default. \n" + " [Example] [For SNPE EP] -e snpe -i \"runtime|CPU priority|low\" \n"); +ABSL_FLAG(int, S, DefaultPerformanceTestConfig().run_config.random_seed_for_input_data, "Given random seed, to produce the same input data. This defaults to -1(no initialize)."); +ABSL_FLAG(std::string, T, "", "Specifies intra op thread affinity string."); +ABSL_FLAG(std::string, C, "", + "Specifies session configuration entries as key-value pairs:\n -C \"| |\" \n" + "Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" + "[Example] -C \"session.disable_cpu_ep_fallback|1 ep.context_enable|1\" \n"); +ABSL_FLAG(std::string, R, "", "Allows user to register custom op by .so or .dll file."); +ABSL_FLAG(bool, A, DefaultPerformanceTestConfig().run_config.enable_cpu_mem_arena, "Disables memory arena."); +ABSL_FLAG(bool, M, DefaultPerformanceTestConfig().run_config.enable_memory_pattern, "Disables memory pattern."); +ABSL_FLAG(bool, s, DefaultPerformanceTestConfig().run_config.f_dump_statistics, "Shows statistics result, like P75, P90. If no result_file provided this defaults to on."); +ABSL_FLAG(bool, v, DefaultPerformanceTestConfig().run_config.f_verbose, "Shows verbose information."); +ABSL_FLAG(bool, I, DefaultPerformanceTestConfig().run_config.generate_model_input_binding, "Generates tensor input binding. Free dimensions are treated as 1 unless overridden using -f."); +ABSL_FLAG(bool, P, false, "Uses parallel executor instead of sequential executor."); +ABSL_FLAG(bool, q, DefaultPerformanceTestConfig().run_config.do_cuda_copy_in_separate_stream, "[CUDA only] Uses separate stream for copy."); +ABSL_FLAG(bool, z, DefaultPerformanceTestConfig().run_config.set_denormal_as_zero, "Sets denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals."); +ABSL_FLAG(bool, D, DefaultPerformanceTestConfig().run_config.disable_spinning, "Disables spinning entirely for thread owned by onnxruntime intra-op thread pool."); +ABSL_FLAG(bool, Z, DefaultPerformanceTestConfig().run_config.disable_spinning_between_run, "Disallows thread from spinning during runs to reduce cpu usage."); +ABSL_FLAG(bool, n, DefaultPerformanceTestConfig().run_config.exit_after_session_creation, "Allows user to measure session creation time to measure impact of enabling any initialization optimizations."); +ABSL_FLAG(bool, l, DefaultPerformanceTestConfig().model_info.load_via_path, "Provides file as binary in memory by using fopen before session creation."); +ABSL_FLAG(bool, g, DefaultPerformanceTestConfig().run_config.enable_cuda_io_binding, "[TensorRT RTX | TensorRT | CUDA] Enables tensor input and output bindings on CUDA before session run."); +ABSL_FLAG(bool, X, DefaultPerformanceTestConfig().run_config.use_extensions, "Registers custom ops from onnxruntime-extensions."); +ABSL_FLAG(std::string, plugin_ep_libs, "", "Specifies a list of plugin execution provider (EP) registration names and their corresponding shared libraries to register.\n" + "[Usage]: --plugin_ep_libs \"plugin_ep_name_1|plugin_ep_1.dll plugin_ep_name_2|plugin_ep_2.dll ... \""); +ABSL_FLAG(std::string, plugin_eps, "", "Specifies a semicolon-separated list of plugin execution providers (EPs) to use."); +ABSL_FLAG(std::string, plugin_ep_options, "", "Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" + "[Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "--plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "--plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \""); +ABSL_FLAG(bool, list_ep_devices, false, "Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n"); +ABSL_FLAG(std::string, select_ep_devices, "", "Specifies a semicolon-separated list of device indices to add to the session and run with."); namespace onnxruntime { namespace perftest { @@ -238,8 +339,36 @@ static bool ParseDimensionOverride(std::string& dim_identifier, int64_t& overrid return true; } +std::string CustomUsageMessage() { + std::ostringstream oss; + oss << "onnxruntime_perf_test [options...] model_path [result_file]\n\n"; + oss << "Note: Options may be specified with either a single dash(-option) or a double dash(--option).Both forms are accepted and treated identically.\n\n"; + oss << "Options:"; + + return oss.str(); +} + bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { ORT_TRY { + // Following callback is to make sure all the ABSL flags defined above will be showed up when running with "--help". + // Note: By default abseil only wants flags in binary's main. It expects the main routine to reside in .cc or -main.cc or + // _main.cc, where the is the name of the binary (without .exe on Windows). See usage_config.cc in abseil for more details. + absl::FlagsUsageConfig config; + config.contains_help_flags = [](absl::string_view filename) { + auto suffix = utils::Basename(filename); + std::string_view file_has_the_flag_defs(__FILE__); + file_has_the_flag_defs = utils::Basename(file_has_the_flag_defs); + + return suffix == file_has_the_flag_defs; + }; + + config.normalize_filename = [](absl::string_view f) { + return std::string(f); + }; + absl::SetFlagsUsageConfig(config); + + absl::SetProgramUsageMessage(CustomUsageMessage()); + #ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); auto utf8_argv = utils::CStringsFromStrings(utf8_strings); @@ -284,16 +413,13 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } // -p - PathString opt_w_str = ToPathString(absl::GetFlag(FLAGS_p)); - if (!opt_w_str.empty()) test_config.run_config.profile_file = opt_w_str; + test_config.run_config.profile_file = ToPathString(absl::GetFlag(FLAGS_p)); // -M - bool opt_bool = absl::GetFlag(FLAGS_M); - if (opt_bool != true) test_config.run_config.enable_memory_pattern = false; + test_config.run_config.enable_memory_pattern = absl::GetFlag(FLAGS_M); // -A - opt_bool = absl::GetFlag(FLAGS_A); - if (opt_bool != true) test_config.run_config.enable_cpu_mem_arena = false; + test_config.run_config.enable_cpu_mem_arena = absl::GetFlag(FLAGS_A); // -e opt_str = absl::GetFlag(FLAGS_e); @@ -341,62 +467,64 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } } + auto is_option_specified = [&](std::string& option) { + for (int i = 1; i < argc; ++i) { + auto utf8_arg = ToUTF8String(argv[i]); + if (utf8_arg == ("-" + option) || utf8_arg == ("--" + option)) { + return true; + } + } + return false; + }; + // -r - size_t val = absl::GetFlag(FLAGS_r); - if (val != static_cast(1000) /* default value for absl flag -r */) { - if (val <= static_cast(0)) return false; - test_config.run_config.repeated_times = val; + // + // We can’t tell if: + // The user typed -r 1000 (default value) Or the user didn’t type -r at all. + // We need to parse the argv in order to properly set test_node. + opt_str = "r"; + if (is_option_specified(opt_str)) { + if (absl::GetFlag(FLAGS_r) == static_cast(0)) return false; + test_config.run_config.repeated_times = absl::GetFlag(FLAGS_r); test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } // -t - val = absl::GetFlag(FLAGS_t); - if (val != static_cast(600) /* default value for absl flag -t */) { - if (val <= static_cast(0)) return false; - test_config.run_config.duration_in_seconds = val; + opt_str = "t"; + if (is_option_specified(opt_str)) { + if (absl::GetFlag(FLAGS_t) <= static_cast(0)) return false; + test_config.run_config.duration_in_seconds = absl::GetFlag(FLAGS_t); test_config.run_config.test_mode = TestMode::kFixDurationMode; } // -s - opt_bool = absl::GetFlag(FLAGS_s); - if (opt_bool) test_config.run_config.f_dump_statistics = true; + test_config.run_config.f_dump_statistics = absl::GetFlag(FLAGS_s); // -S - int val_int = absl::GetFlag(FLAGS_S); - if (val_int != -1) test_config.run_config.random_seed_for_input_data = val_int; + test_config.run_config.random_seed_for_input_data = absl::GetFlag(FLAGS_S); // -v - opt_bool = absl::GetFlag(FLAGS_v); - if (opt_bool) test_config.run_config.f_verbose = true; + test_config.run_config.f_verbose = absl::GetFlag(FLAGS_v); // -x - val_int = absl::GetFlag(FLAGS_x); - if (val_int != 0 /* default value for absl flag -x */) { - if (val_int < 0) return false; - test_config.run_config.intra_op_num_threads = val_int; - } - + if (absl::GetFlag(FLAGS_x) < 0) return false; + test_config.run_config.intra_op_num_threads = absl::GetFlag(FLAGS_x); + // -y - val_int = absl::GetFlag(FLAGS_y); - if (val_int != 0 /* default value for absl flag -y */) { - if (val_int < 0) return false; - test_config.run_config.inter_op_num_threads = val_int; - } + if (absl::GetFlag(FLAGS_y) < 0) return false; + test_config.run_config.inter_op_num_threads = absl::GetFlag(FLAGS_y); // -P - opt_bool = absl::GetFlag(FLAGS_P); - if (opt_bool) test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; + if (absl::GetFlag(FLAGS_P)) test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; // -c - val = absl::GetFlag(FLAGS_c); - if (val != static_cast(1) /* default value for absl flag -c */) { - if (static_cast(val) <= 0) return false; - test_config.run_config.concurrent_session_runs = val; - } + if (absl::GetFlag(FLAGS_c) <= static_cast(0)) return false; + test_config.run_config.concurrent_session_runs = absl::GetFlag(FLAGS_c); + // -o - val_int = absl::GetFlag(FLAGS_o); - if (val_int != 99 /* default value for absl flag -o */) { + int val_int = absl::GetFlag(FLAGS_o); + if (val_int != 99) { switch (val_int) { case ORT_DISABLE_ALL: test_config.run_config.optimization_level = ORT_DISABLE_ALL; @@ -414,7 +542,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a test_config.run_config.optimization_level = ORT_ENABLE_ALL; break; default: { - if (val > ORT_ENABLE_ALL) { // relax constraint + if (val_int > ORT_ENABLE_ALL) { // relax constraint test_config.run_config.optimization_level = ORT_ENABLE_ALL; } else { return false; @@ -424,27 +552,21 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } // -u - opt_w_str = ToPathString(absl::GetFlag(FLAGS_u)); + PathString opt_w_str = ToPathString(absl::GetFlag(FLAGS_u)); if (!opt_str.empty()) test_config.run_config.optimized_model_path = opt_w_str; // -I - opt_bool = absl::GetFlag(FLAGS_I); - if (opt_bool) test_config.run_config.generate_model_input_binding = true; + if (absl::GetFlag(FLAGS_I)) test_config.run_config.generate_model_input_binding = true; // -d - val_int = absl::GetFlag(FLAGS_d); - if (val_int != 0 /* default value for absl flag -d */) { - if (val_int < 0) return false; - test_config.run_config.cudnn_conv_algo = val_int; - } + if (absl::GetFlag(FLAGS_d) < 0) return false; + test_config.run_config.cudnn_conv_algo = absl::GetFlag(FLAGS_d); // -q - opt_bool = absl::GetFlag(FLAGS_q); - if (opt_bool) test_config.run_config.do_cuda_copy_in_separate_stream = true; + if (absl::GetFlag(FLAGS_q)) test_config.run_config.do_cuda_copy_in_separate_stream = true; // -z - opt_bool = absl::GetFlag(FLAGS_z); - if (opt_bool) test_config.run_config.set_denormal_as_zero = true; + if (absl::GetFlag(FLAGS_z)) test_config.run_config.set_denormal_as_zero = true; // -i opt_w_str = ToPathString(absl::GetFlag(FLAGS_i)); @@ -469,32 +591,26 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } // -D - opt_bool = absl::GetFlag(FLAGS_D); - if (opt_bool) test_config.run_config.disable_spinning = true; + if (absl::GetFlag(FLAGS_D)) test_config.run_config.disable_spinning = true; // -Z - opt_bool = absl::GetFlag(FLAGS_Z); - if (opt_bool) test_config.run_config.disable_spinning_between_run = true; + if (absl::GetFlag(FLAGS_Z)) test_config.run_config.disable_spinning_between_run = true; // -n - opt_bool = absl::GetFlag(FLAGS_n); - if (opt_bool) test_config.run_config.exit_after_session_creation = true; + if (absl::GetFlag(FLAGS_n)) test_config.run_config.exit_after_session_creation = true; // -l - opt_bool = absl::GetFlag(FLAGS_l); - if (opt_bool) test_config.model_info.load_via_path = true; + if (absl::GetFlag(FLAGS_l)) test_config.model_info.load_via_path = true; // -R opt_w_str = ToPathString(absl::GetFlag(FLAGS_R)); if (!opt_w_str.empty()) test_config.run_config.register_custom_op_path = opt_w_str; // -g - opt_bool = absl::GetFlag(FLAGS_g); - if (opt_bool) test_config.run_config.enable_cuda_io_binding = true; + if (absl::GetFlag(FLAGS_g)) test_config.run_config.enable_cuda_io_binding = true; // -X - opt_bool = absl::GetFlag(FLAGS_X); - if (opt_bool) test_config.run_config.use_extensions = true; + if (absl::GetFlag(FLAGS_X)) test_config.run_config.use_extensions = true; // --plugin_ep_libs opt_w_str = ToPathString(absl::GetFlag(FLAGS_plugin_ep_libs)); @@ -509,8 +625,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a if (!opt_w_str.empty()) test_config.run_config.ep_runtime_config_string = opt_w_str; // --list_ep_devices - opt_bool = absl::GetFlag(FLAGS_list_ep_devices); - if (opt_bool) { + if (absl::GetFlag(FLAGS_list_ep_devices)) { test_config.list_available_devices = true; return true; } diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 86c81072233c0..9e8e3c3c548d2 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -3,6 +3,9 @@ #pragma once #include +#include "test_configuration.h" + + namespace onnxruntime { namespace perftest { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 16dd36efd057e..a2532952cd1e1 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -53,6 +53,20 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; + + for (size_t i = 0; i < outputs_.size(); i++) { + Ort::Value& ort_output = outputs_[i]; + const float* output_data = ort_output.GetTensorData(); + gsl::span output_span(output_data, 6); + std::cout << output_span[0] << std::endl; + std::cout << output_span[1] << std::endl; + std::cout << output_span[2] << std::endl; + std::cout << output_span[3] << std::endl; + std::cout << output_span[4] << std::endl; + std::cout << output_span[5] << std::endl; + std::cout << std::endl; + } + return duration_seconds; } diff --git a/onnxruntime/test/perftest/ort_test_session.ccc b/onnxruntime/test/perftest/ort_test_session.ccc new file mode 100644 index 0000000000000..823e1f509b5aa --- /dev/null +++ b/onnxruntime/test/perftest/ort_test_session.ccc @@ -0,0 +1,1109 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) 2023 NVIDIA Corporation. +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates +// Licensed under the MIT License. + +#include "ort_test_session.h" +#include +#include +#include +#include +#include +#include +#include +#include "core/session/onnxruntime_session_options_config_keys.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" +#include "core/providers/dnnl/dnnl_provider_options.h" +#include +#include "providers.h" +#include "TestCase.h" +#include "strings_helper.h" + +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_NV) +#include +#endif + +#ifdef USE_OPENVINO +#include "nlohmann/json.hpp" +#endif + +#ifdef USE_DML +#include "core/providers/dml/dml_provider_factory.h" +#include "core/providers/dml/dml_session_options_config_keys.h" +#endif + +#ifdef _WIN32 +#define strdup _strdup +#endif +extern const OrtApi* g_ort; + +namespace onnxruntime { +namespace perftest { + +std::chrono::duration OnnxRuntimeTestSession::Run() { + // Randomly pick one OrtValueArray from test_inputs_. (NOT ThreadSafe) + const std::uniform_int_distribution::param_type p(0, static_cast(test_inputs_.size() - 1)); + const size_t id = static_cast(dist_(rand_engine_, p)); + + auto& input = test_inputs_.at(id); + auto start = std::chrono::high_resolution_clock::now(); + + session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), + output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); + + for (size_t i = 0; i < outputs_.size(); i++) { + Ort::Value& ort_output = outputs_[i]; + const float* output_data = ort_output.GetTensorData(); + gsl::span output_span(output_data, 6); + std::cout << output_span[0] << std::endl; + std::cout << output_span[1] << std::endl; + std::cout << output_span[2] << std::endl; + std::cout << output_span[3] << std::endl; + std::cout << output_span[4] << std::endl; + std::cout << output_span[5] << std::endl; + } + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration_seconds = end - start; + return duration_seconds; +} + +OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device& rd, + const PerformanceTestConfig& performance_test_config, + const TestModelInfo& m) + : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { + Ort::SessionOptions session_options; + +#ifdef _MSC_VER + std::string ep_names_and_libs_string = ToUTF8String(performance_test_config.plugin_ep_names_and_libs); +#else + std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; +#endif + std::unordered_map ep_names_to_libs; + ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); + bool is_plugin_ep_avaiable = false; + + if (ep_names_to_libs.size() > 0) { + // Register plugin EP libraries if provided via "-L" argument. + for (auto& pair : ep_names_to_libs) { + const std::filesystem::path library_path = pair.second; + const std::string registration_name = pair.first; + env.RegisterExecutionProviderLibrary(registration_name.c_str(), library_path.c_str()); + registered_plugin_ep_names_.push_back(registration_name); + } + + std::vector ep_devices = env.GetEpDevices(); + std::vector added_ep_devices; + + // All OrtEpDevice instances must be from the same execution provider. + // Find the OrtEpDevice associated with the execution provider provided via "-e" argument. + Ort::ConstEpDevice plugin_ep_device; + for (Ort::ConstEpDevice& device : ep_devices) { + if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { + plugin_ep_device = device; + added_ep_devices.push_back(plugin_ep_device); + } + } + + if (added_ep_devices.empty()) { + for (auto ep_name : registered_plugin_ep_names_) { + env.UnregisterExecutionProviderLibrary(ep_name.c_str()); + } + ORT_THROW( + "[ERROR] [plugin EP] No matching execution provider name found in EP library's factory."); + } + +#if defined(_MSC_VER) + std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string provider_option_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + std::unordered_map provider_options; + ParseSessionConfigs(provider_option_string, provider_options); + session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); + is_plugin_ep_avaiable = true; + } + + provider_name_ = performance_test_config.machine_config.provider_type_name; + std::unordered_map provider_options; + if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { +#ifdef USE_DNNL + // Generate provider options + OrtDnnlProviderOptions dnnl_options; + dnnl_options.use_arena = 1; + dnnl_options.threadpool_args = nullptr; + +#if !defined(DNNL_ORT_THREAD) +#if defined(_MSC_VER) + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif // defined(_MSC_VER) + int num_threads = 0; + ParseSessionConfigs(ov_string, provider_options, {"num_of_threads"}); + for (const auto& provider_option : provider_options) { + if (provider_option.first == "num_of_threads") { + std::stringstream sstream(provider_option.second); + sstream >> num_threads; + if (num_threads < 0) { + ORT_THROW( + "[ERROR] [OneDNN] Invalid entry for the key 'num_of_threads'," + " set number of threads or use '0' for default\n"); + // If the user doesnt define num_threads, auto detect threads later + } + } + } + dnnl_options.threadpool_args = static_cast(&num_threads); +#endif // !defined(DNNL_ORT_THREAD) + dnnl_options.use_arena = performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0; + + session_options.AppendExecutionProvider_Dnnl(dnnl_options); +#else + ORT_THROW("DNNL is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kCudaExecutionProvider) { +#ifdef USE_CUDA + const auto& api = Ort::GetApi(); + OrtCUDAProviderOptionsV2* cuda_options; + Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options)); + std::vector option_keys, option_values; + // used to keep all option keys and value strings alive + std::list buffer; + buffer.emplace_back("cudnn_conv_algo_search"); + option_keys.push_back(buffer.back().c_str()); + switch (performance_test_config.run_config.cudnn_conv_algo) { + case 0: + buffer.emplace_back("EXHAUSTIVE"); + break; + case 1: + buffer.emplace_back("HEURISTIC"); + break; + default: + buffer.emplace_back("DEFAULT"); + break; + } + option_values.push_back(buffer.back().c_str()); + + buffer.emplace_back("do_copy_in_default_stream"); + option_keys.push_back(buffer.back().c_str()); + buffer.emplace_back(!performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0"); + option_values.push_back(buffer.back().c_str()); + +#ifdef _MSC_VER + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(ov_string, provider_options); + for (const auto& provider_option : provider_options) { + option_keys.push_back(provider_option.first.c_str()); + option_values.push_back(provider_option.second.c_str()); + } + + Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options, + option_keys.data(), option_values.data(), option_keys.size())); + if (!status.IsOK()) { + OrtAllocator* allocator; + char* options; + Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); + Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options)); + ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(), + "\nSupported options are:\n", options); + } + session_options.AppendExecutionProvider_CUDA_V2(*cuda_options); + if (performance_test_config.run_config.enable_cuda_io_binding) { + device_memory_name_ = CUDA; + } +#else + ORT_THROW("CUDA is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kTensorrtExecutionProvider) { +#ifdef USE_TENSORRT + const auto& api = Ort::GetApi(); + OrtTensorRTProviderOptionsV2* tensorrt_options; + Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options)); + std::unique_ptr rel_trt_options( + tensorrt_options, api.ReleaseTensorRTProviderOptions); + std::vector option_keys, option_values; + // used to keep all option keys and value strings alive + std::list buffer; + +#ifdef _MSC_VER + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(ov_string, provider_options); + for (const auto& provider_option : provider_options) { + option_keys.push_back(provider_option.first.c_str()); + option_values.push_back(provider_option.second.c_str()); + } + Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options, + option_keys.data(), option_values.data(), option_keys.size())); + if (!status.IsOK()) { + OrtAllocator* allocator; + char* options; + Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); + Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options)); + ORT_THROW("[ERROR] [TensorRT] Configuring the CUDA options failed with message: ", status.GetErrorMessage(), + "\nSupported options are:\n", options); + } + + session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options); + + OrtCUDAProviderOptions cuda_options; + cuda_options.device_id = tensorrt_options->device_id; + cuda_options.cudnn_conv_algo_search = static_cast(performance_test_config.run_config.cudnn_conv_algo); + cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; + // TODO: Support arena configuration for users of perf test + session_options.AppendExecutionProvider_CUDA(cuda_options); + if (performance_test_config.run_config.enable_cuda_io_binding) { + device_memory_name_ = CUDA; + } +#else + ORT_THROW("TensorRT is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kNvTensorRTRTXExecutionProvider) { +#ifdef USE_NV + session_options.AppendExecutionProvider("NvTensorRtRtx", provider_options); + if (performance_test_config.run_config.enable_cuda_io_binding) { + device_memory_name_ = CUDA; + } +#else + ORT_THROW("NV TensorRT RTX is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kQnnExecutionProvider) { +#ifdef USE_QNN +#ifdef _MSC_VER + std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string option_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(option_string, provider_options, + {"backend_type", "backend_path", "profiling_file_path", "profiling_level", + "rpc_control_latency", "vtcm_mb", "soc_model", "device_id", "htp_performance_mode", "op_packages", + "qnn_saver_path", "htp_graph_finalization_optimization_mode", "qnn_context_priority", + "htp_arch", "enable_htp_fp16_precision", "offload_graph_io_quantization", + "enable_htp_spill_fill_buffer", "enable_htp_shared_memory_allocator", "dump_json_qnn_graph", + "json_qnn_graph_dir"}); + for (const auto& provider_option : provider_options) { + const std::string& key = provider_option.first; + const std::string& value = provider_option.second; + if (key == "backend_path" || key == "profiling_file_path" || key == "json_qnn_graph_dir") { + if (value.empty()) { + ORT_THROW("Please provide the valid file path."); + } + } else if (key == "profiling_level") { + std::set supported_profiling_level = {"off", "basic", "detailed"}; + if (supported_profiling_level.find(value) == supported_profiling_level.end()) { + ORT_THROW("Supported profiling_level: off, basic, detailed"); + } + } else if (key == "backend_type" || key == "rpc_control_latency" || key == "vtcm_mb" || key == "soc_model" || + key == "device_id") { + // no validation + } else if (key == "htp_performance_mode") { + std::set supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance", + "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", + "power_saver", "sustained_high_performance"}; + if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) { + std::ostringstream str_stream; + std::copy(supported_htp_perf_mode.begin(), supported_htp_perf_mode.end(), + std::ostream_iterator(str_stream, ",")); + std::string str = str_stream.str(); + ORT_THROW("Supported htp_performance_mode: " + str); + } + } else if (key == "op_packages") { + if (value.empty()) { + ORT_THROW("Please provide the valid op_packages."); + } + } else if (key == "qnn_saver_path") { + // no validation + } else if (key == "htp_graph_finalization_optimization_mode") { + std::set supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"}; + if (supported_htp_graph_final_opt_modes.find(value) == supported_htp_graph_final_opt_modes.end()) { + std::ostringstream str_stream; + std::copy(supported_htp_graph_final_opt_modes.begin(), supported_htp_graph_final_opt_modes.end(), + std::ostream_iterator(str_stream, ",")); + std::string str = str_stream.str(); + ORT_THROW("Wrong value for htp_graph_finalization_optimization_mode. select from: " + str); + } + } else if (key == "qnn_context_priority") { + std::set supported_qnn_context_priority = {"low", "normal", "normal_high", "high"}; + if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) { + ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high"); + } + } else if (key == "htp_arch") { + std::set supported_htp_archs = {"0", "68", "69", "73", "75"}; + if (supported_htp_archs.find(value) == supported_htp_archs.end()) { + std::ostringstream str_stream; + std::copy(supported_htp_archs.begin(), supported_htp_archs.end(), + std::ostream_iterator(str_stream, ",")); + std::string str = str_stream.str(); + ORT_THROW("Wrong value for htp_arch. select from: " + str); + } + } else if (key == "enable_htp_fp16_precision" || + key == "offload_graph_io_quantization" || + key == "enable_htp_spill_fill_buffer" || + key == "enable_htp_shared_memory_allocator" || + key == "dump_json_qnn_graph") { + std::set supported_options = {"0", "1"}; + if (supported_options.find(value) == supported_options.end()) { + std::ostringstream str_stream; + std::copy(supported_options.begin(), supported_options.end(), + std::ostream_iterator(str_stream, ",")); + std::string str = str_stream.str(); + ORT_THROW("Wrong value for ", key, ". select from: ", str); + } + + if (key == "enable_htp_shared_memory_allocator" && value == "1") { + // if this option is set, also use the enabled allocator + device_memory_name_ = "QnnHtpShared"; + } + } + } + session_options.AppendExecutionProvider("QNN", provider_options); +#else + ORT_THROW("QNN is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kSnpeExecutionProvider) { +#ifdef USE_SNPE +#ifdef _MSC_VER + std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string option_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(option_string, provider_options, {"runtime", "priority", "buffer_type", "enable_init_cache"}); + for (const auto& provider_option : provider_options) { + if (key == "runtime") { + std::set supported_runtime = {"CPU", "GPU_FP32", "GPU", "GPU_FLOAT16", "DSP", "AIP_FIXED_TF"}; + if (supported_runtime.find(value) == supported_runtime.end()) { + ORT_THROW(R"(Wrong configuration value for the key 'runtime'. +select from 'CPU', 'GPU_FP32', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n)"); + } + } else if (key == "priority") { + // no validation + } else if (key == "buffer_type") { + std::set supported_buffer_type = {"TF8", "TF16", "UINT8", "FLOAT", "ITENSOR"}; + if (supported_buffer_type.find(value) == supported_buffer_type.end()) { + ORT_THROW(R"(Wrong configuration value for the key 'buffer_type'. +select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); + } + } else if (key == "enable_init_cache") { + if (value != "1") { + ORT_THROW("Set to 1 to enable_init_cache."); + } + } + } + + session_options.AppendExecutionProvider("SNPE", provider_options); +#else + ORT_THROW("SNPE is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kNnapiExecutionProvider) { +#ifdef USE_NNAPI + uint32_t nnapi_flags = 0; +#ifdef _MSC_VER + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + std::istringstream ss(ov_string); + std::string key; + while (ss >> key) { + if (key == "NNAPI_FLAG_USE_FP16") { + nnapi_flags |= NNAPI_FLAG_USE_FP16; + } else if (key == "NNAPI_FLAG_USE_NCHW") { + nnapi_flags |= NNAPI_FLAG_USE_NCHW; + } else if (key == "NNAPI_FLAG_CPU_DISABLED") { + nnapi_flags |= NNAPI_FLAG_CPU_DISABLED; + } else if (key == "NNAPI_FLAG_CPU_ONLY") { + nnapi_flags |= NNAPI_FLAG_CPU_ONLY; + } else if (key.empty()) { + } else { + ORT_THROW( + "[ERROR] [NNAPI] wrong key type entered. Choose from the following runtime key options " + "that are available for NNAPI. " + "['NNAPI_FLAG_USE_FP16', 'NNAPI_FLAG_USE_NCHW', 'NNAPI_FLAG_CPU_DISABLED', 'NNAPI_FLAG_CPU_ONLY'] \n"); + } + } + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nnapi(session_options, nnapi_flags)); +#else + ORT_THROW("NNAPI is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kVSINPUExecutionProvider) { +#ifdef USE_VSINPU + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_VSINPU(session_options)); +#else + ORT_THROW("VSINPU is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kCoreMLExecutionProvider) { +#ifdef __APPLE__ +#ifdef USE_COREML + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; + static const std::unordered_set available_keys = {kCoremlProviderOption_MLComputeUnits, + kCoremlProviderOption_ModelFormat, + kCoremlProviderOption_RequireStaticInputShapes, + kCoremlProviderOption_EnableOnSubgraphs, + kCoremlProviderOption_SpecializationStrategy, + kCoremlProviderOption_ProfileComputePlan, + kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU, + kCoremlProviderOption_ModelCacheDirectory}; + ParseSessionConfigs(ov_string, provider_options, available_keys); + + std::unordered_map available_options = { + {"CPUAndNeuralEngine", "1"}, + {"CPUAndGPU", "1"}, + {"CPUOnly", "1"}, + {"ALL", "1"}, + }; + for (const auto& provider_option : provider_options) { + if (provider_option.first == kCoremlProviderOption_MLComputeUnits && + available_options.find(provider_option.second) != available_options.end()) { + } else if (provider_option.first == kCoremlProviderOption_ModelFormat && + (provider_option.second == "MLProgram" || provider_option.second == "NeuralNetwork")) { + } else if (provider_option.first == kCoremlProviderOption_RequireStaticInputShapes && + (provider_option.second == "1" || provider_option.second == "0")) { + } else if (provider_option.first == kCoremlProviderOption_EnableOnSubgraphs && + (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_SpecializationStrategy && + (provider_option.second == "Default" || provider_option.second == "FastPrediction")) { + } else if (provider_option.first == kCoremlProviderOption_ProfileComputePlan && + (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU && + (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_ModelCacheDirectory) { + } else { + ORT_THROW("Invalid value for option ", provider_option.first, ": ", provider_option.second); + } + } + // COREML_FLAG_CREATE_MLPROGRAM + session_options.AppendExecutionProvider("CoreML", provider_options); +#else + ORT_THROW("CoreML is not supported in this build\n"); +#endif +#else + ORT_THROW("COREML is not supported on this platform.\n"); +#endif + } else if (provider_name_ == onnxruntime::kDmlExecutionProvider) { +#ifdef USE_DML +#ifdef _MSC_VER + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(ov_string, provider_options, + {"device_filter", "performance_preference", "disable_metacommands", + "enable_graph_capture", "enable_graph_serialization"}); + for (const auto& provider_option : provider_options) { + const std::string& key = provider_option.first; + const std::string& value = provider_option.second; + if (key == "device_filter") { + std::set ov_supported_device_types = {"gpu", "npu"}; + if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { + } else { + ORT_THROW( + "[ERROR] [DML] You have selected a wrong configuration value for the key 'device_filter'. " + "Select from 'gpu', or 'npu' \n"); + } + } else if (key == "performance_preference") { + std::set ov_supported_values = {"default", "high_performance", "minimum_power"}; + if (ov_supported_values.find(value) != ov_supported_values.end()) { + } else { + ORT_THROW( + "[ERROR] [DML] You have selected a wrong configuration value for the key 'performance_preference'. " + "Select from 'default', 'high_performance' or 'minimum_power' \n"); + } + } else if (key == "disable_metacommands") { + std::set ov_supported_values = {"true", "True", "false", "False"}; + if (ov_supported_values.find(value) != ov_supported_values.end()) { + } else { + ORT_THROW( + "[ERROR] [DML] You have selected a wrong value for the key 'disable_metacommands'. " + "Select from 'true' or 'false' \n"); + } + } else if (key == "enable_graph_capture") { + std::set ov_supported_values = {"true", "True", "false", "False"}; + if (ov_supported_values.find(value) != ov_supported_values.end()) { + } else { + ORT_THROW( + "[ERROR] [DML] You have selected a wrong value for the key 'enable_graph_capture'. " + "Select from 'true' or 'false' \n"); + } + } else if (key == "enable_graph_serialization") { + std::set ov_supported_values = {"true", "True", "false", "False"}; + if (ov_supported_values.find(value) != ov_supported_values.end()) { + session_options.AddConfigEntry(kOrtSessionOptionsConfigEnableGraphSerialization, value.data()); + } else { + ORT_THROW( + "[ERROR] [DML] You have selected a wrong value for the key 'enable_graph_serialization'. " + "Select from 'true' or 'false' \n"); + } + } + } + if (provider_options.find("performance_preference") == provider_options.end()) { + provider_options["performance_preference"] = "high_performance"; + } + if (provider_options.find("device_filter") == provider_options.end()) { + provider_options["device_filter"] = "gpu"; + } + if (provider_options.find("disable_metacommands") == provider_options.end()) { + provider_options["disable_metacommands"] = "false"; + } + if (provider_options.find("enable_graph_capture") == provider_options.end()) { + provider_options["enable_graph_capture"] = "false"; + } + session_options.AppendExecutionProvider("DML", provider_options); +#else + ORT_THROW("DML is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kAclExecutionProvider) { +#ifdef USE_ACL +#if defined(_MSC_VER) + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif // defined(_MSC_VER) + bool enable_fast_math = false; + ParseSessionConfigs(ov_string, provider_options, {"enable_fast_math"}); + for (const auto& provider_option : provider_options) { + const std::string& key = provider_option.first; + const std::string& value = provider_option.second; + if (key == "enable_fast_math") { + std::set ov_supported_values = {"true", "True", "false", "False"}; + if (ov_supported_values.find(value) != ov_supported_values.end()) { + enable_fast_math = (value == "true") || (value == "True"); + } else { + ORT_THROW( + "[ERROR] [ACL] You have selcted an invalid value for the key 'enable_fast_math'. " + "Select from 'true' or 'false' \n"); + } + } + } + Ort::ThrowOnError( + OrtSessionOptionsAppendExecutionProvider_ACL(session_options, enable_fast_math)); +#else + ORT_THROW("Acl is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kArmNNExecutionProvider) { +#ifdef USE_ARMNN + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ArmNN(session_options, + performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); +#else + ORT_THROW("ArmNN is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kRocmExecutionProvider) { +#ifdef USE_ROCM + OrtROCMProviderOptions rocm_options; + rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo; + rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; + // TODO: Support arena configuration for users of perf test + session_options.AppendExecutionProvider_ROCM(rocm_options); +#else + ORT_THROW("ROCM is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kMIGraphXExecutionProvider) { +#ifdef USE_MIGRAPHX + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0)); +#else + ORT_THROW("MIGraphX is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kXnnpackExecutionProvider) { +#ifdef USE_XNNPACK + session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); + session_options.AppendExecutionProvider( + "XNNPACK", {{"intra_op_num_threads", std::to_string(performance_test_config.run_config.intra_op_num_threads)}}); +#else + ORT_THROW("Xnnpack is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kWebGpuExecutionProvider) { +#ifdef USE_WEBGPU + session_options.AppendExecutionProvider("WebGPU", {}); +#else + ORT_THROW("WebGPU is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kVitisAIExecutionProvider) { +#ifdef USE_VITISAI +#ifdef _MSC_VER + std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string option_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + ParseSessionConfigs(option_string, provider_options); + + session_options.AppendExecutionProvider_VitisAI(provider_options); +#else + ORT_THROW("VitisAI is not supported in this build\n"); +#endif + } else if (!provider_name_.empty() && + provider_name_ != onnxruntime::kCpuExecutionProvider && + provider_name_ != onnxruntime::kOpenVINOExecutionProvider && + !is_plugin_ep_avaiable) { + ORT_THROW("This backend is not included in perf test runner.\n"); + } + + if (performance_test_config.run_config.enable_cpu_mem_arena) + session_options.EnableCpuMemArena(); + else + session_options.DisableCpuMemArena(); + if (performance_test_config.run_config.enable_memory_pattern && + performance_test_config.run_config.execution_mode == ExecutionMode::ORT_SEQUENTIAL) + session_options.EnableMemPattern(); + else + session_options.DisableMemPattern(); + session_options.SetExecutionMode(performance_test_config.run_config.execution_mode); + + // Set any extra session configuration entries provided by the user via command-line arguments. + // + // Some session config entries can also be set via dedicated command-line options. + // If the user uses multiple command-line options to set the same session config entry, + // we'll print a warning. Note that the dedicated command-line options will take precedence. + const auto& user_session_configs = performance_test_config.run_config.session_config_entries; + for (auto& it : user_session_configs) { + session_options.AddConfigEntry(it.first.c_str(), it.second.c_str()); + } + + auto warn_dup_config_entry = [&user_session_configs](const char* key) -> void { + if (user_session_configs.find(key) != user_session_configs.end()) { + fprintf(stderr, "[WARNING]: Trying to set session config entry '%s' via multiple command-line options\n", key); + } + }; + + if (performance_test_config.run_config.intra_op_num_threads > 0) { + fprintf(stdout, "Setting intra_op_num_threads to %d\n", performance_test_config.run_config.intra_op_num_threads); + session_options.SetIntraOpNumThreads(performance_test_config.run_config.intra_op_num_threads); + } + + if (!performance_test_config.run_config.intra_op_thread_affinities.empty()) { + warn_dup_config_entry(kOrtSessionOptionsConfigIntraOpThreadAffinities); + fprintf(stdout, "Setting intra op thread affinity as %s\n", performance_test_config.run_config.intra_op_thread_affinities.c_str()); + session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); + } + + if (performance_test_config.run_config.disable_spinning) { + warn_dup_config_entry(kOrtSessionOptionsConfigAllowIntraOpSpinning); + fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); + session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); + } + + if (performance_test_config.run_config.disable_spinning_between_run) { + warn_dup_config_entry(kOrtSessionOptionsConfigForceSpinningStop); + fprintf(stdout, "Disabling intra-op thread spinning between runs\n"); + session_options.AddConfigEntry(kOrtSessionOptionsConfigForceSpinningStop, "1"); + } + + if (!performance_test_config.run_config.register_custom_op_path.empty()) { + session_options.RegisterCustomOpsLibrary(performance_test_config.run_config.register_custom_op_path.c_str()); + } + + if (performance_test_config.run_config.execution_mode == ExecutionMode::ORT_PARALLEL && performance_test_config.run_config.inter_op_num_threads > 0) { + fprintf(stdout, "Setting inter_op_num_threads to %d\n", performance_test_config.run_config.inter_op_num_threads); + session_options.SetInterOpNumThreads(performance_test_config.run_config.inter_op_num_threads); + } + + // Set optimization level. + session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); + if (!performance_test_config.run_config.profile_file.empty()) { + session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); + } + if (!performance_test_config.run_config.optimized_model_path.empty()) { + session_options.SetOptimizedModelFilePath(performance_test_config.run_config.optimized_model_path.c_str()); + } + if (performance_test_config.run_config.set_denormal_as_zero) { + warn_dup_config_entry(kOrtSessionOptionsConfigSetDenormalAsZero); + session_options.AddConfigEntry(kOrtSessionOptionsConfigSetDenormalAsZero, "1"); + } + if (!performance_test_config.run_config.free_dim_name_overrides.empty()) { + for (auto const& dim_override : performance_test_config.run_config.free_dim_name_overrides) { + if (g_ort->AddFreeDimensionOverrideByName(session_options, ToUTF8String(dim_override.first).c_str(), dim_override.second) != nullptr) { + fprintf(stderr, "AddFreeDimensionOverrideByName failed for named dimension: %s\n", ToUTF8String(dim_override.first).c_str()); + } else { + fprintf(stdout, "Overriding dimension with name, %s, to %d\n", ToUTF8String(dim_override.first).c_str(), (int)dim_override.second); + } + } + } + if (!performance_test_config.run_config.free_dim_denotation_overrides.empty()) { + for (auto const& dim_override : performance_test_config.run_config.free_dim_denotation_overrides) { + if (g_ort->AddFreeDimensionOverride(session_options, ToUTF8String(dim_override.first).c_str(), dim_override.second) != nullptr) { + fprintf(stderr, "AddFreeDimensionOverride failed for dimension denotation: %s\n", ToUTF8String(dim_override.first).c_str()); + } else { + fprintf(stdout, "Overriding dimension with denotation, %s, to %d\n", ToUTF8String(dim_override.first).c_str(), (int)dim_override.second); + } + } + } + if (provider_name_ == onnxruntime::kOpenVINOExecutionProvider) { +#ifdef USE_OPENVINO +#ifdef _MSC_VER + std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); +#else + std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; +#endif + std::unordered_map ov_options; + std::istringstream ss(ov_string); + std::string token; + while (ss >> token) { + if (token == "") { + continue; + } + auto pos = token.find("|"); + if (pos == std::string::npos || pos == 0 || pos == token.length()) { + ORT_THROW("[ERROR] [OpenVINO] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); + } + + auto key = token.substr(0, pos); + auto value = token.substr(pos + 1); + + if (key == "device_type") { + std::set ov_supported_device_types = {"CPU", "GPU", + "GPU.0", "GPU.1", "NPU"}; + std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", + "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", + "GPU.0_FP16", "GPU.1_FP16"}; + size_t num_gpus = 10; + for (size_t i = 0; i <= num_gpus; i++) { + ov_supported_device_types.emplace("GPU." + std::to_string(i)); + } + if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { + ov_options[key] = value; + } else if (deprecated_device_types.find(value) != deprecated_device_types.end()) { + ov_options[key] = value; + } else if (value.find("HETERO") == 0) { + ov_options[key] = value; + } else if (value.find("MULTI") == 0) { + ov_options[key] = value; + } else if (value.find("AUTO") == 0) { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " + "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" + " HETERO/MULTI/AUTO options available. \n"); + } + } else if (key == "device_id") { + if (value == "CPU" || value == "GPU" || value == "NPU") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); + } + } else if (key == "precision") { + auto device_type = ov_options["device_type"]; + if (device_type.find("GPU") != std::string::npos) { + if (value == "") { + ov_options[key] = "FP16"; + continue; + } else if (value == "ACCURACY" || value == "FP16" || value == "FP32") { + ov_options[key] = value; + continue; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] Unsupported inference precision is selected. " + "GPU only supported FP32 / FP16. \n"); + } + } else if (device_type.find("NPU") != std::string::npos) { + if (value == "" || value == "ACCURACY" || value == "FP16") { + ov_options[key] = "FP16"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. \n"); + } + } else if (device_type.find("CPU") != std::string::npos) { + if (value == "" || value == "ACCURACY" || value == "FP32") { + ov_options[key] = "FP32"; + continue; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n"); + } + } + } else if (key == "enable_opencl_throttling") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "enable_qdq_optimizer") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_qdq_optimizer' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "enable_causallm") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] The value for the key 'enable_causallm' should be a boolean i.e. true or false." + " Default value is false. This provider option must be used with CausalLM Models viz. LLMs & SLMs only.\n"); + } + } else if (key == "disable_dynamic_shapes") { + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' " + "should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "num_of_threads") { + if (std::stoi(value) <= 0) { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n"); + } else { + ov_options[key] = value; + } + } else if (key == "load_config") { + auto load_json = [&](std::string filename) -> std::string { + std::ifstream input_filestream(filename); + if (!input_filestream.is_open()) { + ORT_THROW("Passed an invalid JSON config file path \"" + filename + "\"."); + } + nlohmann::json json_config; + try { + input_filestream >> json_config; + } catch (const OnnxRuntimeException& ex) { + ORT_THROW("Exception parsing config file \"" + filename + "\".\n" + ex.what()); + } catch (const std::exception& ex) { + throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what()); + } catch (...) { + throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n"); + } + if (json_config.empty()) { + ORT_THROW("Empty JSON content passed \"" + filename + "\"."); + } + return json_config.dump(); + }; + ov_options[key] = load_json(value); + } else if (key == "model_priority") { + ov_options[key] = value; + } else if (key == "cache_dir") { + ov_options[key] = value; + } else if (key == "context") { + ov_options[key] = value; + } else if (key == "num_streams") { + if (std::stoi(value) <= 0 && std::stoi(value) > 8) { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n"); + } else { + ov_options[key] = value; + } + } else if (key == "device_memory_name") { + device_memory_name_ = std::move(value); + } else if (key == "device_luid") { + ov_options[key] = value; + } else if (key == "reshape_input") { + ov_options[key] = value; + } else { + ORT_THROW( + "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." + " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " + "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," + " 'enable_causallm', 'model_priority'] \n"); + } + } + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); +#else + ORT_THROW("OpenVINO is not supported in this build\n"); +#endif + } + + if (performance_test_config.run_config.use_extensions) { + session_options.EnableOrtCustomOps(); + } + + if (!performance_test_config.model_info.load_via_path) { + session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); + } else { + std::ifstream file(performance_test_config.model_info.model_file_path.c_str(), + std::ios::binary | std::ios::in | std::ios::ate); + if (file.is_open()) { + const std::streampos fsize = file.tellg(); + file.seekg(0, std::ios_base::beg); + std::vector model_bytes(narrow(fsize)); + file.read(model_bytes.data(), narrow(fsize)); + session_ = Ort::Session(env, model_bytes.data(), model_bytes.size(), session_options); + } else { + ORT_THROW("Model file could not be opened.\n"); + } + } + size_t output_count = session_.GetOutputCount(); + output_names_.resize(output_count); + Ort::AllocatorWithDefaultOptions a; + for (size_t i = 0; i != output_count; ++i) { + auto output_name = session_.GetOutputNameAllocated(i, a); + assert(output_name != nullptr); + output_names_[i] = output_name.get(); + } + output_names_raw_ptr.resize(output_count); + for (size_t i = 0; i != output_count; ++i) { + output_names_raw_ptr[i] = output_names_[i].c_str(); + } + + const size_t input_count = static_cast(m.GetInputCount()); + for (size_t i = 0; i != input_count; ++i) { + input_names_str_[i] = m.GetInputName(i); + input_names_[i] = input_names_str_[i].c_str(); + } + + auto transform_fcn = std::function(); + auto new_value = std::function&, Ort::ConstTensorTypeAndShapeInfo&)>(); + if (device_memory_name_.empty()) { + transform_fcn = [](int64_t input) { return input; }; + new_value = [](OrtAllocator*, const std::vector&, Ort::ConstTensorTypeAndShapeInfo&) { + return Ort::Value(nullptr); + }; + } else { + Ort::MemoryInfo memory_info(nullptr); // Default initialize, will be overwritten + if (device_memory_name_ == CUDA) { + memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeDefault); + } else { + memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeCPUOutput); + } + custom_allocator_ = Ort::Allocator(session_, memory_info); + allocator_ = custom_allocator_; + + // free dimensions are treated as 1 if not overridden + transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; + new_value = [](OrtAllocator* allocator, const std::vector& output_shape, Ort::ConstTensorTypeAndShapeInfo& tensor_info) { + return Ort::Value::CreateTensor(allocator, output_shape.data(), output_shape.size(), tensor_info.GetElementType()); + }; + } + + for (size_t i = 0; i < output_names_raw_ptr.size(); i++) { + Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector output_shape = tensor_info.GetShape(); + std::transform(output_shape.begin(), output_shape.end(), output_shape.begin(), transform_fcn); + outputs_.emplace_back(new_value(allocator_, output_shape, tensor_info)); + } +} + +template +static void FillTensorDataTyped(Ort::Value& tensor, size_t count, int32_t seed = -1, T value = T{}) { + T* data = tensor.GetTensorMutableData(); + + bool random_init = false; + + if (seed >= 0) { + random_init = true; + + std::default_random_engine engine; + engine.seed(seed); + if constexpr (std::is_same::value) { + T max_value = 5.0f; + const std::uniform_real_distribution::param_type p(0, static_cast(max_value)); + std::uniform_real_distribution dist; + for (size_t i = 0; i < count; ++i) { + data[i] = dist(engine, p); + } + } else if constexpr (std::is_same::value || std::is_same::value) { + T max_value = std::numeric_limits::max(); + const std::uniform_int_distribution::param_type p(0, static_cast(max_value)); + std::uniform_int_distribution dist; + for (size_t i = 0; i < count; ++i) { + data[i] = static_cast(dist(engine, p)); + } + } else { + random_init = false; + fprintf(stdout, " this type of data won't be random initialized\n"); + } + } + if (!random_init) { + std::fill_n(data, count, value); + } +} + +// seed=-1 means we keep the initialized it with a constant value "T{}" +// in some case, we want to check the results for multi-runs, with the given we can recap the input data +// another reason is that, the input would be always 255/-127 for uint8_t or int8_t types of input. +// which will produce all zero outputs. +static void InitializeTensorWithSeed(int32_t seed, Ort::Value& tensor) { + const auto type_and_shape = tensor.GetTensorTypeAndShapeInfo(); + const auto count = type_and_shape.GetElementCount(); + const auto element_type = type_and_shape.GetElementType(); + +#define CASE_FOR_TYPE(T) \ + case Ort::TypeToTensorType::type: { \ + FillTensorDataTyped(tensor, count, seed); \ + } break + + switch (element_type) { + CASE_FOR_TYPE(Ort::Float16_t); + CASE_FOR_TYPE(Ort::BFloat16_t); + CASE_FOR_TYPE(float); + CASE_FOR_TYPE(double); + CASE_FOR_TYPE(int8_t); + CASE_FOR_TYPE(int16_t); + CASE_FOR_TYPE(int32_t); + CASE_FOR_TYPE(int64_t); + CASE_FOR_TYPE(uint8_t); + CASE_FOR_TYPE(uint16_t); + CASE_FOR_TYPE(uint32_t); + CASE_FOR_TYPE(uint64_t); + CASE_FOR_TYPE(bool); +#if !defined(DISABLE_FLOAT8_TYPES) + CASE_FOR_TYPE(Ort::Float8E4M3FN_t); + CASE_FOR_TYPE(Ort::Float8E4M3FNUZ_t); + CASE_FOR_TYPE(Ort::Float8E5M2_t); + CASE_FOR_TYPE(Ort::Float8E5M2FNUZ_t); +#endif + case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: + // string tensors are already initialized to contain empty strings + // see onnxruntime::Tensor::Init() + break; + default: + ORT_THROW("Unsupported tensor data type: ", element_type); + } + +#undef CASE_FOR_TYPE +} + +bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData(int32_t seed) { + Ort::AllocatorWithDefaultOptions default_allocator; + // iterate over all input nodes + for (size_t i = 0; i < static_cast(input_length_); i++) { + Ort::TypeInfo type_info = session_.GetInputTypeInfo(i); + if (type_info.GetONNXType() == ONNX_TYPE_TENSOR) { + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector input_node_dim = tensor_info.GetShape(); + + // free dimensions are treated as 1 if not overridden + auto transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; + std::transform(input_node_dim.begin(), input_node_dim.end(), input_node_dim.begin(), transform_fcn); + + if (device_memory_name_ != CUDA) { + Ort::Value input_tensor = Ort::Value::CreateTensor(allocator_, (const int64_t*)input_node_dim.data(), + input_node_dim.size(), tensor_info.GetElementType()); + InitializeTensorWithSeed(seed, input_tensor); + PreLoadTestData(0, i, std::move(input_tensor)); + } +// Create tensor on CPU, initialize and copy to CUDA tensor +#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_NV) + else { + Ort::Value default_tensor = Ort::Value::CreateTensor(default_allocator, (const int64_t*)input_node_dim.data(), + input_node_dim.size(), tensor_info.GetElementType()); + InitializeTensorWithSeed(seed, default_tensor); + + // Get pointer to CPU tensor data + const void* default_ptr = default_tensor.GetTensorRawData(); + + size_t total_bytes = default_tensor.GetTensorSizeInBytes(); + + Ort::Value cuda_tensor = Ort::Value::CreateTensor(allocator_, input_node_dim.data(), + input_node_dim.size(), tensor_info.GetElementType()); + + void* cuda_ptr = cuda_tensor.GetTensorMutableData(); + + // Copy the initialized data from CPU to GPU + cudaError_t cuda_err = cudaMemcpy(cuda_ptr, default_ptr, total_bytes, cudaMemcpyHostToDevice); + if (cuda_err != cudaSuccess) { + ORT_THROW("Failed to copy tensor data from CPU to CUDA device. CUDA Error: ", cudaGetErrorString(cuda_err)); + } + PreLoadTestData(0, i, std::move(cuda_tensor)); + } +#endif + } + } + return true; +} + +} // namespace perftest +} // namespace onnxruntime diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 180b2d5e3f08e..5feb536d8b115 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -35,6 +35,8 @@ void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& te void ListDevices(const Ort::Env& env); +std::string_view Basename(std::string_view filename); + } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index d30f1e417bafd..e2199b1650f06 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -95,6 +95,15 @@ std::vector CStringsFromStrings(std::vector& utf8_args) { } return utf8_argv; } + +// This helper function returns the basename of the filename passed as an argument +std::string_view Basename(std::string_view filename) { + auto last_slash_pos = filename.find_last_of("/\\"); + + return last_slash_pos == absl::string_view::npos + ? filename + : filename.substr(last_slash_pos + 1); +} } // namespace utils } // namespace perftest } // namespace onnxruntime From 706b1c890ce35bfb234fab984641bda01046b384 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 1 Aug 2025 11:43:15 -0700 Subject: [PATCH 35/46] fix build issues --- cmake/onnxruntime_unittests.cmake | 2 +- onnxruntime/test/perftest/command_args_parser.cc | 5 ++--- onnxruntime/test/perftest/common_utils.cc | 9 +++++++++ onnxruntime/test/perftest/utils.h | 2 +- onnxruntime/test/perftest/windows/utils.cc | 9 --------- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 6c5c64e1d3ab4..36ba8db9bdc75 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1272,7 +1272,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_link_libraries(onnxruntime_perf_test PRIVATE debug dbghelp advapi32) endif() else() - target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs}) + target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common absl::flags absl::flags_parse ${onnx_test_libs}) endif() set_target_properties(onnxruntime_perf_test PROPERTIES FOLDER "ONNXRuntimeTest") diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 047542bd09337..da65e77e0022b 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -355,9 +355,9 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a // _main.cc, where the is the name of the binary (without .exe on Windows). See usage_config.cc in abseil for more details. absl::FlagsUsageConfig config; config.contains_help_flags = [](absl::string_view filename) { - auto suffix = utils::Basename(filename); + auto suffix = utils::GetBasename(filename); std::string_view file_has_the_flag_defs(__FILE__); - file_has_the_flag_defs = utils::Basename(file_has_the_flag_defs); + file_has_the_flag_defs = utils::GetBasename(file_has_the_flag_defs); return suffix == file_has_the_flag_defs; }; @@ -366,7 +366,6 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a return std::string(f); }; absl::SetFlagsUsageConfig(config); - absl::SetProgramUsageMessage(CustomUsageMessage()); #ifdef _WIN32 diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index c377618b187a7..fb1db0ef9d216 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -64,6 +64,15 @@ void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& te } } +// This helper function returns the basename of the filename passed as an argument +std::string_view GetBasename(std::string_view filename) { + auto last_slash_pos = filename.find_last_of("/\\"); + + return last_slash_pos == absl::string_view::npos + ? filename + : filename.substr(last_slash_pos + 1); +} + } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 5feb536d8b115..d46dc3e8758e4 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -35,7 +35,7 @@ void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& te void ListDevices(const Ort::Env& env); -std::string_view Basename(std::string_view filename); +std::string_view GetBasename(std::string_view filename); } // namespace utils } // namespace perftest diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index e2199b1650f06..d30f1e417bafd 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -95,15 +95,6 @@ std::vector CStringsFromStrings(std::vector& utf8_args) { } return utf8_argv; } - -// This helper function returns the basename of the filename passed as an argument -std::string_view Basename(std::string_view filename) { - auto last_slash_pos = filename.find_last_of("/\\"); - - return last_slash_pos == absl::string_view::npos - ? filename - : filename.substr(last_slash_pos + 1); -} } // namespace utils } // namespace perftest } // namespace onnxruntime From 991bb4183628936c2cb80d65516562e17754ed72 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 1 Aug 2025 13:50:06 -0700 Subject: [PATCH 36/46] address reveiwer's comments --- .../test/perftest/command_args_parser.cc | 395 +++++++++--------- .../test/perftest/command_args_parser.h | 2 - onnxruntime/test/perftest/ort_test_session.cc | 8 +- onnxruntime/test/perftest/strings_helper.cc | 32 +- 4 files changed, 216 insertions(+), 221 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index da65e77e0022b..8eed8d4e0478b 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -152,13 +152,15 @@ ABSL_FLAG(bool, n, DefaultPerformanceTestConfig().run_config.exit_after_session_ ABSL_FLAG(bool, l, DefaultPerformanceTestConfig().model_info.load_via_path, "Provides file as binary in memory by using fopen before session creation."); ABSL_FLAG(bool, g, DefaultPerformanceTestConfig().run_config.enable_cuda_io_binding, "[TensorRT RTX | TensorRT | CUDA] Enables tensor input and output bindings on CUDA before session run."); ABSL_FLAG(bool, X, DefaultPerformanceTestConfig().run_config.use_extensions, "Registers custom ops from onnxruntime-extensions."); -ABSL_FLAG(std::string, plugin_ep_libs, "", "Specifies a list of plugin execution provider (EP) registration names and their corresponding shared libraries to register.\n" - "[Usage]: --plugin_ep_libs \"plugin_ep_name_1|plugin_ep_1.dll plugin_ep_name_2|plugin_ep_2.dll ... \""); +ABSL_FLAG(std::string, plugin_ep_libs, "", + "Specifies a list of plugin execution provider (EP) registration names and their corresponding shared libraries to register.\n" + "[Usage]: --plugin_ep_libs \"plugin_ep_name_1|plugin_ep_1.dll plugin_ep_name_2|plugin_ep_2.dll ... \""); ABSL_FLAG(std::string, plugin_eps, "", "Specifies a semicolon-separated list of plugin execution providers (EPs) to use."); -ABSL_FLAG(std::string, plugin_ep_options, "", "Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" - "[Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" - "--plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" - "--plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \""); +ABSL_FLAG(std::string, plugin_ep_options, "", + "Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" + "[Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "--plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" + "--plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \""); ABSL_FLAG(bool, list_ep_devices, false, "Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n"); ABSL_FLAG(std::string, select_ep_devices, "", "Specifies a semicolon-separated list of device indices to add to the session and run with."); @@ -349,182 +351,192 @@ std::string CustomUsageMessage() { } bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { - ORT_TRY { - // Following callback is to make sure all the ABSL flags defined above will be showed up when running with "--help". - // Note: By default abseil only wants flags in binary's main. It expects the main routine to reside in .cc or -main.cc or - // _main.cc, where the is the name of the binary (without .exe on Windows). See usage_config.cc in abseil for more details. - absl::FlagsUsageConfig config; - config.contains_help_flags = [](absl::string_view filename) { - auto suffix = utils::GetBasename(filename); - std::string_view file_has_the_flag_defs(__FILE__); - file_has_the_flag_defs = utils::GetBasename(file_has_the_flag_defs); - - return suffix == file_has_the_flag_defs; - }; - - config.normalize_filename = [](absl::string_view f) { - return std::string(f); - }; - absl::SetFlagsUsageConfig(config); - absl::SetProgramUsageMessage(CustomUsageMessage()); + // Following callback is to make sure all the ABSL flags defined above will be showed up when running with "--help". + // Note: By default abseil only wants flags in binary's main. It expects the main routine to reside in .cc or -main.cc or + // _main.cc, where the is the name of the binary (without .exe on Windows). See usage_config.cc in abseil for more details. + absl::FlagsUsageConfig config; + config.contains_help_flags = [](absl::string_view filename) { + auto suffix = utils::GetBasename(filename); + std::string_view file_has_the_flag_defs(__FILE__); + file_has_the_flag_defs = utils::GetBasename(file_has_the_flag_defs); + + return suffix == file_has_the_flag_defs; + }; + + config.normalize_filename = [](absl::string_view f) { + return std::string(f); + }; + absl::SetFlagsUsageConfig(config); + absl::SetProgramUsageMessage(CustomUsageMessage()); #ifdef _WIN32 - auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); - auto utf8_argv = utils::CStringsFromStrings(utf8_strings); + auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); + auto utf8_argv = utils::CStringsFromStrings(utf8_strings); - auto positional = absl::ParseCommandLine(static_cast(utf8_argv.size()), utf8_argv.data()); + auto positional = absl::ParseCommandLine(static_cast(utf8_argv.size()), utf8_argv.data()); #else - auto positional = absl::ParseCommandLine(argc, argv); + auto positional = absl::ParseCommandLine(argc, argv); #endif - // -f - std::string opt_str = absl::GetFlag(FLAGS_f); - if (!opt_str.empty()) { + // -f + { + const auto& dim_override_str = absl::GetFlag(FLAGS_f); + if (!dim_override_str.empty()) { std::string dim_name; int64_t override_val; - if (!ParseDimensionOverride(dim_name, override_val, opt_str.c_str())) { + if (!ParseDimensionOverride(dim_name, override_val, dim_override_str.c_str())) { return false; } test_config.run_config.free_dim_name_overrides[dim_name] = override_val; } + } - // -F - opt_str = absl::GetFlag(FLAGS_F); - if (!opt_str.empty()) { + // -F + { + const auto& dim_override_str = absl::GetFlag(FLAGS_F); + if (!dim_override_str.empty()) { std::string dim_denotation; int64_t override_val; - if (!ParseDimensionOverride(dim_denotation, override_val, opt_str.c_str())) { + if (!ParseDimensionOverride(dim_denotation, override_val, dim_override_str.c_str())) { return false; } test_config.run_config.free_dim_denotation_overrides[dim_denotation] = override_val; } + } - // -m - opt_str = absl::GetFlag(FLAGS_m); - if (!opt_str.empty()) { - if (opt_str == "duration") { + // -m + { + const auto& test_mode_str = absl::GetFlag(FLAGS_m); + if (!test_mode_str.empty()) { + if (test_mode_str == "duration") { test_config.run_config.test_mode = TestMode::kFixDurationMode; - } else if (opt_str == "times") { + } else if (test_mode_str == "times") { test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } else { return false; } } + } - // -p - test_config.run_config.profile_file = ToPathString(absl::GetFlag(FLAGS_p)); + // -p + { + const auto& profile_file = absl::GetFlag(FLAGS_p); + if (!profile_file.empty()) test_config.run_config.profile_file = ToPathString(profile_file); + } - // -M - test_config.run_config.enable_memory_pattern = absl::GetFlag(FLAGS_M); + // -M + test_config.run_config.enable_memory_pattern = absl::GetFlag(FLAGS_M); - // -A - test_config.run_config.enable_cpu_mem_arena = absl::GetFlag(FLAGS_A); + // -A + test_config.run_config.enable_cpu_mem_arena = absl::GetFlag(FLAGS_A); - // -e - opt_str = absl::GetFlag(FLAGS_e); - if (!opt_str.empty()) { - if (opt_str == "cpu") { + // -e + { + auto const& ep = absl::GetFlag(FLAGS_e); + if (!ep.empty()) { + if (ep == "cpu") { test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider; - } else if (opt_str == "cuda") { + } else if (ep == "cuda") { test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider; - } else if (opt_str == "dnnl") { + } else if (ep == "dnnl") { test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider; - } else if (opt_str == "openvino") { + } else if (ep == "openvino") { test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider; - } else if (opt_str == "tensorrt") { + } else if (ep == "tensorrt") { test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider; - } else if (opt_str == "qnn") { + } else if (ep == "qnn") { test_config.machine_config.provider_type_name = onnxruntime::kQnnExecutionProvider; - } else if (opt_str == "snpe") { + } else if (ep == "snpe") { test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider; - } else if (opt_str == "nnapi") { + } else if (ep == "nnapi") { test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider; - } else if (opt_str == "vsinpu") { + } else if (ep == "vsinpu") { test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider; - } else if (opt_str == "coreml") { + } else if (ep == "coreml") { test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider; - } else if (opt_str == "dml") { + } else if (ep == "dml") { test_config.machine_config.provider_type_name = onnxruntime::kDmlExecutionProvider; - } else if (opt_str == "acl") { + } else if (ep == "acl") { test_config.machine_config.provider_type_name = onnxruntime::kAclExecutionProvider; - } else if (opt_str == "armnn") { + } else if (ep == "armnn") { test_config.machine_config.provider_type_name = onnxruntime::kArmNNExecutionProvider; - } else if (opt_str == "rocm") { + } else if (ep == "rocm") { test_config.machine_config.provider_type_name = onnxruntime::kRocmExecutionProvider; - } else if (opt_str == "migraphx") { + } else if (ep == "migraphx") { test_config.machine_config.provider_type_name = onnxruntime::kMIGraphXExecutionProvider; - } else if (opt_str == "xnnpack") { + } else if (ep == "xnnpack") { test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; - } else if (opt_str == "vitisai") { + } else if (ep == "vitisai") { test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; - } else if (opt_str == "webgpu") { + } else if (ep == "webgpu") { test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; - } else if (opt_str == "nvtensorrtrtx") { + } else if (ep == "nvtensorrtrtx") { test_config.machine_config.provider_type_name = onnxruntime::kNvTensorRTRTXExecutionProvider; } else { return false; } } + } - auto is_option_specified = [&](std::string& option) { - for (int i = 1; i < argc; ++i) { - auto utf8_arg = ToUTF8String(argv[i]); - if (utf8_arg == ("-" + option) || utf8_arg == ("--" + option)) { - return true; - } + // Helper function to check if the option is explicitly specified. + // Abseil Flags does not provide this capability by default. + // It cannot distinguish between cases where: + // - The user typed `-r 1000` (explicitly passing the default value), and + // - The user omitted `-r` entirely. + // To determine this accurately, we must inspect argv directly. + auto is_option_specified = [&](std::string option) { + for (int i = 1; i < argc; ++i) { + auto utf8_arg = ToUTF8String(argv[i]); + if (utf8_arg == ("-" + option) || utf8_arg == ("--" + option)) { + return true; } - return false; - }; - - // -r - // - // We can’t tell if: - // The user typed -r 1000 (default value) Or the user didn’t type -r at all. - // We need to parse the argv in order to properly set test_node. - opt_str = "r"; - if (is_option_specified(opt_str)) { - if (absl::GetFlag(FLAGS_r) == static_cast(0)) return false; - test_config.run_config.repeated_times = absl::GetFlag(FLAGS_r); - test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; } + return false; + }; - // -t - opt_str = "t"; - if (is_option_specified(opt_str)) { - if (absl::GetFlag(FLAGS_t) <= static_cast(0)) return false; - test_config.run_config.duration_in_seconds = absl::GetFlag(FLAGS_t); - test_config.run_config.test_mode = TestMode::kFixDurationMode; - } + // -r + if (is_option_specified("r")) { + if (absl::GetFlag(FLAGS_r) == static_cast(0)) return false; + test_config.run_config.repeated_times = absl::GetFlag(FLAGS_r); + test_config.run_config.test_mode = TestMode::KFixRepeatedTimesMode; + } + + // -t + if (is_option_specified("t")) { + if (absl::GetFlag(FLAGS_t) <= static_cast(0)) return false; + test_config.run_config.duration_in_seconds = absl::GetFlag(FLAGS_t); + test_config.run_config.test_mode = TestMode::kFixDurationMode; + } + + // -s + test_config.run_config.f_dump_statistics = absl::GetFlag(FLAGS_s); - // -s - test_config.run_config.f_dump_statistics = absl::GetFlag(FLAGS_s); + // -S + test_config.run_config.random_seed_for_input_data = absl::GetFlag(FLAGS_S); - // -S - test_config.run_config.random_seed_for_input_data = absl::GetFlag(FLAGS_S); + // -v + test_config.run_config.f_verbose = absl::GetFlag(FLAGS_v); - // -v - test_config.run_config.f_verbose = absl::GetFlag(FLAGS_v); + // -x + if (absl::GetFlag(FLAGS_x) < 0) return false; + test_config.run_config.intra_op_num_threads = absl::GetFlag(FLAGS_x); - // -x - if (absl::GetFlag(FLAGS_x) < 0) return false; - test_config.run_config.intra_op_num_threads = absl::GetFlag(FLAGS_x); - - // -y - if (absl::GetFlag(FLAGS_y) < 0) return false; - test_config.run_config.inter_op_num_threads = absl::GetFlag(FLAGS_y); + // -y + if (absl::GetFlag(FLAGS_y) < 0) return false; + test_config.run_config.inter_op_num_threads = absl::GetFlag(FLAGS_y); - // -P - if (absl::GetFlag(FLAGS_P)) test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; + // -P + if (absl::GetFlag(FLAGS_P)) test_config.run_config.execution_mode = ExecutionMode::ORT_PARALLEL; - // -c - if (absl::GetFlag(FLAGS_c) <= static_cast(0)) return false; - test_config.run_config.concurrent_session_runs = absl::GetFlag(FLAGS_c); - + // -c + if (absl::GetFlag(FLAGS_c) <= static_cast(0)) return false; + test_config.run_config.concurrent_session_runs = absl::GetFlag(FLAGS_c); - // -o - int val_int = absl::GetFlag(FLAGS_o); - if (val_int != 99) { - switch (val_int) { + // -o + { + const auto optimization_level = absl::GetFlag(FLAGS_o); + if (optimization_level != test_config.run_config.optimization_level) { + switch (optimization_level) { case ORT_DISABLE_ALL: test_config.run_config.optimization_level = ORT_DISABLE_ALL; break; @@ -541,7 +553,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a test_config.run_config.optimization_level = ORT_ENABLE_ALL; break; default: { - if (val_int > ORT_ENABLE_ALL) { // relax constraint + if (optimization_level > ORT_ENABLE_ALL) { // relax constraint test_config.run_config.optimization_level = ORT_ENABLE_ALL; } else { return false; @@ -549,37 +561,42 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a } } } + } - // -u - PathString opt_w_str = ToPathString(absl::GetFlag(FLAGS_u)); - if (!opt_str.empty()) test_config.run_config.optimized_model_path = opt_w_str; + // -u + { + const auto& optimized_model_path = absl::GetFlag(FLAGS_u); + if (!optimized_model_path.empty()) test_config.run_config.optimized_model_path = ToPathString(optimized_model_path); + } - // -I - if (absl::GetFlag(FLAGS_I)) test_config.run_config.generate_model_input_binding = true; + // -I + test_config.run_config.generate_model_input_binding = absl::GetFlag(FLAGS_I); - // -d - if (absl::GetFlag(FLAGS_d) < 0) return false; - test_config.run_config.cudnn_conv_algo = absl::GetFlag(FLAGS_d); + // -d + if (absl::GetFlag(FLAGS_d) < 0) return false; + test_config.run_config.cudnn_conv_algo = absl::GetFlag(FLAGS_d); - // -q - if (absl::GetFlag(FLAGS_q)) test_config.run_config.do_cuda_copy_in_separate_stream = true; + // -q + test_config.run_config.do_cuda_copy_in_separate_stream = absl::GetFlag(FLAGS_q); - // -z - if (absl::GetFlag(FLAGS_z)) test_config.run_config.set_denormal_as_zero = true; + // -z + test_config.run_config.set_denormal_as_zero = absl::GetFlag(FLAGS_z); - // -i - opt_w_str = ToPathString(absl::GetFlag(FLAGS_i)); - if (!opt_w_str.empty()) test_config.run_config.ep_runtime_config_string = opt_w_str; + // -i + { + const auto& ep_options = absl::GetFlag(FLAGS_i); + if (!ep_options.empty()) test_config.run_config.ep_runtime_config_string = ToPathString(ep_options); + } - // -T - opt_str = absl::GetFlag(FLAGS_T); - if (!opt_str.empty()) test_config.run_config.intra_op_thread_affinities = opt_str; + // -T + if (!absl::GetFlag(FLAGS_T).empty()) test_config.run_config.intra_op_thread_affinities = absl::GetFlag(FLAGS_T); - // -C - opt_str = absl::GetFlag(FLAGS_C); - if (!opt_str.empty()) { + // -C + { + const auto& session_configs = absl::GetFlag(FLAGS_C); + if (!session_configs.empty()) { ORT_TRY { - ParseSessionConfigs(opt_str, test_config.run_config.session_config_entries); + ParseSessionConfigs(session_configs, test_config.run_config.session_config_entries); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -588,65 +605,69 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a return false; } } + } - // -D - if (absl::GetFlag(FLAGS_D)) test_config.run_config.disable_spinning = true; + // -D + test_config.run_config.disable_spinning = absl::GetFlag(FLAGS_D); - // -Z - if (absl::GetFlag(FLAGS_Z)) test_config.run_config.disable_spinning_between_run = true; + // -Z + test_config.run_config.disable_spinning_between_run = absl::GetFlag(FLAGS_Z); - // -n - if (absl::GetFlag(FLAGS_n)) test_config.run_config.exit_after_session_creation = true; + // -n + test_config.run_config.exit_after_session_creation = absl::GetFlag(FLAGS_n); - // -l - if (absl::GetFlag(FLAGS_l)) test_config.model_info.load_via_path = true; + // -l + test_config.model_info.load_via_path = absl::GetFlag(FLAGS_l); - // -R - opt_w_str = ToPathString(absl::GetFlag(FLAGS_R)); - if (!opt_w_str.empty()) test_config.run_config.register_custom_op_path = opt_w_str; + // -R + { + const auto& register_custom_op_path = absl::GetFlag(FLAGS_R); + if (!register_custom_op_path.empty()) test_config.run_config.register_custom_op_path = ToPathString(register_custom_op_path); + } - // -g - if (absl::GetFlag(FLAGS_g)) test_config.run_config.enable_cuda_io_binding = true; + // -g + test_config.run_config.enable_cuda_io_binding = absl::GetFlag(FLAGS_g); - // -X - if (absl::GetFlag(FLAGS_X)) test_config.run_config.use_extensions = true; + // -X + test_config.run_config.use_extensions = absl::GetFlag(FLAGS_X); - // --plugin_ep_libs - opt_w_str = ToPathString(absl::GetFlag(FLAGS_plugin_ep_libs)); - if (!opt_w_str.empty()) test_config.plugin_ep_names_and_libs = opt_w_str; + // --plugin_ep_libs + { + const auto& plugin_ep_names_and_libs = absl::GetFlag(FLAGS_plugin_ep_libs); + if (!plugin_ep_names_and_libs.empty()) test_config.plugin_ep_names_and_libs = ToPathString(plugin_ep_names_and_libs); + } - // --plugin_eps - opt_str = absl::GetFlag(FLAGS_plugin_eps); - if (!opt_str.empty()) ParseEpList(opt_str, test_config.machine_config.plugin_provider_type_list); + // --plugin_eps + { + const auto& plugin_eps = absl::GetFlag(FLAGS_plugin_eps); + if (!plugin_eps.empty()) ParseEpList(plugin_eps, test_config.machine_config.plugin_provider_type_list); + } - // --plugin_ep_options - opt_w_str = ToPathString(absl::GetFlag(FLAGS_plugin_ep_options)); - if (!opt_w_str.empty()) test_config.run_config.ep_runtime_config_string = opt_w_str; + // --plugin_ep_options + { + const auto& plugin_ep_options = absl::GetFlag(FLAGS_plugin_ep_options); + if (!plugin_ep_options.empty()) test_config.run_config.ep_runtime_config_string = ToPathString(plugin_ep_options); + } - // --list_ep_devices - if (absl::GetFlag(FLAGS_list_ep_devices)) { - test_config.list_available_devices = true; - return true; - } + // --list_ep_devices + if (absl::GetFlag(FLAGS_list_ep_devices)) { + test_config.list_available_devices = true; + return true; + } - // --select_ep_devices - opt_str = absl::GetFlag(FLAGS_select_ep_devices); - if (!opt_str.empty()) test_config.selected_devices = opt_str; - - if (positional.size() == 2) { - test_config.model_info.model_file_path = ToPathString(positional[1]); - test_config.run_config.f_dump_statistics = true; - } else if (positional.size() == 3) { - test_config.model_info.model_file_path = ToPathString(positional[1]); - test_config.model_info.result_file_path = ToPathString(positional[2]); - } else { - return false; - } + // --select_ep_devices + { + const auto& select_ep_devices = absl::GetFlag(FLAGS_select_ep_devices); + if (!select_ep_devices.empty()) test_config.selected_devices = select_ep_devices; } - ORT_CATCH(const std::exception& ex) { - ORT_HANDLE_EXCEPTION([&]() { - fprintf(stderr, "Error parsing options: %s\n", ex.what()); - }); + + if (positional.size() == 2) { + test_config.model_info.model_file_path = ToPathString(positional[1]); + test_config.run_config.f_dump_statistics = true; + } else if (positional.size() == 3) { + test_config.model_info.model_file_path = ToPathString(positional[1]); + test_config.model_info.result_file_path = ToPathString(positional[2]); + } else { return false; } diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 9e8e3c3c548d2..2afcfeac9bbd1 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -5,8 +5,6 @@ #include #include "test_configuration.h" - - namespace onnxruntime { namespace perftest { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a2532952cd1e1..0106167a20c43 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -127,16 +127,18 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::string ep_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); - // A list of EP's associated provider options + // EP's associated provider option lists std::vector> ep_options_list; ParseEpOptions(ep_option_string, ep_options_list); - // If user only provide the EPs' provider options for the first several EPs, - // add empty options for the rest EPs. + // If user only provide the EPs' provider option lists for the first several EPs, + // add empty provider option lists for the rest EPs. if (ep_options_list.size() < ep_list.size()) { for (size_t i = ep_options_list.size(); i < ep_list.size(); ++i) { ep_options_list.emplace_back(); // Adds a new empty map } + } else if (ep_options_list.size() > ep_list.size()) { + ORT_THROW("[ERROR] [Plugin EP]: Too many EP provider option lists provided."); } // EP -> associated provider options diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index e4256d3a5517f..ba023679ea387 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -9,6 +9,7 @@ #include "strings_helper.h" #include "core/common/common.h" #include "core/common/parse_string.h" +#include "core/common/string_utils.h" namespace onnxruntime { namespace perftest { @@ -55,40 +56,13 @@ void ParseSessionConfigs(const std::string& configs_string, } } -/** - * @brief Splits a string by a given delimiter while preserving empty tokens. - * - * This function splits the input string into substrings separated by the specified delimiter. - * Unlike std::getline, it preserves empty tokens that result from leading, trailing, or consecutive delimiters. - * - * @param input The input string to split. - * @param delim The delimiter character to split on. - * @param out The output vector to store the resulting substrings. It will be appended to, not cleared. - * - * @example - * std::vector tokens; - * SplitAndHandleEmptyTokens(";a|b;;x|y;", ';', tokens); - * // tokens = ["", "a|b", "", "x|y", ""] - */ -void SplitAndHandleEmptyTokens(const std::string& input, char delim, std::vector& out) { - std::string::size_type start = 0; - auto end = input.find(delim); - while (end != std::string::npos) { - out.emplace_back(input.substr(start, end - start)); // preserves empty - start = end + 1; - end = input.find(delim, start); - } - out.emplace_back(input.substr(start)); // last token -} - void ParseEpOptions(const std::string& input, std::vector>& result) { - std::vector tokens; - SplitAndHandleEmptyTokens(input, ';', tokens); + auto tokens = utils::SplitString(input, ";", true); for (const auto& token : tokens) { result.emplace_back(); // Adds a new empty map if (!token.empty()) { - ParseSessionConfigs(token, result.back()); // only parse non-empty + ParseSessionConfigs(std::string(token), result.back()); // only parse non-empty } // if token is empty, we still get an empty map in `result` } From 01da1a5c833c1e6c16e8d3aa501f0cd890cc7d83 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 4 Aug 2025 21:12:18 -0700 Subject: [PATCH 37/46] Alias '-h' to '--help' and remove showUsage function --- .../test/perftest/command_args_parser.cc | 160 +----------------- .../test/perftest/command_args_parser.h | 1 - onnxruntime/test/perftest/windows/utils.cc | 10 +- 3 files changed, 10 insertions(+), 161 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 8eed8d4e0478b..3f72f38264d0e 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -163,164 +163,11 @@ ABSL_FLAG(std::string, plugin_ep_options, "", "--plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \""); ABSL_FLAG(bool, list_ep_devices, false, "Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n"); ABSL_FLAG(std::string, select_ep_devices, "", "Specifies a semicolon-separated list of device indices to add to the session and run with."); +ABSL_FLAG(bool, h, false, "Print program usage."); namespace onnxruntime { namespace perftest { -/*static*/ void CommandLineParser::ShowUsage() { - printf( - "perf_test [options...] model_path [result_file]\n" - "Options:\n" - "\t-m [test_mode]: Specifies the test mode. Value could be 'duration' or 'times'.\n" - "\t\tProvide 'duration' to run the test for a fix duration, and 'times' to repeated for a certain times. \n" - "\t-M: Disable memory pattern.\n" - "\t-A: Disable memory arena\n" - "\t-I: Generate tensor input binding. Free dimensions are treated as 1 unless overridden using -f.\n" - "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " - "'nvtensorrtrtx', 'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' or 'webgpu'. " - "Default:'cpu'.\n" - "\t-b [tf|ort]: backend to use. Default:ort\n" - "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" - "\t-t [seconds_to_run]: Specifies the seconds to run for 'duration' mode. Default:600.\n" - "\t-p [profile_file]: Specifies the profile name to enable profiling and dump the profile data to the file.\n" - "\t-s: Show statistics result, like P75, P90. If no result_file provided this defaults to on.\n" - "\t-S: Given random seed, to produce the same input data. This defaults to -1(no initialize).\n" - "\t-v: Show verbose information.\n" - "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0.\n" - "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0.\n" - "\t-f [free_dimension_override]: Specifies a free dimension by name to override to a specific value for performance optimization. " - "Syntax is [dimension_name:override_value]. override_value must > 0\n" - "\t-F [free_dimension_override]: Specifies a free dimension by denotation to override to a specific value for performance optimization. " - "Syntax is [dimension_denotation:override_value]. override_value must > 0\n" - "\t-P: Use parallel executor instead of sequential executor.\n" - "\t-o [optimization level]: Default is 99 (all). Valid values are 0 (disable), 1 (basic), 2 (extended), 3 (layout), 99 (all).\n" - "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels.\n" - "\t-u [optimized_model_path]: Specify the optimized model path for saving.\n" - "\t-d [CUDA only][cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). \n" - "\t-q [CUDA only] use separate stream for copy. \n" - "\t-g [TensorRT RTX | TensorRT | CUDA] Enable tensor input and output bindings on CUDA before session run \n" - "\t-z: Set denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals.\n" - "\t-C: Specify session configuration entries as key-value pairs: -C \"| |\" \n" - "\t Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" - "\t [Example] -C \"session.disable_cpu_ep_fallback|1 ep.context_enable|1\" \n" - "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n" - "\t [Usage]: -e -i '| |'\n" - "\n" - "\t [ACL only] [enable_fast_math]: Options: 'true', 'false', default: 'false', \n" - "\t [DML only] [performance_preference]: DML device performance preference, options: 'default', 'minimum_power', 'high_performance', \n" - "\t [DML only] [device_filter]: DML device filter, options: 'any', 'gpu', 'npu', \n" - "\t [DML only] [disable_metacommands]: Options: 'true', 'false', \n" - "\t [DML only] [enable_graph_capture]: Options: 'true', 'false', \n" - "\t [DML only] [enable_graph_serialization]: Options: 'true', 'false', \n" - "\n" - "\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n" - "\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n" - "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" - "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" - "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" - "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" - "\n" - "\t [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" - "\t [QNN only] [backend_path]: QNN backend path. E.g., '/folderpath/libQnnHtp.so', '/winfolderpath/QnnHtp.dll'. Mutually exclusive with 'backend_type'.\n" - "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" - "\t [QNN only] [profiling_file_path] : QNN profiling file path if ETW not enabled.\n" - "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" - "\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n" - "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n" - "\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" - "\t [QNN only] [op_packages]: QNN UDO package, allowed format: \n" - "\t op_packages|::[:],::[:]. \n" - "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n" - "\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n" - "\t [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n" - "\t '0', '1', '2', '3', default is '0'.\n" - "\t [QNN only] [soc_model]: The SoC Model number. Refer to QNN SDK documentation for specific values. Defaults to '0' (unknown). \n" - "\t [QNN only] [htp_arch]: The minimum HTP architecture. The driver will use ops compatible with this architecture. \n" - "\t Options are '0', '68', '69', '73', '75'. Defaults to '0' (none). \n" - "\t [QNN only] [device_id]: The ID of the device to use when setting 'htp_arch'. Defaults to '0' (for single device). \n" - "\t [QNN only] [enable_htp_fp16_precision]: Enable the HTP_FP16 precision so that the float32 model will be inferenced with fp16 precision. \n" - "\t Otherwise, it will be fp32 precision. Works for float32 model for HTP backend. Defaults to '1' (with FP16 precision.). \n" - "\t [QNN only] [offload_graph_io_quantization]: Offload graph input quantization and graph output dequantization to another EP (typically CPU EP). \n" - "\t Defaults to '0' (QNN EP handles the graph I/O quantization and dequantization). \n" - "\t [QNN only] [enable_htp_spill_fill_buffer]: Enable HTP spill fill buffer, used while generating QNN context binary.\n" - "\t [QNN only] [enable_htp_shared_memory_allocator]: Enable the QNN HTP shared memory allocator and use it for inputs and outputs. Requires libcdsprpc.so/dll to be available.\n" - "\t Defaults to '0' (disabled).\n" - "\t [Example] [For QNN EP] -e qnn -i \"backend_type|cpu\" \n" - "\n" - "\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n" - "\t [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n" - "\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n" - "\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n" - "\t [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n" - "\t [TensorRT only] [trt_int8_calibration_table_name]: Specify INT8 calibration table name.\n" - "\t [TensorRT only] [trt_int8_use_native_calibration_table]: Use Native TensorRT calibration table.\n" - "\t [TensorRT only] [trt_dla_enable]: Enable DLA in Jetson device.\n" - "\t [TensorRT only] [trt_dla_core]: DLA core number.\n" - "\t [TensorRT only] [trt_dump_subgraphs]: Dump TRT subgraph to onnx model.\n" - "\t [TensorRT only] [trt_engine_cache_enable]: Enable engine caching.\n" - "\t [TensorRT only] [trt_engine_cache_path]: Specify engine cache path.\n" - "\t [TensorRT only] [trt_engine_cache_prefix]: Customize engine cache prefix when trt_engine_cache_enable is true.\n" - "\t [TensorRT only] [trt_engine_hw_compatible]: Enable hardware compatibility. Engines ending with '_sm80+' can be re-used across all Ampere+ GPU (a hardware-compatible engine may have lower throughput and/or higher latency than its non-hardware-compatible counterpart).\n" - "\t [TensorRT only] [trt_weight_stripped_engine_enable]: Enable weight-stripped engine build.\n" - "\t [TensorRT only] [trt_onnx_model_folder_path]: Folder path for the ONNX model with weights.\n" - "\t [TensorRT only] [trt_force_sequential_engine_build]: Force TensorRT engines to be built sequentially.\n" - "\t [TensorRT only] [trt_context_memory_sharing_enable]: Enable TensorRT context memory sharing between subgraphs.\n" - "\t [TensorRT only] [trt_layer_norm_fp32_fallback]: Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow.\n" - "\t [Example] [For TensorRT EP] -e tensorrt -i 'trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false trt_force_sequential_engine_build|false'\n" - "\n" - "\t [NNAPI only] [NNAPI_FLAG_USE_FP16]: Use fp16 relaxation in NNAPI EP..\n" - "\t [NNAPI only] [NNAPI_FLAG_USE_NCHW]: Use the NCHW layout in NNAPI EP.\n" - "\t [NNAPI only] [NNAPI_FLAG_CPU_DISABLED]: Prevent NNAPI from using CPU devices.\n" - "\t [NNAPI only] [NNAPI_FLAG_CPU_ONLY]: Using CPU only in NNAPI EP.\n" - "\t [Example] [For NNAPI EP] -e nnapi -i \"NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED\"\n" - "\n" - "\t [CoreML only] [ModelFormat]:[MLProgram, NeuralNetwork] Create an ML Program model or Neural Network. Default is NeuralNetwork.\n" - "\t [CoreML only] [MLComputeUnits]:[CPUAndNeuralEngine CPUAndGPU ALL CPUOnly] Specify to limit the backend device used to run the model.\n" - "\t [CoreML only] [AllowStaticInputShapes]:[0 1].\n" - "\t [CoreML only] [EnableOnSubgraphs]:[0 1].\n" - "\t [CoreML only] [SpecializationStrategy]:[Default FastPrediction].\n" - "\t [CoreML only] [ProfileComputePlan]:[0 1].\n" - "\t [CoreML only] [AllowLowPrecisionAccumulationOnGPU]:[0 1].\n" - "\t [CoreML only] [ModelCacheDirectory]:[path../a/b/c].\n" - "\t [Example] [For CoreML EP] -e coreml -i \"ModelFormat|MLProgram MLComputeUnits|CPUAndGPU\"\n" - "\n" - "\t [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n" - "\t [SNPE only] [priority]: execution priority, options: 'low', 'normal'. \n" - "\t [SNPE only] [buffer_type]: options: 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. default: ITENSOR'. \n" - "\t [SNPE only] [enable_init_cache]: enable SNPE init caching feature, set to 1 to enabled it. Disabled by default. \n" - "\t [Example] [For SNPE EP] -e snpe -i \"runtime|CPU priority|low\" \n\n" - "\n" - "\t-T [Set intra op thread affinities]: Specify intra op thread affinity string\n" - "\t [Example]: -T 1,2;3,4;5,6 or -T 1-2;3-4;5-6 \n" - "\t\t Use semicolon to separate configuration between threads.\n" - "\t\t E.g. 1,2;3,4;5,6 specifies affinities for three threads, the first thread will be attached to the first and second logical processor.\n" - "\t\t The number of affinities must be equal to intra_op_num_threads - 1\n\n" - "\t-D [Disable thread spinning]: disable spinning entirely for thread owned by onnxruntime intra-op thread pool.\n" - "\t-Z [Force thread to stop spinning between runs]: disallow thread from spinning during runs to reduce cpu usage.\n" - "\t-n [Exit after session creation]: allow user to measure session creation time to measure impact of enabling any initialization optimizations.\n" - "\t-l Provide file as binary in memory by using fopen before session creation.\n" - "\t-R [Register custom op]: allow user to register custom op by .so or .dll file.\n" - "\t-X [Enable onnxruntime-extensions custom ops]: Registers custom ops from onnxruntime-extensions. " - "onnxruntime-extensions must have been built in to onnxruntime. This can be done with the build.py " - "'--use_extensions' option.\n" - "\n" - "\t--plugin_ep_libs [registration names and libraries] Specifies a list of plugin execution provider (EP) registration names and their corresponding shared libraries to register.\n" - "\t [Usage]: --plugin_ep_libs \"plugin_ep_name_1|plugin_ep_1.dll plugin_ep_name_2|plugin_ep_2.dll ... \"\n" - "\n" - "\t--plugin_eps [Plugin EPs] Specifies a semicolon-separated list of plugin execution providers (EPs) to use.\n" - "\t [Usage]: --plugin_eps \"plugin_ep_1;plugin_ep_2;... \"\n" - "\n" - "\t--plugin_ep_options [EP options] Specifies provider options for each EP listed in --plugin_eps. Options (key-value pairs) for each EP are separated by space and EPs are separated by semicolons.\n" - "\t [Usage]: --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" - "\t --plugin_ep_options \";ep_2_option_1_key|ep_2_option_1_value ...;... \" or \n" - "\t --plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \" \n" - "\n" - "\t--list_ep_devices Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n" - "\t--select_ep_devices [list of device indices] A semicolon-separated list of device indices to add to the session and run with.\n" - "\t-h: help\n"); -} - static bool ParseDimensionOverride(std::string& dim_identifier, int64_t& override_val, const char* option) { std::basic_string free_dim_str(option); size_t delimiter_location = free_dim_str.find(":"); @@ -369,14 +216,9 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a absl::SetFlagsUsageConfig(config); absl::SetProgramUsageMessage(CustomUsageMessage()); -#ifdef _WIN32 auto utf8_strings = utils::ConvertArgvToUtf8Strings(argc, argv); auto utf8_argv = utils::CStringsFromStrings(utf8_strings); - auto positional = absl::ParseCommandLine(static_cast(utf8_argv.size()), utf8_argv.data()); -#else - auto positional = absl::ParseCommandLine(argc, argv); -#endif // -f { diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index 2afcfeac9bbd1..ab99621883a31 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -12,7 +12,6 @@ struct PerformanceTestConfig; class CommandLineParser { public: - static void ShowUsage(); static bool ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]); }; diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index d30f1e417bafd..6ec9fc835bfdb 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -82,7 +82,15 @@ std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { std::vector utf8_args; utf8_args.reserve(argc); for (int i = 0; i < argc; ++i) { - utf8_args.push_back(ToUTF8String(argv[i])); + std::string utf8_string = ToUTF8String(argv[i]); + + // Abseil flags doens't natively alias "-h" to "--help". + // We make "-h" alias to "--help" here. + if (utf8_string == "-h" || utf8_string == "--h") { + utf8_args.push_back("--help"); + } else { + utf8_args.push_back(utf8_string); + } } return utf8_args; } From 3bf318399444ab48c19aa6406623c0dc493b2032 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 4 Aug 2025 21:12:50 -0700 Subject: [PATCH 38/46] supress mem leak info --- .../core/platform/windows/debug_alloc.cc | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc index ad26280a90ecb..dd6388b76e0cf 100644 --- a/onnxruntime/core/platform/windows/debug_alloc.cc +++ b/onnxruntime/core/platform/windows/debug_alloc.cc @@ -243,6 +243,21 @@ Memory_LeakCheck::~Memory_LeakCheck() { // empty_string = new string; // empty_named_groups = new std::map; // empty_group_names = new std::map; }); + // + // In the Abseil (ABSL) flags library used by onnxruntime_perf_test, specifying "--help" + // causes the program to call exit(1). This is an intentional design choice from Google, + // treating "--help" as an early termination condition (the program does not perform its + // normal execution. See MaybeExit in usage.cc). + // + // As a result, many resources will not be cleaned up, including: + // - Abseil's internal storage for flags, allocated in static/global objects inside + // absl::flags_internal (e.g., FlagImpl::Init) + // - The absl::FlagsUsageConfig instance + // - Performance test utilities that hold std::vector objects for converting argv to UTF-8 strings + // - The onnxruntime::perftest::PerformanceTestConfig instance + // + // Essentially, any object instantiated before calling absl::ParseCommandLine will not + // be cleaned up. This behavior is expected when running with "--help". if (string.find("RtlRunOnceExecuteOnce") == std::string::npos && string.find("re2::RE2::Init") == std::string::npos && string.find("dynamic initializer for 'FLAGS_") == std::string::npos && @@ -254,7 +269,12 @@ Memory_LeakCheck::~Memory_LeakCheck() { string.find("testing::internal::ThreadLocalRegistryImpl::GetThreadLocalsMapLocked") == std::string::npos && string.find("testing::internal::ThreadLocalRegistryImpl::GetValueOnCurrentThread") == std::string::npos && string.find("PyInit_onnxruntime_pybind11_state") == std::string::npos && - string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos) { + string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos && + string.find("flags_internal::ParseCommandLineImpl") == std::string::npos && + string.find("SetFlagsUsageConfig") == std::string::npos && + string.find("perftest::utils::ConvertArgvToUtf8Strings") == std::string::npos && + string.find("perftest::utils::CStringsFromStrings") == std::string::npos && + string.find("perftest::PerformanceTestConfig::PerformanceTestConfig") == std::string::npos) { if (leaked_bytes == 0) DebugPrint("\n-----Starting Heap Trace-----\n\n"); From a00c35230474ba48f7c2ab16acb7c6eb0de23c69 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 4 Aug 2025 21:23:13 -0700 Subject: [PATCH 39/46] remove calling showUsage --- onnxruntime/test/perftest/main.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 090cfd7747736..60440efcecc6a 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -21,7 +21,6 @@ int real_main(int argc, char* argv[]) { g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { - perftest::CommandLineParser::ShowUsage(); return -1; } Ort::Env env{nullptr}; From 7236e44e913bcf81bb7042b1c74f29a8b8dc0eec Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 5 Aug 2025 09:05:45 -0700 Subject: [PATCH 40/46] fix build issue for Linux --- onnxruntime/test/perftest/utils.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index d46dc3e8758e4..98e6c0d4af9bb 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -23,11 +23,9 @@ class ICPUUsage { std::unique_ptr CreateICPUUsage(); -#ifdef _WIN32 std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); std::vector CStringsFromStrings(std::vector& utf8_args); -#endif void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); From 02499f818b39b783f3f80c926bb2a23e32e63550 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 5 Aug 2025 09:49:50 -0700 Subject: [PATCH 41/46] fix build issue in Linux --- onnxruntime/test/perftest/common_utils.cc | 26 ++++++++++++++++++++++ onnxruntime/test/perftest/utils.h | 2 +- onnxruntime/test/perftest/windows/utils.cc | 26 ---------------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index fb1db0ef9d216..2d4b90996c7d3 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -64,6 +64,32 @@ void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& te } } +std::vector ConvertArgvToUtf8Strings(int argc, ORTCHAR_T* argv[]) { + std::vector utf8_args; + utf8_args.reserve(argc); + for (int i = 0; i < argc; ++i) { + std::string utf8_string = ToUTF8String(argv[i]); + + // Abseil flags doens't natively alias "-h" to "--help". + // We make "-h" alias to "--help" here. + if (utf8_string == "-h" || utf8_string == "--h") { + utf8_args.push_back("--help"); + } else { + utf8_args.push_back(utf8_string); + } + } + return utf8_args; +} + +std::vector CStringsFromStrings(std::vector& utf8_args) { + std::vector utf8_argv; + utf8_argv.reserve(utf8_args.size()); + for (auto& str : utf8_args) { + utf8_argv.push_back(&str[0]); + } + return utf8_argv; +} + // This helper function returns the basename of the filename passed as an argument std::string_view GetBasename(std::string_view filename) { auto last_slash_pos = filename.find_last_of("/\\"); diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 98e6c0d4af9bb..7b4161abd505f 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -23,7 +23,7 @@ class ICPUUsage { std::unique_ptr CreateICPUUsage(); -std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]); +std::vector ConvertArgvToUtf8Strings(int argc, ORTCHAR_T* argv[]); std::vector CStringsFromStrings(std::vector& utf8_args); diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 6ec9fc835bfdb..15982522efe78 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -77,32 +77,6 @@ class CPUUsage : public ICPUUsage { std::unique_ptr CreateICPUUsage() { return std::make_unique(); } - -std::vector ConvertArgvToUtf8Strings(int argc, wchar_t* argv[]) { - std::vector utf8_args; - utf8_args.reserve(argc); - for (int i = 0; i < argc; ++i) { - std::string utf8_string = ToUTF8String(argv[i]); - - // Abseil flags doens't natively alias "-h" to "--help". - // We make "-h" alias to "--help" here. - if (utf8_string == "-h" || utf8_string == "--h") { - utf8_args.push_back("--help"); - } else { - utf8_args.push_back(utf8_string); - } - } - return utf8_args; -} - -std::vector CStringsFromStrings(std::vector& utf8_args) { - std::vector utf8_argv; - utf8_argv.reserve(utf8_args.size()); - for (auto& str : utf8_args) { - utf8_argv.push_back(&str[0]); - } - return utf8_argv; -} } // namespace utils } // namespace perftest } // namespace onnxruntime From 461d57daf3d8e5e2407eb80923b41eadca5c2e9a Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 5 Aug 2025 09:59:13 -0700 Subject: [PATCH 42/46] clean up --- .../test/perftest/command_args_parser.cc | 2 +- onnxruntime/test/perftest/main.cc | 1 + onnxruntime/test/perftest/ort_test_session.cc | 14 - .../test/perftest/ort_test_session.ccc | 1109 ----------------- 4 files changed, 2 insertions(+), 1124 deletions(-) delete mode 100644 onnxruntime/test/perftest/ort_test_session.ccc diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 3f72f38264d0e..a9db91d47b503 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -191,7 +191,7 @@ static bool ParseDimensionOverride(std::string& dim_identifier, int64_t& overrid std::string CustomUsageMessage() { std::ostringstream oss; oss << "onnxruntime_perf_test [options...] model_path [result_file]\n\n"; - oss << "Note: Options may be specified with either a single dash(-option) or a double dash(--option).Both forms are accepted and treated identically.\n\n"; + oss << "Note: Options may be specified with either a single dash(-option) or a double dash(--option). Both forms are accepted and treated identically.\n\n"; oss << "Options:"; return oss.str(); diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 60440efcecc6a..c3a3e5ad65c73 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -21,6 +21,7 @@ int real_main(int argc, char* argv[]) { g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); perftest::PerformanceTestConfig test_config; if (!perftest::CommandLineParser::ParseArguments(test_config, argc, argv)) { + fprintf(stderr, "%s", "See 'onnxruntime_perf_test --help'."); return -1; } Ort::Env env{nullptr}; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 0106167a20c43..0ed8d485e3b00 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -53,20 +53,6 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; - - for (size_t i = 0; i < outputs_.size(); i++) { - Ort::Value& ort_output = outputs_[i]; - const float* output_data = ort_output.GetTensorData(); - gsl::span output_span(output_data, 6); - std::cout << output_span[0] << std::endl; - std::cout << output_span[1] << std::endl; - std::cout << output_span[2] << std::endl; - std::cout << output_span[3] << std::endl; - std::cout << output_span[4] << std::endl; - std::cout << output_span[5] << std::endl; - std::cout << std::endl; - } - return duration_seconds; } diff --git a/onnxruntime/test/perftest/ort_test_session.ccc b/onnxruntime/test/perftest/ort_test_session.ccc deleted file mode 100644 index 823e1f509b5aa..0000000000000 --- a/onnxruntime/test/perftest/ort_test_session.ccc +++ /dev/null @@ -1,1109 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Copyright (c) 2023 NVIDIA Corporation. -// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates -// Licensed under the MIT License. - -#include "ort_test_session.h" -#include -#include -#include -#include -#include -#include -#include -#include "core/session/onnxruntime_session_options_config_keys.h" -#include "core/providers/tensorrt/tensorrt_provider_options.h" -#include "core/providers/dnnl/dnnl_provider_options.h" -#include -#include "providers.h" -#include "TestCase.h" -#include "strings_helper.h" - -#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_NV) -#include -#endif - -#ifdef USE_OPENVINO -#include "nlohmann/json.hpp" -#endif - -#ifdef USE_DML -#include "core/providers/dml/dml_provider_factory.h" -#include "core/providers/dml/dml_session_options_config_keys.h" -#endif - -#ifdef _WIN32 -#define strdup _strdup -#endif -extern const OrtApi* g_ort; - -namespace onnxruntime { -namespace perftest { - -std::chrono::duration OnnxRuntimeTestSession::Run() { - // Randomly pick one OrtValueArray from test_inputs_. (NOT ThreadSafe) - const std::uniform_int_distribution::param_type p(0, static_cast(test_inputs_.size() - 1)); - const size_t id = static_cast(dist_(rand_engine_, p)); - - auto& input = test_inputs_.at(id); - auto start = std::chrono::high_resolution_clock::now(); - - session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), - output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); - - for (size_t i = 0; i < outputs_.size(); i++) { - Ort::Value& ort_output = outputs_[i]; - const float* output_data = ort_output.GetTensorData(); - gsl::span output_span(output_data, 6); - std::cout << output_span[0] << std::endl; - std::cout << output_span[1] << std::endl; - std::cout << output_span[2] << std::endl; - std::cout << output_span[3] << std::endl; - std::cout << output_span[4] << std::endl; - std::cout << output_span[5] << std::endl; - } - - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration_seconds = end - start; - return duration_seconds; -} - -OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device& rd, - const PerformanceTestConfig& performance_test_config, - const TestModelInfo& m) - : rand_engine_(rd()), input_names_(m.GetInputCount()), input_names_str_(m.GetInputCount()), input_length_(m.GetInputCount()) { - Ort::SessionOptions session_options; - -#ifdef _MSC_VER - std::string ep_names_and_libs_string = ToUTF8String(performance_test_config.plugin_ep_names_and_libs); -#else - std::string ep_names_and_libs_string = performance_test_config.plugin_ep_names_and_libs; -#endif - std::unordered_map ep_names_to_libs; - ParseSessionConfigs(ep_names_and_libs_string, ep_names_to_libs); - bool is_plugin_ep_avaiable = false; - - if (ep_names_to_libs.size() > 0) { - // Register plugin EP libraries if provided via "-L" argument. - for (auto& pair : ep_names_to_libs) { - const std::filesystem::path library_path = pair.second; - const std::string registration_name = pair.first; - env.RegisterExecutionProviderLibrary(registration_name.c_str(), library_path.c_str()); - registered_plugin_ep_names_.push_back(registration_name); - } - - std::vector ep_devices = env.GetEpDevices(); - std::vector added_ep_devices; - - // All OrtEpDevice instances must be from the same execution provider. - // Find the OrtEpDevice associated with the execution provider provided via "-e" argument. - Ort::ConstEpDevice plugin_ep_device; - for (Ort::ConstEpDevice& device : ep_devices) { - if (std::string(device.EpName()) == performance_test_config.machine_config.provider_type_name) { - plugin_ep_device = device; - added_ep_devices.push_back(plugin_ep_device); - } - } - - if (added_ep_devices.empty()) { - for (auto ep_name : registered_plugin_ep_names_) { - env.UnregisterExecutionProviderLibrary(ep_name.c_str()); - } - ORT_THROW( - "[ERROR] [plugin EP] No matching execution provider name found in EP library's factory."); - } - -#if defined(_MSC_VER) - std::string provider_option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string provider_option_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - std::unordered_map provider_options; - ParseSessionConfigs(provider_option_string, provider_options); - session_options.AppendExecutionProvider_V2(env, added_ep_devices, provider_options); - is_plugin_ep_avaiable = true; - } - - provider_name_ = performance_test_config.machine_config.provider_type_name; - std::unordered_map provider_options; - if (provider_name_ == onnxruntime::kDnnlExecutionProvider) { -#ifdef USE_DNNL - // Generate provider options - OrtDnnlProviderOptions dnnl_options; - dnnl_options.use_arena = 1; - dnnl_options.threadpool_args = nullptr; - -#if !defined(DNNL_ORT_THREAD) -#if defined(_MSC_VER) - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif // defined(_MSC_VER) - int num_threads = 0; - ParseSessionConfigs(ov_string, provider_options, {"num_of_threads"}); - for (const auto& provider_option : provider_options) { - if (provider_option.first == "num_of_threads") { - std::stringstream sstream(provider_option.second); - sstream >> num_threads; - if (num_threads < 0) { - ORT_THROW( - "[ERROR] [OneDNN] Invalid entry for the key 'num_of_threads'," - " set number of threads or use '0' for default\n"); - // If the user doesnt define num_threads, auto detect threads later - } - } - } - dnnl_options.threadpool_args = static_cast(&num_threads); -#endif // !defined(DNNL_ORT_THREAD) - dnnl_options.use_arena = performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0; - - session_options.AppendExecutionProvider_Dnnl(dnnl_options); -#else - ORT_THROW("DNNL is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kCudaExecutionProvider) { -#ifdef USE_CUDA - const auto& api = Ort::GetApi(); - OrtCUDAProviderOptionsV2* cuda_options; - Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options)); - std::vector option_keys, option_values; - // used to keep all option keys and value strings alive - std::list buffer; - buffer.emplace_back("cudnn_conv_algo_search"); - option_keys.push_back(buffer.back().c_str()); - switch (performance_test_config.run_config.cudnn_conv_algo) { - case 0: - buffer.emplace_back("EXHAUSTIVE"); - break; - case 1: - buffer.emplace_back("HEURISTIC"); - break; - default: - buffer.emplace_back("DEFAULT"); - break; - } - option_values.push_back(buffer.back().c_str()); - - buffer.emplace_back("do_copy_in_default_stream"); - option_keys.push_back(buffer.back().c_str()); - buffer.emplace_back(!performance_test_config.run_config.do_cuda_copy_in_separate_stream ? "1" : "0"); - option_values.push_back(buffer.back().c_str()); - -#ifdef _MSC_VER - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(ov_string, provider_options); - for (const auto& provider_option : provider_options) { - option_keys.push_back(provider_option.first.c_str()); - option_values.push_back(provider_option.second.c_str()); - } - - Ort::Status status(api.UpdateCUDAProviderOptions(cuda_options, - option_keys.data(), option_values.data(), option_keys.size())); - if (!status.IsOK()) { - OrtAllocator* allocator; - char* options; - Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); - Ort::ThrowOnError(api.GetCUDAProviderOptionsAsString(cuda_options, allocator, &options)); - ORT_THROW("[ERROR] [CUDA] Configuring the CUDA options failed with message: ", status.GetErrorMessage(), - "\nSupported options are:\n", options); - } - session_options.AppendExecutionProvider_CUDA_V2(*cuda_options); - if (performance_test_config.run_config.enable_cuda_io_binding) { - device_memory_name_ = CUDA; - } -#else - ORT_THROW("CUDA is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kTensorrtExecutionProvider) { -#ifdef USE_TENSORRT - const auto& api = Ort::GetApi(); - OrtTensorRTProviderOptionsV2* tensorrt_options; - Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options)); - std::unique_ptr rel_trt_options( - tensorrt_options, api.ReleaseTensorRTProviderOptions); - std::vector option_keys, option_values; - // used to keep all option keys and value strings alive - std::list buffer; - -#ifdef _MSC_VER - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(ov_string, provider_options); - for (const auto& provider_option : provider_options) { - option_keys.push_back(provider_option.first.c_str()); - option_values.push_back(provider_option.second.c_str()); - } - Ort::Status status(api.UpdateTensorRTProviderOptions(tensorrt_options, - option_keys.data(), option_values.data(), option_keys.size())); - if (!status.IsOK()) { - OrtAllocator* allocator; - char* options; - Ort::ThrowOnError(api.GetAllocatorWithDefaultOptions(&allocator)); - Ort::ThrowOnError(api.GetTensorRTProviderOptionsAsString(tensorrt_options, allocator, &options)); - ORT_THROW("[ERROR] [TensorRT] Configuring the CUDA options failed with message: ", status.GetErrorMessage(), - "\nSupported options are:\n", options); - } - - session_options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options); - - OrtCUDAProviderOptions cuda_options; - cuda_options.device_id = tensorrt_options->device_id; - cuda_options.cudnn_conv_algo_search = static_cast(performance_test_config.run_config.cudnn_conv_algo); - cuda_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; - // TODO: Support arena configuration for users of perf test - session_options.AppendExecutionProvider_CUDA(cuda_options); - if (performance_test_config.run_config.enable_cuda_io_binding) { - device_memory_name_ = CUDA; - } -#else - ORT_THROW("TensorRT is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kNvTensorRTRTXExecutionProvider) { -#ifdef USE_NV - session_options.AppendExecutionProvider("NvTensorRtRtx", provider_options); - if (performance_test_config.run_config.enable_cuda_io_binding) { - device_memory_name_ = CUDA; - } -#else - ORT_THROW("NV TensorRT RTX is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kQnnExecutionProvider) { -#ifdef USE_QNN -#ifdef _MSC_VER - std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string option_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(option_string, provider_options, - {"backend_type", "backend_path", "profiling_file_path", "profiling_level", - "rpc_control_latency", "vtcm_mb", "soc_model", "device_id", "htp_performance_mode", "op_packages", - "qnn_saver_path", "htp_graph_finalization_optimization_mode", "qnn_context_priority", - "htp_arch", "enable_htp_fp16_precision", "offload_graph_io_quantization", - "enable_htp_spill_fill_buffer", "enable_htp_shared_memory_allocator", "dump_json_qnn_graph", - "json_qnn_graph_dir"}); - for (const auto& provider_option : provider_options) { - const std::string& key = provider_option.first; - const std::string& value = provider_option.second; - if (key == "backend_path" || key == "profiling_file_path" || key == "json_qnn_graph_dir") { - if (value.empty()) { - ORT_THROW("Please provide the valid file path."); - } - } else if (key == "profiling_level") { - std::set supported_profiling_level = {"off", "basic", "detailed"}; - if (supported_profiling_level.find(value) == supported_profiling_level.end()) { - ORT_THROW("Supported profiling_level: off, basic, detailed"); - } - } else if (key == "backend_type" || key == "rpc_control_latency" || key == "vtcm_mb" || key == "soc_model" || - key == "device_id") { - // no validation - } else if (key == "htp_performance_mode") { - std::set supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance", - "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", - "power_saver", "sustained_high_performance"}; - if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) { - std::ostringstream str_stream; - std::copy(supported_htp_perf_mode.begin(), supported_htp_perf_mode.end(), - std::ostream_iterator(str_stream, ",")); - std::string str = str_stream.str(); - ORT_THROW("Supported htp_performance_mode: " + str); - } - } else if (key == "op_packages") { - if (value.empty()) { - ORT_THROW("Please provide the valid op_packages."); - } - } else if (key == "qnn_saver_path") { - // no validation - } else if (key == "htp_graph_finalization_optimization_mode") { - std::set supported_htp_graph_final_opt_modes = {"0", "1", "2", "3"}; - if (supported_htp_graph_final_opt_modes.find(value) == supported_htp_graph_final_opt_modes.end()) { - std::ostringstream str_stream; - std::copy(supported_htp_graph_final_opt_modes.begin(), supported_htp_graph_final_opt_modes.end(), - std::ostream_iterator(str_stream, ",")); - std::string str = str_stream.str(); - ORT_THROW("Wrong value for htp_graph_finalization_optimization_mode. select from: " + str); - } - } else if (key == "qnn_context_priority") { - std::set supported_qnn_context_priority = {"low", "normal", "normal_high", "high"}; - if (supported_qnn_context_priority.find(value) == supported_qnn_context_priority.end()) { - ORT_THROW("Supported qnn_context_priority: low, normal, normal_high, high"); - } - } else if (key == "htp_arch") { - std::set supported_htp_archs = {"0", "68", "69", "73", "75"}; - if (supported_htp_archs.find(value) == supported_htp_archs.end()) { - std::ostringstream str_stream; - std::copy(supported_htp_archs.begin(), supported_htp_archs.end(), - std::ostream_iterator(str_stream, ",")); - std::string str = str_stream.str(); - ORT_THROW("Wrong value for htp_arch. select from: " + str); - } - } else if (key == "enable_htp_fp16_precision" || - key == "offload_graph_io_quantization" || - key == "enable_htp_spill_fill_buffer" || - key == "enable_htp_shared_memory_allocator" || - key == "dump_json_qnn_graph") { - std::set supported_options = {"0", "1"}; - if (supported_options.find(value) == supported_options.end()) { - std::ostringstream str_stream; - std::copy(supported_options.begin(), supported_options.end(), - std::ostream_iterator(str_stream, ",")); - std::string str = str_stream.str(); - ORT_THROW("Wrong value for ", key, ". select from: ", str); - } - - if (key == "enable_htp_shared_memory_allocator" && value == "1") { - // if this option is set, also use the enabled allocator - device_memory_name_ = "QnnHtpShared"; - } - } - } - session_options.AppendExecutionProvider("QNN", provider_options); -#else - ORT_THROW("QNN is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kSnpeExecutionProvider) { -#ifdef USE_SNPE -#ifdef _MSC_VER - std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string option_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(option_string, provider_options, {"runtime", "priority", "buffer_type", "enable_init_cache"}); - for (const auto& provider_option : provider_options) { - if (key == "runtime") { - std::set supported_runtime = {"CPU", "GPU_FP32", "GPU", "GPU_FLOAT16", "DSP", "AIP_FIXED_TF"}; - if (supported_runtime.find(value) == supported_runtime.end()) { - ORT_THROW(R"(Wrong configuration value for the key 'runtime'. -select from 'CPU', 'GPU_FP32', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n)"); - } - } else if (key == "priority") { - // no validation - } else if (key == "buffer_type") { - std::set supported_buffer_type = {"TF8", "TF16", "UINT8", "FLOAT", "ITENSOR"}; - if (supported_buffer_type.find(value) == supported_buffer_type.end()) { - ORT_THROW(R"(Wrong configuration value for the key 'buffer_type'. -select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); - } - } else if (key == "enable_init_cache") { - if (value != "1") { - ORT_THROW("Set to 1 to enable_init_cache."); - } - } - } - - session_options.AppendExecutionProvider("SNPE", provider_options); -#else - ORT_THROW("SNPE is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kNnapiExecutionProvider) { -#ifdef USE_NNAPI - uint32_t nnapi_flags = 0; -#ifdef _MSC_VER - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - std::istringstream ss(ov_string); - std::string key; - while (ss >> key) { - if (key == "NNAPI_FLAG_USE_FP16") { - nnapi_flags |= NNAPI_FLAG_USE_FP16; - } else if (key == "NNAPI_FLAG_USE_NCHW") { - nnapi_flags |= NNAPI_FLAG_USE_NCHW; - } else if (key == "NNAPI_FLAG_CPU_DISABLED") { - nnapi_flags |= NNAPI_FLAG_CPU_DISABLED; - } else if (key == "NNAPI_FLAG_CPU_ONLY") { - nnapi_flags |= NNAPI_FLAG_CPU_ONLY; - } else if (key.empty()) { - } else { - ORT_THROW( - "[ERROR] [NNAPI] wrong key type entered. Choose from the following runtime key options " - "that are available for NNAPI. " - "['NNAPI_FLAG_USE_FP16', 'NNAPI_FLAG_USE_NCHW', 'NNAPI_FLAG_CPU_DISABLED', 'NNAPI_FLAG_CPU_ONLY'] \n"); - } - } - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nnapi(session_options, nnapi_flags)); -#else - ORT_THROW("NNAPI is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kVSINPUExecutionProvider) { -#ifdef USE_VSINPU - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_VSINPU(session_options)); -#else - ORT_THROW("VSINPU is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kCoreMLExecutionProvider) { -#ifdef __APPLE__ -#ifdef USE_COREML - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; - static const std::unordered_set available_keys = {kCoremlProviderOption_MLComputeUnits, - kCoremlProviderOption_ModelFormat, - kCoremlProviderOption_RequireStaticInputShapes, - kCoremlProviderOption_EnableOnSubgraphs, - kCoremlProviderOption_SpecializationStrategy, - kCoremlProviderOption_ProfileComputePlan, - kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU, - kCoremlProviderOption_ModelCacheDirectory}; - ParseSessionConfigs(ov_string, provider_options, available_keys); - - std::unordered_map available_options = { - {"CPUAndNeuralEngine", "1"}, - {"CPUAndGPU", "1"}, - {"CPUOnly", "1"}, - {"ALL", "1"}, - }; - for (const auto& provider_option : provider_options) { - if (provider_option.first == kCoremlProviderOption_MLComputeUnits && - available_options.find(provider_option.second) != available_options.end()) { - } else if (provider_option.first == kCoremlProviderOption_ModelFormat && - (provider_option.second == "MLProgram" || provider_option.second == "NeuralNetwork")) { - } else if (provider_option.first == kCoremlProviderOption_RequireStaticInputShapes && - (provider_option.second == "1" || provider_option.second == "0")) { - } else if (provider_option.first == kCoremlProviderOption_EnableOnSubgraphs && - (provider_option.second == "0" || provider_option.second == "1")) { - } else if (provider_option.first == kCoremlProviderOption_SpecializationStrategy && - (provider_option.second == "Default" || provider_option.second == "FastPrediction")) { - } else if (provider_option.first == kCoremlProviderOption_ProfileComputePlan && - (provider_option.second == "0" || provider_option.second == "1")) { - } else if (provider_option.first == kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU && - (provider_option.second == "0" || provider_option.second == "1")) { - } else if (provider_option.first == kCoremlProviderOption_ModelCacheDirectory) { - } else { - ORT_THROW("Invalid value for option ", provider_option.first, ": ", provider_option.second); - } - } - // COREML_FLAG_CREATE_MLPROGRAM - session_options.AppendExecutionProvider("CoreML", provider_options); -#else - ORT_THROW("CoreML is not supported in this build\n"); -#endif -#else - ORT_THROW("COREML is not supported on this platform.\n"); -#endif - } else if (provider_name_ == onnxruntime::kDmlExecutionProvider) { -#ifdef USE_DML -#ifdef _MSC_VER - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(ov_string, provider_options, - {"device_filter", "performance_preference", "disable_metacommands", - "enable_graph_capture", "enable_graph_serialization"}); - for (const auto& provider_option : provider_options) { - const std::string& key = provider_option.first; - const std::string& value = provider_option.second; - if (key == "device_filter") { - std::set ov_supported_device_types = {"gpu", "npu"}; - if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { - } else { - ORT_THROW( - "[ERROR] [DML] You have selected a wrong configuration value for the key 'device_filter'. " - "Select from 'gpu', or 'npu' \n"); - } - } else if (key == "performance_preference") { - std::set ov_supported_values = {"default", "high_performance", "minimum_power"}; - if (ov_supported_values.find(value) != ov_supported_values.end()) { - } else { - ORT_THROW( - "[ERROR] [DML] You have selected a wrong configuration value for the key 'performance_preference'. " - "Select from 'default', 'high_performance' or 'minimum_power' \n"); - } - } else if (key == "disable_metacommands") { - std::set ov_supported_values = {"true", "True", "false", "False"}; - if (ov_supported_values.find(value) != ov_supported_values.end()) { - } else { - ORT_THROW( - "[ERROR] [DML] You have selected a wrong value for the key 'disable_metacommands'. " - "Select from 'true' or 'false' \n"); - } - } else if (key == "enable_graph_capture") { - std::set ov_supported_values = {"true", "True", "false", "False"}; - if (ov_supported_values.find(value) != ov_supported_values.end()) { - } else { - ORT_THROW( - "[ERROR] [DML] You have selected a wrong value for the key 'enable_graph_capture'. " - "Select from 'true' or 'false' \n"); - } - } else if (key == "enable_graph_serialization") { - std::set ov_supported_values = {"true", "True", "false", "False"}; - if (ov_supported_values.find(value) != ov_supported_values.end()) { - session_options.AddConfigEntry(kOrtSessionOptionsConfigEnableGraphSerialization, value.data()); - } else { - ORT_THROW( - "[ERROR] [DML] You have selected a wrong value for the key 'enable_graph_serialization'. " - "Select from 'true' or 'false' \n"); - } - } - } - if (provider_options.find("performance_preference") == provider_options.end()) { - provider_options["performance_preference"] = "high_performance"; - } - if (provider_options.find("device_filter") == provider_options.end()) { - provider_options["device_filter"] = "gpu"; - } - if (provider_options.find("disable_metacommands") == provider_options.end()) { - provider_options["disable_metacommands"] = "false"; - } - if (provider_options.find("enable_graph_capture") == provider_options.end()) { - provider_options["enable_graph_capture"] = "false"; - } - session_options.AppendExecutionProvider("DML", provider_options); -#else - ORT_THROW("DML is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kAclExecutionProvider) { -#ifdef USE_ACL -#if defined(_MSC_VER) - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif // defined(_MSC_VER) - bool enable_fast_math = false; - ParseSessionConfigs(ov_string, provider_options, {"enable_fast_math"}); - for (const auto& provider_option : provider_options) { - const std::string& key = provider_option.first; - const std::string& value = provider_option.second; - if (key == "enable_fast_math") { - std::set ov_supported_values = {"true", "True", "false", "False"}; - if (ov_supported_values.find(value) != ov_supported_values.end()) { - enable_fast_math = (value == "true") || (value == "True"); - } else { - ORT_THROW( - "[ERROR] [ACL] You have selcted an invalid value for the key 'enable_fast_math'. " - "Select from 'true' or 'false' \n"); - } - } - } - Ort::ThrowOnError( - OrtSessionOptionsAppendExecutionProvider_ACL(session_options, enable_fast_math)); -#else - ORT_THROW("Acl is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kArmNNExecutionProvider) { -#ifdef USE_ARMNN - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ArmNN(session_options, - performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); -#else - ORT_THROW("ArmNN is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kRocmExecutionProvider) { -#ifdef USE_ROCM - OrtROCMProviderOptions rocm_options; - rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo; - rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; - // TODO: Support arena configuration for users of perf test - session_options.AppendExecutionProvider_ROCM(rocm_options); -#else - ORT_THROW("ROCM is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kMIGraphXExecutionProvider) { -#ifdef USE_MIGRAPHX - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0)); -#else - ORT_THROW("MIGraphX is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kXnnpackExecutionProvider) { -#ifdef USE_XNNPACK - session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); - session_options.AppendExecutionProvider( - "XNNPACK", {{"intra_op_num_threads", std::to_string(performance_test_config.run_config.intra_op_num_threads)}}); -#else - ORT_THROW("Xnnpack is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kWebGpuExecutionProvider) { -#ifdef USE_WEBGPU - session_options.AppendExecutionProvider("WebGPU", {}); -#else - ORT_THROW("WebGPU is not supported in this build\n"); -#endif - } else if (provider_name_ == onnxruntime::kVitisAIExecutionProvider) { -#ifdef USE_VITISAI -#ifdef _MSC_VER - std::string option_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string option_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - ParseSessionConfigs(option_string, provider_options); - - session_options.AppendExecutionProvider_VitisAI(provider_options); -#else - ORT_THROW("VitisAI is not supported in this build\n"); -#endif - } else if (!provider_name_.empty() && - provider_name_ != onnxruntime::kCpuExecutionProvider && - provider_name_ != onnxruntime::kOpenVINOExecutionProvider && - !is_plugin_ep_avaiable) { - ORT_THROW("This backend is not included in perf test runner.\n"); - } - - if (performance_test_config.run_config.enable_cpu_mem_arena) - session_options.EnableCpuMemArena(); - else - session_options.DisableCpuMemArena(); - if (performance_test_config.run_config.enable_memory_pattern && - performance_test_config.run_config.execution_mode == ExecutionMode::ORT_SEQUENTIAL) - session_options.EnableMemPattern(); - else - session_options.DisableMemPattern(); - session_options.SetExecutionMode(performance_test_config.run_config.execution_mode); - - // Set any extra session configuration entries provided by the user via command-line arguments. - // - // Some session config entries can also be set via dedicated command-line options. - // If the user uses multiple command-line options to set the same session config entry, - // we'll print a warning. Note that the dedicated command-line options will take precedence. - const auto& user_session_configs = performance_test_config.run_config.session_config_entries; - for (auto& it : user_session_configs) { - session_options.AddConfigEntry(it.first.c_str(), it.second.c_str()); - } - - auto warn_dup_config_entry = [&user_session_configs](const char* key) -> void { - if (user_session_configs.find(key) != user_session_configs.end()) { - fprintf(stderr, "[WARNING]: Trying to set session config entry '%s' via multiple command-line options\n", key); - } - }; - - if (performance_test_config.run_config.intra_op_num_threads > 0) { - fprintf(stdout, "Setting intra_op_num_threads to %d\n", performance_test_config.run_config.intra_op_num_threads); - session_options.SetIntraOpNumThreads(performance_test_config.run_config.intra_op_num_threads); - } - - if (!performance_test_config.run_config.intra_op_thread_affinities.empty()) { - warn_dup_config_entry(kOrtSessionOptionsConfigIntraOpThreadAffinities); - fprintf(stdout, "Setting intra op thread affinity as %s\n", performance_test_config.run_config.intra_op_thread_affinities.c_str()); - session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, performance_test_config.run_config.intra_op_thread_affinities.c_str()); - } - - if (performance_test_config.run_config.disable_spinning) { - warn_dup_config_entry(kOrtSessionOptionsConfigAllowIntraOpSpinning); - fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); - session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); - } - - if (performance_test_config.run_config.disable_spinning_between_run) { - warn_dup_config_entry(kOrtSessionOptionsConfigForceSpinningStop); - fprintf(stdout, "Disabling intra-op thread spinning between runs\n"); - session_options.AddConfigEntry(kOrtSessionOptionsConfigForceSpinningStop, "1"); - } - - if (!performance_test_config.run_config.register_custom_op_path.empty()) { - session_options.RegisterCustomOpsLibrary(performance_test_config.run_config.register_custom_op_path.c_str()); - } - - if (performance_test_config.run_config.execution_mode == ExecutionMode::ORT_PARALLEL && performance_test_config.run_config.inter_op_num_threads > 0) { - fprintf(stdout, "Setting inter_op_num_threads to %d\n", performance_test_config.run_config.inter_op_num_threads); - session_options.SetInterOpNumThreads(performance_test_config.run_config.inter_op_num_threads); - } - - // Set optimization level. - session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); - if (!performance_test_config.run_config.profile_file.empty()) { - session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); - } - if (!performance_test_config.run_config.optimized_model_path.empty()) { - session_options.SetOptimizedModelFilePath(performance_test_config.run_config.optimized_model_path.c_str()); - } - if (performance_test_config.run_config.set_denormal_as_zero) { - warn_dup_config_entry(kOrtSessionOptionsConfigSetDenormalAsZero); - session_options.AddConfigEntry(kOrtSessionOptionsConfigSetDenormalAsZero, "1"); - } - if (!performance_test_config.run_config.free_dim_name_overrides.empty()) { - for (auto const& dim_override : performance_test_config.run_config.free_dim_name_overrides) { - if (g_ort->AddFreeDimensionOverrideByName(session_options, ToUTF8String(dim_override.first).c_str(), dim_override.second) != nullptr) { - fprintf(stderr, "AddFreeDimensionOverrideByName failed for named dimension: %s\n", ToUTF8String(dim_override.first).c_str()); - } else { - fprintf(stdout, "Overriding dimension with name, %s, to %d\n", ToUTF8String(dim_override.first).c_str(), (int)dim_override.second); - } - } - } - if (!performance_test_config.run_config.free_dim_denotation_overrides.empty()) { - for (auto const& dim_override : performance_test_config.run_config.free_dim_denotation_overrides) { - if (g_ort->AddFreeDimensionOverride(session_options, ToUTF8String(dim_override.first).c_str(), dim_override.second) != nullptr) { - fprintf(stderr, "AddFreeDimensionOverride failed for dimension denotation: %s\n", ToUTF8String(dim_override.first).c_str()); - } else { - fprintf(stdout, "Overriding dimension with denotation, %s, to %d\n", ToUTF8String(dim_override.first).c_str(), (int)dim_override.second); - } - } - } - if (provider_name_ == onnxruntime::kOpenVINOExecutionProvider) { -#ifdef USE_OPENVINO -#ifdef _MSC_VER - std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); -#else - std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; -#endif - std::unordered_map ov_options; - std::istringstream ss(ov_string); - std::string token; - while (ss >> token) { - if (token == "") { - continue; - } - auto pos = token.find("|"); - if (pos == std::string::npos || pos == 0 || pos == token.length()) { - ORT_THROW("[ERROR] [OpenVINO] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); - } - - auto key = token.substr(0, pos); - auto value = token.substr(pos + 1); - - if (key == "device_type") { - std::set ov_supported_device_types = {"CPU", "GPU", - "GPU.0", "GPU.1", "NPU"}; - std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16"}; - size_t num_gpus = 10; - for (size_t i = 0; i <= num_gpus; i++) { - ov_supported_device_types.emplace("GPU." + std::to_string(i)); - } - if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { - ov_options[key] = value; - } else if (deprecated_device_types.find(value) != deprecated_device_types.end()) { - ov_options[key] = value; - } else if (value.find("HETERO") == 0) { - ov_options[key] = value; - } else if (value.find("MULTI") == 0) { - ov_options[key] = value; - } else if (value.find("AUTO") == 0) { - ov_options[key] = value; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " - "Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from" - " HETERO/MULTI/AUTO options available. \n"); - } - } else if (key == "device_id") { - if (value == "CPU" || value == "GPU" || value == "NPU") { - ov_options[key] = value; - } else { - ORT_THROW("[ERROR] [OpenVINO] Unsupported device_id is selected. Select from available options."); - } - } else if (key == "precision") { - auto device_type = ov_options["device_type"]; - if (device_type.find("GPU") != std::string::npos) { - if (value == "") { - ov_options[key] = "FP16"; - continue; - } else if (value == "ACCURACY" || value == "FP16" || value == "FP32") { - ov_options[key] = value; - continue; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] Unsupported inference precision is selected. " - "GPU only supported FP32 / FP16. \n"); - } - } else if (device_type.find("NPU") != std::string::npos) { - if (value == "" || value == "ACCURACY" || value == "FP16") { - ov_options[key] = "FP16"; - continue; - } else { - ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. \n"); - } - } else if (device_type.find("CPU") != std::string::npos) { - if (value == "" || value == "ACCURACY" || value == "FP32") { - ov_options[key] = "FP32"; - continue; - } else { - ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n"); - } - } - } else if (key == "enable_opencl_throttling") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "enable_qdq_optimizer") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_qdq_optimizer' should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "enable_causallm") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] The value for the key 'enable_causallm' should be a boolean i.e. true or false." - " Default value is false. This provider option must be used with CausalLM Models viz. LLMs & SLMs only.\n"); - } - } else if (key == "disable_dynamic_shapes") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' " - "should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "num_of_threads") { - if (std::stoi(value) <= 0) { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n"); - } else { - ov_options[key] = value; - } - } else if (key == "load_config") { - auto load_json = [&](std::string filename) -> std::string { - std::ifstream input_filestream(filename); - if (!input_filestream.is_open()) { - ORT_THROW("Passed an invalid JSON config file path \"" + filename + "\"."); - } - nlohmann::json json_config; - try { - input_filestream >> json_config; - } catch (const OnnxRuntimeException& ex) { - ORT_THROW("Exception parsing config file \"" + filename + "\".\n" + ex.what()); - } catch (const std::exception& ex) { - throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what()); - } catch (...) { - throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n"); - } - if (json_config.empty()) { - ORT_THROW("Empty JSON content passed \"" + filename + "\"."); - } - return json_config.dump(); - }; - ov_options[key] = load_json(value); - } else if (key == "model_priority") { - ov_options[key] = value; - } else if (key == "cache_dir") { - ov_options[key] = value; - } else if (key == "context") { - ov_options[key] = value; - } else if (key == "num_streams") { - if (std::stoi(value) <= 0 && std::stoi(value) > 8) { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n"); - } else { - ov_options[key] = value; - } - } else if (key == "device_memory_name") { - device_memory_name_ = std::move(value); - } else if (key == "device_luid") { - ov_options[key] = value; - } else if (key == "reshape_input") { - ov_options[key] = value; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." - " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " - "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," - " 'enable_causallm', 'model_priority'] \n"); - } - } - session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); -#else - ORT_THROW("OpenVINO is not supported in this build\n"); -#endif - } - - if (performance_test_config.run_config.use_extensions) { - session_options.EnableOrtCustomOps(); - } - - if (!performance_test_config.model_info.load_via_path) { - session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); - } else { - std::ifstream file(performance_test_config.model_info.model_file_path.c_str(), - std::ios::binary | std::ios::in | std::ios::ate); - if (file.is_open()) { - const std::streampos fsize = file.tellg(); - file.seekg(0, std::ios_base::beg); - std::vector model_bytes(narrow(fsize)); - file.read(model_bytes.data(), narrow(fsize)); - session_ = Ort::Session(env, model_bytes.data(), model_bytes.size(), session_options); - } else { - ORT_THROW("Model file could not be opened.\n"); - } - } - size_t output_count = session_.GetOutputCount(); - output_names_.resize(output_count); - Ort::AllocatorWithDefaultOptions a; - for (size_t i = 0; i != output_count; ++i) { - auto output_name = session_.GetOutputNameAllocated(i, a); - assert(output_name != nullptr); - output_names_[i] = output_name.get(); - } - output_names_raw_ptr.resize(output_count); - for (size_t i = 0; i != output_count; ++i) { - output_names_raw_ptr[i] = output_names_[i].c_str(); - } - - const size_t input_count = static_cast(m.GetInputCount()); - for (size_t i = 0; i != input_count; ++i) { - input_names_str_[i] = m.GetInputName(i); - input_names_[i] = input_names_str_[i].c_str(); - } - - auto transform_fcn = std::function(); - auto new_value = std::function&, Ort::ConstTensorTypeAndShapeInfo&)>(); - if (device_memory_name_.empty()) { - transform_fcn = [](int64_t input) { return input; }; - new_value = [](OrtAllocator*, const std::vector&, Ort::ConstTensorTypeAndShapeInfo&) { - return Ort::Value(nullptr); - }; - } else { - Ort::MemoryInfo memory_info(nullptr); // Default initialize, will be overwritten - if (device_memory_name_ == CUDA) { - memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeDefault); - } else { - memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeCPUOutput); - } - custom_allocator_ = Ort::Allocator(session_, memory_info); - allocator_ = custom_allocator_; - - // free dimensions are treated as 1 if not overridden - transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; - new_value = [](OrtAllocator* allocator, const std::vector& output_shape, Ort::ConstTensorTypeAndShapeInfo& tensor_info) { - return Ort::Value::CreateTensor(allocator, output_shape.data(), output_shape.size(), tensor_info.GetElementType()); - }; - } - - for (size_t i = 0; i < output_names_raw_ptr.size(); i++) { - Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - std::vector output_shape = tensor_info.GetShape(); - std::transform(output_shape.begin(), output_shape.end(), output_shape.begin(), transform_fcn); - outputs_.emplace_back(new_value(allocator_, output_shape, tensor_info)); - } -} - -template -static void FillTensorDataTyped(Ort::Value& tensor, size_t count, int32_t seed = -1, T value = T{}) { - T* data = tensor.GetTensorMutableData(); - - bool random_init = false; - - if (seed >= 0) { - random_init = true; - - std::default_random_engine engine; - engine.seed(seed); - if constexpr (std::is_same::value) { - T max_value = 5.0f; - const std::uniform_real_distribution::param_type p(0, static_cast(max_value)); - std::uniform_real_distribution dist; - for (size_t i = 0; i < count; ++i) { - data[i] = dist(engine, p); - } - } else if constexpr (std::is_same::value || std::is_same::value) { - T max_value = std::numeric_limits::max(); - const std::uniform_int_distribution::param_type p(0, static_cast(max_value)); - std::uniform_int_distribution dist; - for (size_t i = 0; i < count; ++i) { - data[i] = static_cast(dist(engine, p)); - } - } else { - random_init = false; - fprintf(stdout, " this type of data won't be random initialized\n"); - } - } - if (!random_init) { - std::fill_n(data, count, value); - } -} - -// seed=-1 means we keep the initialized it with a constant value "T{}" -// in some case, we want to check the results for multi-runs, with the given we can recap the input data -// another reason is that, the input would be always 255/-127 for uint8_t or int8_t types of input. -// which will produce all zero outputs. -static void InitializeTensorWithSeed(int32_t seed, Ort::Value& tensor) { - const auto type_and_shape = tensor.GetTensorTypeAndShapeInfo(); - const auto count = type_and_shape.GetElementCount(); - const auto element_type = type_and_shape.GetElementType(); - -#define CASE_FOR_TYPE(T) \ - case Ort::TypeToTensorType::type: { \ - FillTensorDataTyped(tensor, count, seed); \ - } break - - switch (element_type) { - CASE_FOR_TYPE(Ort::Float16_t); - CASE_FOR_TYPE(Ort::BFloat16_t); - CASE_FOR_TYPE(float); - CASE_FOR_TYPE(double); - CASE_FOR_TYPE(int8_t); - CASE_FOR_TYPE(int16_t); - CASE_FOR_TYPE(int32_t); - CASE_FOR_TYPE(int64_t); - CASE_FOR_TYPE(uint8_t); - CASE_FOR_TYPE(uint16_t); - CASE_FOR_TYPE(uint32_t); - CASE_FOR_TYPE(uint64_t); - CASE_FOR_TYPE(bool); -#if !defined(DISABLE_FLOAT8_TYPES) - CASE_FOR_TYPE(Ort::Float8E4M3FN_t); - CASE_FOR_TYPE(Ort::Float8E4M3FNUZ_t); - CASE_FOR_TYPE(Ort::Float8E5M2_t); - CASE_FOR_TYPE(Ort::Float8E5M2FNUZ_t); -#endif - case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: - // string tensors are already initialized to contain empty strings - // see onnxruntime::Tensor::Init() - break; - default: - ORT_THROW("Unsupported tensor data type: ", element_type); - } - -#undef CASE_FOR_TYPE -} - -bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData(int32_t seed) { - Ort::AllocatorWithDefaultOptions default_allocator; - // iterate over all input nodes - for (size_t i = 0; i < static_cast(input_length_); i++) { - Ort::TypeInfo type_info = session_.GetInputTypeInfo(i); - if (type_info.GetONNXType() == ONNX_TYPE_TENSOR) { - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - std::vector input_node_dim = tensor_info.GetShape(); - - // free dimensions are treated as 1 if not overridden - auto transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; - std::transform(input_node_dim.begin(), input_node_dim.end(), input_node_dim.begin(), transform_fcn); - - if (device_memory_name_ != CUDA) { - Ort::Value input_tensor = Ort::Value::CreateTensor(allocator_, (const int64_t*)input_node_dim.data(), - input_node_dim.size(), tensor_info.GetElementType()); - InitializeTensorWithSeed(seed, input_tensor); - PreLoadTestData(0, i, std::move(input_tensor)); - } -// Create tensor on CPU, initialize and copy to CUDA tensor -#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_NV) - else { - Ort::Value default_tensor = Ort::Value::CreateTensor(default_allocator, (const int64_t*)input_node_dim.data(), - input_node_dim.size(), tensor_info.GetElementType()); - InitializeTensorWithSeed(seed, default_tensor); - - // Get pointer to CPU tensor data - const void* default_ptr = default_tensor.GetTensorRawData(); - - size_t total_bytes = default_tensor.GetTensorSizeInBytes(); - - Ort::Value cuda_tensor = Ort::Value::CreateTensor(allocator_, input_node_dim.data(), - input_node_dim.size(), tensor_info.GetElementType()); - - void* cuda_ptr = cuda_tensor.GetTensorMutableData(); - - // Copy the initialized data from CPU to GPU - cudaError_t cuda_err = cudaMemcpy(cuda_ptr, default_ptr, total_bytes, cudaMemcpyHostToDevice); - if (cuda_err != cudaSuccess) { - ORT_THROW("Failed to copy tensor data from CPU to CUDA device. CUDA Error: ", cudaGetErrorString(cuda_err)); - } - PreLoadTestData(0, i, std::move(cuda_tensor)); - } -#endif - } - } - return true; -} - -} // namespace perftest -} // namespace onnxruntime From 1cfa077699504a80036ec617bb5ce992fdebb156 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 5 Aug 2025 11:13:16 -0700 Subject: [PATCH 43/46] clean up --- onnxruntime/test/perftest/command_args_parser.h | 1 - onnxruntime/test/perftest/main.cc | 2 +- onnxruntime/test/perftest/posix/utils.cc | 1 + onnxruntime/test/perftest/test_configuration.h | 1 - onnxruntime/test/perftest/windows/utils.cc | 4 +--- 5 files changed, 3 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.h b/onnxruntime/test/perftest/command_args_parser.h index ab99621883a31..5a94f99874797 100644 --- a/onnxruntime/test/perftest/command_args_parser.h +++ b/onnxruntime/test/perftest/command_args_parser.h @@ -3,7 +3,6 @@ #pragma once #include -#include "test_configuration.h" namespace onnxruntime { namespace perftest { diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index c3a3e5ad65c73..240fb0512e69d 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -5,8 +5,8 @@ #include #include #include "command_args_parser.h" -#include "utils.h" #include "performance_runner.h" +#include "utils.h" #include "strings_helper.h" #include diff --git a/onnxruntime/test/perftest/posix/utils.cc b/onnxruntime/test/perftest/posix/utils.cc index d44cbcea22734..9bf029d8dff35 100644 --- a/onnxruntime/test/perftest/posix/utils.cc +++ b/onnxruntime/test/perftest/posix/utils.cc @@ -57,6 +57,7 @@ class CPUUsage : public ICPUUsage { std::unique_ptr CreateICPUUsage() { return std::make_unique(); } + } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index ae81dc6b7ef40..c09f2184ccfa3 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -7,7 +7,6 @@ #include #include #include -#include #include "core/graph/constants.h" #include "core/framework/session_options.h" diff --git a/onnxruntime/test/perftest/windows/utils.cc b/onnxruntime/test/perftest/windows/utils.cc index 15982522efe78..9a1846a1c0901 100644 --- a/onnxruntime/test/perftest/windows/utils.cc +++ b/onnxruntime/test/perftest/windows/utils.cc @@ -2,14 +2,11 @@ // Licensed under the MIT License. #include "test/perftest/utils.h" -#include "test/perftest/strings_helper.h" -#include #include #include #include -#include namespace onnxruntime { namespace perftest { @@ -77,6 +74,7 @@ class CPUUsage : public ICPUUsage { std::unique_ptr CreateICPUUsage() { return std::make_unique(); } + } // namespace utils } // namespace perftest } // namespace onnxruntime From 8e48b801cab1c362e8b88135281b165a2024c617 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 5 Aug 2025 15:47:36 -0700 Subject: [PATCH 44/46] Add flags_internal::FlagImpl::Init to the filter of the mem check --- onnxruntime/core/platform/windows/debug_alloc.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc index dd6388b76e0cf..7673ef0212e97 100644 --- a/onnxruntime/core/platform/windows/debug_alloc.cc +++ b/onnxruntime/core/platform/windows/debug_alloc.cc @@ -249,6 +249,11 @@ Memory_LeakCheck::~Memory_LeakCheck() { // treating "--help" as an early termination condition (the program does not perform its // normal execution. See MaybeExit in usage.cc). // + // In normal execution of onnxruntime_perf_test, Abseil flags are defined as global variables + // and persist for the lifetime of the program. They are not explicitly freed, so leak checkers + // may report them, but these are not true leaks. Valgrind, for example, reports them as + // "still reachable" rather than "definitely lost". + // // As a result, many resources will not be cleaned up, including: // - Abseil's internal storage for flags, allocated in static/global objects inside // absl::flags_internal (e.g., FlagImpl::Init) @@ -271,6 +276,7 @@ Memory_LeakCheck::~Memory_LeakCheck() { string.find("PyInit_onnxruntime_pybind11_state") == std::string::npos && string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos && string.find("flags_internal::ParseCommandLineImpl") == std::string::npos && + string.find("flags_internal::FlagImpl::Init") == std::string::npos && string.find("SetFlagsUsageConfig") == std::string::npos && string.find("perftest::utils::ConvertArgvToUtf8Strings") == std::string::npos && string.find("perftest::utils::CStringsFromStrings") == std::string::npos && From 330d8ed7653e56bf9719cf80f31ff928472d5430 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 6 Aug 2025 09:43:32 -0700 Subject: [PATCH 45/46] address reviewer's comments --- onnxruntime/test/perftest/command_args_parser.cc | 10 +++------- onnxruntime/test/perftest/common_utils.cc | 11 +---------- onnxruntime/test/perftest/main.cc | 4 ++-- onnxruntime/test/perftest/ort_test_session.cc | 6 +++--- onnxruntime/test/perftest/strings_helper.cc | 2 +- onnxruntime/test/perftest/strings_helper.h | 2 +- onnxruntime/test/perftest/test_configuration.h | 4 ++-- onnxruntime/test/perftest/utils.h | 4 +--- 8 files changed, 14 insertions(+), 29 deletions(-) diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index a9db91d47b503..5c81696d5c57e 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -203,11 +203,7 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a // _main.cc, where the is the name of the binary (without .exe on Windows). See usage_config.cc in abseil for more details. absl::FlagsUsageConfig config; config.contains_help_flags = [](absl::string_view filename) { - auto suffix = utils::GetBasename(filename); - std::string_view file_has_the_flag_defs(__FILE__); - file_has_the_flag_defs = utils::GetBasename(file_has_the_flag_defs); - - return suffix == file_has_the_flag_defs; + return std::filesystem::path(filename).filename() == std::filesystem::path(__FILE__).filename(); }; config.normalize_filename = [](absl::string_view f) { @@ -493,14 +489,14 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a // --list_ep_devices if (absl::GetFlag(FLAGS_list_ep_devices)) { - test_config.list_available_devices = true; + test_config.list_available_ep_devices = true; return true; } // --select_ep_devices { const auto& select_ep_devices = absl::GetFlag(FLAGS_select_ep_devices); - if (!select_ep_devices.empty()) test_config.selected_devices = select_ep_devices; + if (!select_ep_devices.empty()) test_config.selected_ep_device_indices = select_ep_devices; } if (positional.size() == 2) { diff --git a/onnxruntime/test/perftest/common_utils.cc b/onnxruntime/test/perftest/common_utils.cc index 2d4b90996c7d3..5cc6c240e25f0 100644 --- a/onnxruntime/test/perftest/common_utils.cc +++ b/onnxruntime/test/perftest/common_utils.cc @@ -13,7 +13,7 @@ namespace onnxruntime { namespace perftest { namespace utils { -void ListDevices(const Ort::Env& env) { +void ListEpDevices(const Ort::Env& env) { std::vector ep_devices = env.GetEpDevices(); for (size_t i = 0; i < ep_devices.size(); ++i) { @@ -90,15 +90,6 @@ std::vector CStringsFromStrings(std::vector& utf8_args) { return utf8_argv; } -// This helper function returns the basename of the filename passed as an argument -std::string_view GetBasename(std::string_view filename) { - auto last_slash_pos = filename.find_last_of("/\\"); - - return last_slash_pos == absl::string_view::npos - ? filename - : filename.substr(last_slash_pos + 1); -} - } // namespace utils } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 240fb0512e69d..973baf774b024 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -59,8 +59,8 @@ int real_main(int argc, char* argv[]) { } }); - if (test_config.list_available_devices) { - perftest::utils::ListDevices(env); + if (test_config.list_available_ep_devices) { + perftest::utils::ListEpDevices(env); if (test_config.registered_plugin_eps.empty()) { fprintf(stdout, "No plugin execution provider libraries are registered. Please specify them using \"--plugin_ep_libs\"; otherwise, only CPU may be available.\n"); } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 0ed8d485e3b00..7156a1eb5c347 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -73,10 +73,10 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device std::unordered_set ep_set(ep_list.begin(), ep_list.end()); // Select EP devices by provided device index - if (!performance_test_config.selected_devices.empty()) { + if (!performance_test_config.selected_ep_device_indices.empty()) { std::vector device_list; - device_list.reserve(performance_test_config.selected_devices.size()); - ParseEpDeviceList(performance_test_config.selected_devices, device_list); + device_list.reserve(performance_test_config.selected_ep_device_indices.size()); + ParseEpDeviceIndexList(performance_test_config.selected_ep_device_indices, device_list); for (auto index : device_list) { if (static_cast(index) > (ep_devices.size() - 1)) { fprintf(stderr, "%s", "The device index provided is not correct. Will skip this device id."); diff --git a/onnxruntime/test/perftest/strings_helper.cc b/onnxruntime/test/perftest/strings_helper.cc index ba023679ea387..f4860b35c79da 100644 --- a/onnxruntime/test/perftest/strings_helper.cc +++ b/onnxruntime/test/perftest/strings_helper.cc @@ -79,7 +79,7 @@ void ParseEpList(const std::string& input, std::vector& result) { } } -void ParseEpDeviceList(const std::string& input, std::vector& result) { +void ParseEpDeviceIndexList(const std::string& input, std::vector& result) { std::stringstream ss(input); std::string item; diff --git a/onnxruntime/test/perftest/strings_helper.h b/onnxruntime/test/perftest/strings_helper.h index d54b274807815..621ab746273bd 100644 --- a/onnxruntime/test/perftest/strings_helper.h +++ b/onnxruntime/test/perftest/strings_helper.h @@ -18,6 +18,6 @@ void ParseEpList(const std::string& input, std::vector& result); void ParseEpOptions(const std::string& input, std::vector>& result); -void ParseEpDeviceList(const std::string& input, std::vector& result); +void ParseEpDeviceIndexList(const std::string& input, std::vector& result); } // namespace perftest } // namespace onnxruntime diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index c09f2184ccfa3..29ee84dd40dac 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -77,8 +77,8 @@ struct PerformanceTestConfig { RunConfig run_config; std::basic_string plugin_ep_names_and_libs; std::vector registered_plugin_eps; - std::string selected_devices; - bool list_available_devices = false; + std::string selected_ep_device_indices; + bool list_available_ep_devices = false; }; } // namespace perftest diff --git a/onnxruntime/test/perftest/utils.h b/onnxruntime/test/perftest/utils.h index 7b4161abd505f..9f180e2c8d942 100644 --- a/onnxruntime/test/perftest/utils.h +++ b/onnxruntime/test/perftest/utils.h @@ -31,9 +31,7 @@ void RegisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test void UnregisterExecutionProviderLibrary(Ort::Env& env, PerformanceTestConfig& test_config); -void ListDevices(const Ort::Env& env); - -std::string_view GetBasename(std::string_view filename); +void ListEpDevices(const Ort::Env& env); } // namespace utils } // namespace perftest From a8483409b494e417d405588fe328a6e877d027e9 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 6 Aug 2025 09:48:49 -0700 Subject: [PATCH 46/46] Remove the filtering of expected mem leak with ABSL flags, will handle this distracting leak message in another PR --- .../core/platform/windows/debug_alloc.cc | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc index 7673ef0212e97..ad26280a90ecb 100644 --- a/onnxruntime/core/platform/windows/debug_alloc.cc +++ b/onnxruntime/core/platform/windows/debug_alloc.cc @@ -243,26 +243,6 @@ Memory_LeakCheck::~Memory_LeakCheck() { // empty_string = new string; // empty_named_groups = new std::map; // empty_group_names = new std::map; }); - // - // In the Abseil (ABSL) flags library used by onnxruntime_perf_test, specifying "--help" - // causes the program to call exit(1). This is an intentional design choice from Google, - // treating "--help" as an early termination condition (the program does not perform its - // normal execution. See MaybeExit in usage.cc). - // - // In normal execution of onnxruntime_perf_test, Abseil flags are defined as global variables - // and persist for the lifetime of the program. They are not explicitly freed, so leak checkers - // may report them, but these are not true leaks. Valgrind, for example, reports them as - // "still reachable" rather than "definitely lost". - // - // As a result, many resources will not be cleaned up, including: - // - Abseil's internal storage for flags, allocated in static/global objects inside - // absl::flags_internal (e.g., FlagImpl::Init) - // - The absl::FlagsUsageConfig instance - // - Performance test utilities that hold std::vector objects for converting argv to UTF-8 strings - // - The onnxruntime::perftest::PerformanceTestConfig instance - // - // Essentially, any object instantiated before calling absl::ParseCommandLine will not - // be cleaned up. This behavior is expected when running with "--help". if (string.find("RtlRunOnceExecuteOnce") == std::string::npos && string.find("re2::RE2::Init") == std::string::npos && string.find("dynamic initializer for 'FLAGS_") == std::string::npos && @@ -274,13 +254,7 @@ Memory_LeakCheck::~Memory_LeakCheck() { string.find("testing::internal::ThreadLocalRegistryImpl::GetThreadLocalsMapLocked") == std::string::npos && string.find("testing::internal::ThreadLocalRegistryImpl::GetValueOnCurrentThread") == std::string::npos && string.find("PyInit_onnxruntime_pybind11_state") == std::string::npos && - string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos && - string.find("flags_internal::ParseCommandLineImpl") == std::string::npos && - string.find("flags_internal::FlagImpl::Init") == std::string::npos && - string.find("SetFlagsUsageConfig") == std::string::npos && - string.find("perftest::utils::ConvertArgvToUtf8Strings") == std::string::npos && - string.find("perftest::utils::CStringsFromStrings") == std::string::npos && - string.find("perftest::PerformanceTestConfig::PerformanceTestConfig") == std::string::npos) { + string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos) { if (leaked_bytes == 0) DebugPrint("\n-----Starting Heap Trace-----\n\n");