Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,9 @@ if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE)
if (${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc" OR ${QNN_ARCH_ABI} STREQUAL "arm64x-windows-msvc")
file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so"
"${onnxruntime_QNN_HOME}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so"
"${onnxruntime_QNN_HOME}/lib/hexagon-v73/unsigned/libqnnhtpv73.cat")
"${onnxruntime_QNN_HOME}/lib/hexagon-v73/unsigned/libqnnhtpv73.cat"
"${onnxruntime_QNN_HOME}/lib/hexagon-v81/unsigned/libQnnHtpV81Skel.so"
"${onnxruntime_QNN_HOME}/lib/hexagon-v81/unsigned/libqnnhtpv81.cat")
list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB})
endif()
message(STATUS "QNN lib files: " ${QNN_LIB_FILES})
Expand Down
1 change: 1 addition & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -3937,6 +3937,7 @@ struct OrtApi {
* -# "69"
* -# "73"
* -# "75"
* -# "81"
* "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
* "enable_htp_fp16_precision": Used for float32 model for HTP backend.
* Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ static void ParseHtpArchitecture(const std::string& htp_arch_string, QnnHtpDevic
qnn_htp_arch = QNN_HTP_DEVICE_ARCH_V73;
} else if (htp_arch_string == "75") {
qnn_htp_arch = QNN_HTP_DEVICE_ARCH_V75;
} else if (htp_arch_string == "81") {
qnn_htp_arch = QNN_HTP_DEVICE_ARCH_V81;
} else {
LOGS_DEFAULT(WARNING) << "Invalid HTP architecture: " << htp_arch_string;
}
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/test/onnx/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void usage() {
"\t '0', '1', '2', '3', default is '0'.\n"
"\t [QNN only] [soc_model]: The SoC Model number. Refer to QNN SDK documentation for specific values. Defaults to '0' (unknown). \n"
"\t [QNN only] [htp_arch]: The minimum HTP architecture. The driver will use ops compatible with this architecture. \n"
"\t Options are '0', '68', '69', '73', '75'. Defaults to '0' (none). \n"
"\t Options are '0', '68', '69', '73', '75', '81'. Defaults to '0' (none). \n"
"\t [QNN only] [device_id]: The ID of the device to use when setting 'htp_arch'. Defaults to '0' (for single device). \n"
"\t [QNN only] [enable_htp_fp16_precision]: Enable the HTP_FP16 precision so that the float32 model will be inferenced with fp16 precision. \n"
"\t Otherwise, it will be fp32 precision. Works for float32 model for HTP backend. Defaults to '1' (with FP16 precision.). \n"
Expand Down Expand Up @@ -607,7 +607,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
ORT_THROW("Wrong value for htp_graph_finalization_optimization_mode. select from: " + str);
}
} else if (key == "htp_arch") {
std::unordered_set<std::string> supported_htp_archs = {"0", "68", "69", "73", "75"};
std::unordered_set<std::string> supported_htp_archs = {"0", "68", "69", "73", "75", "81"};
if (supported_htp_archs.find(value) == supported_htp_archs.end()) {
std::ostringstream str_stream;
std::copy(supported_htp_archs.begin(), supported_htp_archs.end(),
Expand Down
22 changes: 22 additions & 0 deletions onnxruntime/test/providers/qnn/gemm_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,19 @@ TEST_F(QnnHTPBackendTests, Gemm_Static_B_And_Bias) {
ExpectedEPNodeAssignment::All);
}

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=31.434787750244141, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): 29.434776306152344
// qdq@QNN_EP val: 28.229671478271484 (err: 1.2051048278808594, err/output_range: 3.8336660861968994%)
// qdq@CPU_EP val: 29.092588424682617 (err: 0.34218788146972656, err/output_range: 1.0885642766952515%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 2.7451016902923584%
// Test 8-bit QDQ Gemm with transposed A/B and static B and Bias inputs.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_Gemm_TransAB_Static_B_And_Bias_U8) {
#else
TEST_F(QnnHTPBackendTests, Gemm_TransAB_Static_B_And_Bias_U8) {
#endif
std::vector<float> input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
std::vector<float> input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24);
std::vector<float> input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4);
Expand All @@ -475,8 +486,19 @@ TEST_F(QnnHTPBackendTests, Gemm_TransAB_Static_B_And_Bias_U16Act_U8Weight) {
true); // Use com.microsoft Q/DQ ops
}

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=31.434787750244141, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): 29.434776306152344
// qdq@QNN_EP val: 28.229671478271484 (err: 1.2051048278808594, err/output_range: 3.8336660861968994%)
// qdq@CPU_EP val: 29.092588424682617 (err: 0.34218788146972656, err/output_range: 1.0885642766952515%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 2.7451016902923584%
// Test QDQ Gemm with transposed A/B and dynamic (i.e., not initializer) B and Bias inputs.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_Gemm_TransAB_Dynamic_B_And_Bias) {
#else
TEST_F(QnnHTPBackendTests, Gemm_TransAB_Dynamic_B_And_Bias) {
#endif
std::vector<float> input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
std::vector<float> input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24);
std::vector<float> input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4);
Expand Down
22 changes: 22 additions & 0 deletions onnxruntime/test/providers/qnn/lrn_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,18 @@ TEST_F(QnnCPUBackendTests, LRN_size_larger_than_channel) {
// HTP tests:
//

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 309
// output_range=19.910608291625977, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -9.4876022338867188
// qdq@QNN_EP val: -9.3696985244750977 (err: 0.11790370941162109, err/output_range: 0.59216529130935669%)
// qdq@CPU_EP val: -9.5258598327636719 (err: 0.038257598876953125, err/output_range: 0.19214680790901184%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 0.40001851320266724%
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_LRNSize3) {
#else
TEST_F(QnnHTPBackendTests, LRNSize3) {
#endif
RunQDQLRNOpTest<uint8_t>(TestInputDef<float>({1, 128, 4, 5}, false, -10.0f, 10.0f),
3, // Size
ExpectedEPNodeAssignment::All,
Expand All @@ -134,7 +145,18 @@ TEST_F(QnnHTPBackendTests, LRNSize3) {
13); // opset
}

// Broken on v79 devices:
// Inaccuracy detected for output 'output_0', element 185
// output_range=19.911705017089844, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -5.3502998352050781
// qdq@QNN_EP val: -5.2317028045654297 (err: 0.11859703063964844, err/output_range: 0.59561461210250854%)
// qdq@CPU_EP val: -5.3878731727600098 (err: 0.037573337554931641, err/output_range: 0.18869975209236145%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 0.40691488981246948%
#if defined(__aarch64__)
TEST_F(QnnHTPBackendTests, DISABLED_LRNSize5) {
#else
TEST_F(QnnHTPBackendTests, LRNSize5) {
#endif
RunQDQLRNOpTest<uint8_t>(TestInputDef<float>({1, 128, 4, 5}, false, -10.0f, 10.0f),
5, // Size
ExpectedEPNodeAssignment::All,
Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/test/providers/qnn/matmul_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,19 @@ TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp) {
// RunMatMulOpTest({3, 3, 3}, {3, 2}, true, false, ExpectedEPNodeAssignment::All, "htp", 18, 1e-2f);
}

// Broken on v79 and v81 devices with several results outside of acceptable tolerance.
// Example:
// Inaccuracy detected for output 'output_0', element 0
// output_range=0.010000000707805157, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): 0.010000000707805157
// qdq@QNN_EP val: 0.0099215693771839142 (err: 7.8431330621242523e-05, err/output_range: 0.78431320190429688%)
// qdq@CPU_EP val: 0.010000000707805157 (err: 0, err/output_range: 0%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 0.78431320190429688%
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_QDQ) {
#else
TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) {
#endif
// UINT8
// RunQDQMatMulOpTest(shape_0, shape_1, is_initializer_0, is_initializer_1, expected_ep_assignment, opset,
// use_contrib_qdq)
Expand Down
44 changes: 44 additions & 0 deletions onnxruntime/test/providers/qnn/reduce_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -430,12 +430,23 @@ static void RunReduceOpQDQTest(const std::string& op_type,
// ReduceSum
//

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=2.785210132598877, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -2.785210132598877
// qdq@QNN_EP val: -2.6541414260864258 (err: 0.13106870651245117, err/output_range: 4.7058820724487305%)
// qdq@CPU_EP val: -2.7415206432342529 (err: 0.043689489364624023, err/output_range: 1.5686246156692505%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 3.1372575759887695%
// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
// - Uses opset 13, which has "axes" as an input.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumU8Opset13) {
#else
TEST_F(QnnHTPBackendTests, ReduceSumU8Opset13) {
#endif
RunReduceOpQDQTest<uint8_t>("ReduceSum",
TestInputDef<float>({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}),
{0, 1}, // axes
Expand All @@ -454,12 +465,23 @@ TEST_F(QnnHTPBackendTests, ReduceSumU8Opset13_LastAxis) {
13, // opset
ExpectedEPNodeAssignment::All);
}
// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=2.785210132598877, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -2.785210132598877
// qdq@QNN_EP val: -2.6541414260864258 (err: 0.13106870651245117, err/output_range: 4.7058820724487305%)
// qdq@CPU_EP val: -2.7415206432342529 (err: 0.043689489364624023, err/output_range: 1.5686246156692505%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 3.1372575759887695%
// Test creates a Q -> DQ -> ReduceSum -> Q -> DQ graph, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
// - Uses opset 11, which has "axes" as an attribute.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_ReduceSumU8Opset11) {
#else
TEST_F(QnnHTPBackendTests, ReduceSumU8Opset11) {
#endif
RunReduceOpQDQTest<uint8_t>("ReduceSum",
TestInputDef<float>({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}),
{0, 1}, // axes
Expand Down Expand Up @@ -628,12 +650,23 @@ TEST_F(QnnHTPBackendTests, ReduceMinS8Opset18) {
// ReduceMean
//

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=0.69630253314971924, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -0.69630253314971924
// qdq@QNN_EP val: -0.66353535652160645 (err: 0.032767176628112793, err/output_range: 4.7058820724487305%)
// qdq@CPU_EP val: -0.68538016080856323 (err: 0.010922372341156006, err/output_range: 1.5686246156692505%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 3.1372575759887695%
// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
// - Uses opset 18, which has "axes" as an input.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_ReduceMeanU8Opset18) {
#else
TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset18) {
#endif
RunReduceOpQDQTest<uint8_t>("ReduceMean",
TestInputDef<float>({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}),
{0, 1}, // axes
Expand All @@ -653,12 +686,23 @@ TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset18_LastAxis) {
ExpectedEPNodeAssignment::All);
}

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=0.69630253314971924, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -0.69630253314971924
// qdq@QNN_EP val: -0.66353535652160645 (err: 0.032767176628112793, err/output_range: 4.7058820724487305%)
// qdq@CPU_EP val: -0.68538016080856323 (err: 0.010922372341156006, err/output_range: 1.5686246156692505%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 3.1372575759887695%
// Test creates a Q -> DQ -> ReduceMean -> Q -> DQ graph, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
// - Uses opset 13, which has "axes" as an attribute.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_ReduceMeanU8Opset13) {
#else
TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset13) {
#endif
RunReduceOpQDQTest<uint8_t>("ReduceMean",
TestInputDef<float>({2, 2}, false, {-10.0f, 3.21289f, -5.9981f, 10.0f}),
{0, 1}, // axes
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/test/providers/qnn/simple_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,19 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Abs_U16) {
true); // Use com.microsoft domain for Q/DQ ops
}

// Broken on v79 and v81 devices:
// Inaccuracy detected for output 'output_0', element 0
// output_range=24, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -12
// qdq@QNN_EP val: -11.011764526367188 (err: 0.9882354736328125, err/output_range: 4.1176481246948242%)
// qdq@CPU_EP val: -12.047059059143066 (err: 0.047059059143066406, err/output_range: 0.19607941806316376%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 3.9215683937072754%
// Test accuracy of QDQ Ceil op.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Ceil) {
#else
TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) {
#endif
const std::vector<float> input_data = GetFloatDataInRange(-12.0f, 12.0f, 6);
RunQDQOpTest<uint8_t>("Ceil",
{TestInputDef<float>({1, 2, 3}, false, input_data)},
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,14 +417,17 @@ def finalize_options(self):
libs.extend(["DirectML.dll"])
# WebGPU/Dawn Libs
libs.extend(["dxcompiler.dll", "dxil.dll"])
# QNN V68/V73 dependencies
# QNN V68/V73/V81 dependencies
qnn_deps = [
"QnnCpu.dll",
"QnnGpu.dll",
"QnnHtp.dll",
"QnnSaver.dll",
"QnnSystem.dll",
"QnnHtpPrepare.dll",
"QnnHtpV81Stub.dll",
"libQnnHtpV81Skel.so",
"libqnnhtpv81.cat",
"QnnHtpV73Stub.dll",
"libQnnHtpV73Skel.so",
"libqnnhtpv73.cat",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ steps:
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpPrepare.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV68Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV73Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV81Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnSaver.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnSystem.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ steps:
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpPrepare.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV68Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV73Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV81Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnSaver.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnSystem.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib

Expand Down
31 changes: 22 additions & 9 deletions tools/nuget/generate_nuspec_for_native_nuget.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,15 +586,28 @@ def generate_files(line_list, args):
files_list.append(
"<file src=" + '"' + os.path.join(args.native_build_path, "QnnHtpPrepare.dll") + runtimes + " />"
)
files_list.append(
"<file src=" + '"' + os.path.join(args.native_build_path, "QnnHtpV73Stub.dll") + runtimes + " />"
)
files_list.append(
"<file src=" + '"' + os.path.join(args.native_build_path, "libQnnHtpV73Skel.so") + runtimes + " />"
)
files_list.append(
"<file src=" + '"' + os.path.join(args.native_build_path, "libqnnhtpv73.cat") + runtimes + " />"
)
for htp_arch in [73, 81]:
files_list.append(
"<file src="
+ '"'
+ os.path.join(args.native_build_path, f"QnnHtpV{htp_arch}Stub.dll")
+ runtimes
+ " />"
)
files_list.append(
"<file src="
+ '"'
+ os.path.join(args.native_build_path, f"libQnnHtpV{htp_arch}Skel.so")
+ runtimes
+ " />"
)
files_list.append(
"<file src="
+ '"'
+ os.path.join(args.native_build_path, f"libqnnhtpv{htp_arch}.cat")
+ runtimes
+ " />"
)

is_ado_packaging_build = False
# Process runtimes
Expand Down
Loading