From 5e452049f87a5d290120e87bb6472b8145d2de57 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Wed, 11 Mar 2026 17:42:27 -0700 Subject: [PATCH 1/8] benchdnn: measure_perf_individual: add warm-up run --- tests/benchdnn/dnnl_common.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/benchdnn/dnnl_common.cpp b/tests/benchdnn/dnnl_common.cpp index 41cf025c153..3f0e09408d6 100644 --- a/tests/benchdnn/dnnl_common.cpp +++ b/tests/benchdnn/dnnl_common.cpp @@ -663,6 +663,10 @@ void finalize() { inline int measure_perf_individual(timer::timer_t &t, dnnl_stream_t stream, perf_function_t &perf_func, std::vector &dnnl_args) { + // Warm-up run. + DNN_SAFE(perf_func(stream, dnnl_args), WARN); + DNN_SAFE(dnnl_stream_wait(stream), CRIT); + cold_cache_t cold_cache(dnnl_args, stream); t.reset(); From 2b6c4d21535a7b49a842d2e8f9a61dcd05a964f3 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Thu, 12 Mar 2026 15:45:13 -0700 Subject: [PATCH 2/8] benchdnn: compare: make norm check complementary to p2p when allowed --- tests/benchdnn/bnorm/bnorm.cpp | 4 +-- tests/benchdnn/conv/conv.cpp | 4 +-- tests/benchdnn/deconv/deconv.cpp | 4 +-- tests/benchdnn/gnorm/gnorm.cpp | 4 +-- tests/benchdnn/graph/ref_primitive.cpp | 38 +++++--------------- tests/benchdnn/lnorm/lnorm.cpp | 4 +-- tests/benchdnn/rnn/rnn.cpp | 8 ++--- tests/benchdnn/self/norm.cpp | 2 +- tests/benchdnn/utils/compare.cpp | 50 ++++++++++++++++++++++---- tests/benchdnn/utils/compare.hpp | 16 ++++++--- 10 files changed, 79 insertions(+), 55 deletions(-) diff --git a/tests/benchdnn/bnorm/bnorm.cpp b/tests/benchdnn/bnorm/bnorm.cpp index 8c93b65947f..6750656e48b 100644 --- a/tests/benchdnn/bnorm/bnorm.cpp +++ b/tests/benchdnn/bnorm/bnorm.cpp @@ -513,8 +513,8 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, // Since bwd testing is done using results from forward which are random // fp32 values, diff_scale starts fluctuating, so we check norm for both // data, SC, and SH. 
- const bool compare_with_norm = (prb->dir & FLAG_BWD); - cmp.set_norm_validation_mode(compare_with_norm); + const bool allow_norm_check = (prb->dir & FLAG_BWD); + cmp.set_allow_norm_check(allow_norm_check); // Digits must be non-negative for safe left-shifting when `digits_dt` // exceeds `digits_f32`. diff --git a/tests/benchdnn/conv/conv.cpp b/tests/benchdnn/conv/conv.cpp index c8fb49d6f89..ecd50fa259b 100644 --- a/tests/benchdnn/conv/conv.cpp +++ b/tests/benchdnn/conv/conv.cpp @@ -441,8 +441,8 @@ void skip_invalid_prb(const prb_t *prb, res_t *res) {} void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const args_t &ref_args) { - const bool compare_with_norm = (prb->alg & WINO); - cmp.set_norm_validation_mode(compare_with_norm); + const bool allow_norm_check = (prb->alg & WINO); + cmp.set_allow_norm_check(allow_norm_check); float trh = 0.f; if (prb->alg & WINO) { diff --git a/tests/benchdnn/deconv/deconv.cpp b/tests/benchdnn/deconv/deconv.cpp index f48069f66f8..0c6b3c928b7 100644 --- a/tests/benchdnn/deconv/deconv.cpp +++ b/tests/benchdnn/deconv/deconv.cpp @@ -373,8 +373,8 @@ void skip_invalid_prb(const prb_t *prb, res_t *res) {} void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const args_t &ref_args) { - const bool compare_with_norm = (prb->alg & WINO); - cmp.set_norm_validation_mode(compare_with_norm); + const bool allow_norm_check = (prb->alg & WINO); + cmp.set_allow_norm_check(allow_norm_check); float trh = 0.f; if (prb->alg & WINO) { diff --git a/tests/benchdnn/gnorm/gnorm.cpp b/tests/benchdnn/gnorm/gnorm.cpp index 1dbdbe27317..84983891c1e 100644 --- a/tests/benchdnn/gnorm/gnorm.cpp +++ b/tests/benchdnn/gnorm/gnorm.cpp @@ -534,8 +534,8 @@ void skip_invalid_prb(const prb_t *prb, res_t *res) { void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const args_t &ref_args) { - const bool compare_with_norm = (prb->dir & FLAG_BWD); - cmp.set_norm_validation_mode(compare_with_norm); + const 
bool allow_norm_check = (prb->dir & FLAG_BWD); + cmp.set_allow_norm_check(allow_norm_check); const auto dt = prb->dir & FLAG_FWD ? prb->dt[1] : prb->dt[0]; // Digits must be non-negative for safe left-shifting when `digits_dt` diff --git a/tests/benchdnn/graph/ref_primitive.cpp b/tests/benchdnn/graph/ref_primitive.cpp index 66b79b29773..bd07a6fc8a4 100644 --- a/tests/benchdnn/graph/ref_primitive.cpp +++ b/tests/benchdnn/graph/ref_primitive.cpp @@ -367,34 +367,9 @@ void ref_primitive_t::check_correctness( cmp.set_has_eltwise_post_op(has_eltwise); cmp.set_op_output_has_nans(has_nans); - dnn_mem_t mem_fp_abx(mem_fp, dnnl_f32, tag::abx, ::get_cpu_engine()); - // Reset `res` counters when more than a single arg is checked. - res->errors = 0; - res->total = 0; - auto st = cmp.compare(mem_fp_abx, mem_dt, attr, res); - if (st == OK) continue; - - // If comparison failed, try a norm comparison. However, at this point, - // to limit the risk of hiding issues, the norm comparison is enabled - // if number of affected points is really small compared to the total - // number of points - 1 point per every 1024. - // This can be revisited later. - const size_t allowed_error_points = res->total / 1024; - const bool norm_check_allowed = allowed_error_points >= res->errors; - - BENCHDNN_PRINT(0, - "[COMPARE_STATS] Norm check is %s; error_to_total_ratio: " - "%zu/%zu; allowed_ratio: %zu/%zu;\n", - norm_check_allowed ? "allowed" : "prohibited", res->errors, - res->total, allowed_error_points, res->total); - - if (!norm_check_allowed) continue; - - // Reset the `res` statistics state. - res->state = EXECUTED; - res->errors = 0; - res->total = 0; - + // `cmp` object has internal knowledge on when this check must be + // enabled. + cmp.set_allow_norm_check(true); // TODO: there's an open question with how to determine the threshold // and what the criteria to use. 
Unless a partition says it is some // complex fusion (such as SDP) with a specific data type, setting such @@ -412,8 +387,11 @@ void ref_primitive_t::check_correctness( // // Note: the following threshold is obtained from actual runs on // different hardware. - cmp.set_threshold(2.5e-3f); - cmp.set_norm_validation_mode(true); + cmp.set_threshold_norm(2.5e-3f); + dnn_mem_t mem_fp_abx(mem_fp, dnnl_f32, tag::abx, ::get_cpu_engine()); + // Reset `res` counters when more than a single arg is checked. + res->errors = 0; + res->total = 0; cmp.compare(mem_fp_abx, mem_dt, attr, res); } } diff --git a/tests/benchdnn/lnorm/lnorm.cpp b/tests/benchdnn/lnorm/lnorm.cpp index 8b45a0fc24a..1f8154ae4ac 100644 --- a/tests/benchdnn/lnorm/lnorm.cpp +++ b/tests/benchdnn/lnorm/lnorm.cpp @@ -497,8 +497,8 @@ void skip_invalid_prb(const prb_t *prb, res_t *res) { void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const args_t &ref_args) { - const bool compare_with_norm = (prb->dir & FLAG_BWD); - cmp.set_norm_validation_mode(compare_with_norm); + const bool allow_norm_check = (prb->dir & FLAG_BWD); + cmp.set_allow_norm_check(allow_norm_check); const auto dt = prb->dir & FLAG_FWD ? prb->dt[1] : prb->dt[0]; // Digits must be non-negative for safe left-shifting when `digits_dt` diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp index f9a81c73b83..c2de2b279f7 100644 --- a/tests/benchdnn/rnn/rnn.cpp +++ b/tests/benchdnn/rnn/rnn.cpp @@ -1039,10 +1039,10 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, // as long as we get precise u8 intermediate results (and so far we do), // the f32 result should be pretty accurate -- the dequantization is just // two simple ops: f32 = scale * u8 + shift. 
- bool check_p2p = (prb->skip_nonlinear - || ((prb->n_layer == 1) && (prb->n_iter == 1))); - if (prb->is_int8() && rnn_kind == DST_ITER_C) check_p2p = false; - cmp.set_norm_validation_mode(!check_p2p); + const bool disallow_norm_check = prb->skip_nonlinear + || (prb->n_layer == 1 && prb->n_iter == 1) + || (prb->is_int8() && rnn_kind == DST_ITER_C); + cmp.set_allow_norm_check(!disallow_norm_check); const auto rnn_add_check = [&, prb](const compare::compare_t::driver_check_func_args_t &args) { diff --git a/tests/benchdnn/self/norm.cpp b/tests/benchdnn/self/norm.cpp index 529e47bf3db..12f140c3dfd 100644 --- a/tests/benchdnn/self/norm.cpp +++ b/tests/benchdnn/self/norm.cpp @@ -124,7 +124,7 @@ static int check_compare_norm() { res_t res_bad {}; res_bad.state = EXECUTED; compare::compare_t cmp; - cmp.set_norm_validation_mode(true); + cmp.set_allow_norm_check(true); cmp.set_threshold( sqrt(N) / sqrt(exp_sq_sum0) - 10.f * epsilon_dt(dnnl_f32)); cmp.compare(m0, m1, attr_t(), &res_bad); diff --git a/tests/benchdnn/utils/compare.cpp b/tests/benchdnn/utils/compare.cpp index 78391a0436b..452e9ebb978 100644 --- a/tests/benchdnn/utils/compare.cpp +++ b/tests/benchdnn/utils/compare.cpp @@ -146,6 +146,14 @@ bool negative_converts_to_zero(const attr_t &attr, dnnl_data_type_t target_dt) { return false; } + +void reset_case_stats(res_t *res) { + // TODO: introduce res->stats and ctor to replace just the stats part. + res->state = EXECUTED; + res->total = 0; + res->errors = 0; +} + } // namespace bool compare_extreme_values(float a, float b) { @@ -196,7 +204,7 @@ int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, // Specifiers to keep data accumulated over several `i`. 
static thread_local diff_norm_t diff_norm_ithr; - driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_); + driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_norm_); if ((std::isnan(args.exp_f32)) || std::isinf(args.exp)) { // Don't include nan inf values into norm as they make it @@ -237,7 +245,7 @@ int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, } } - bool ok = diff_norm.rel_diff(norm_t::L2) <= trh_; + bool ok = diff_norm.rel_diff(norm_t::L2) <= trh_norm_; if (!ok) res->errors = 1; const bool dump = need_dump || !ok; @@ -248,7 +256,7 @@ int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, // Status may be propagated from previous tensor. Use stats from cur tensor. BENCHDNN_PRINT((res->errors ? 0 : 6), "[COMPARE_STATS]%s: trh=%g (compare against [L2] rel_diff)\n", - get_kind_str().c_str(), trh_); + get_kind_str().c_str(), trh_norm_); if (res->state == EXECUTED) res->state = PASSED; @@ -618,13 +626,43 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, int compare_t::compare(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, const attr_t &attr, res_t *res) const { - std::string add_args = std::string(use_norm_ ? "use_norm:true;" : "") + std::string add_args + = std::string(allow_norm_check_ ? "allow_norm:true;" : "") + std::string(op_output_has_nans_ ? "has_nans:true;" : "") + std::string(has_prim_ref_ ? "has_prim_ref:true;" : ""); BENCHDNN_PRINT(6, "[COMPARE]%s: zero_trust%%=%.2f%% extra=%s\n", get_kind_str().c_str(), zero_trust_percent_, add_args.c_str()); - if (use_norm_) return compare_norm(exp_mem, got_mem, attr, res); - return compare_p2p(exp_mem, got_mem, attr, res); + auto st = compare_p2p(exp_mem, got_mem, attr, res); + if (st != OK) { + bool call_norm_check = allow_norm_check_; + // Note: the following code specifies additional driver's individual + // desires when to enable norm check. This one purely depends on the + // result of p2p comparison. 
So far graph is the only driver needing + // such. When this becomes a trend, move it to a registered function + // mechanism. + if (driver_name == "graph") { + // For graph driver there's additional runtime check based on the + // number of failed points. This is done to limit the risk of hiding + // issues. If the number of failed points is reasonably low, let it + // try the norm approach. + const size_t allowed_error_points = res->total / 1024; + const bool norm_check_allowed = allowed_error_points >= res->errors; + + BENCHDNN_PRINT(0, + "[COMPARE_STATS] Norm check is %s; error_to_total_ratio: " + "%zu/%zu; allowed_ratio: %zu/%zu;\n", + norm_check_allowed ? "allowed" : "prohibited", res->errors, + res->total, allowed_error_points, res->total); + + call_norm_check = call_norm_check && norm_check_allowed; + } + + if (call_norm_check) { + reset_case_stats(res); + st = compare_norm(exp_mem, got_mem, attr, res); + } + } + return st; } } // namespace compare diff --git a/tests/benchdnn/utils/compare.hpp b/tests/benchdnn/utils/compare.hpp index c15c65ca089..e64a93f6c91 100644 --- a/tests/benchdnn/utils/compare.hpp +++ b/tests/benchdnn/utils/compare.hpp @@ -45,8 +45,14 @@ struct compare_t { compare_t() = default; - void set_norm_validation_mode(bool un) { use_norm_ = un; } - void set_threshold(float trh) { trh_ = trh; } + void set_allow_norm_check(bool anc) { allow_norm_check_ = anc; } + // Sets both thresholds - for p2p and norm. If a different norm + // threshold is needed, use `set_threshold_norm`. + void set_threshold(float trh) { + trh_ = trh; + trh_norm_ = trh; + } + void set_threshold_norm(float trhn) { trh_norm_ = trhn; } void set_zero_trust_percent(float ztp) { zero_trust_percent_ = ztp; } void set_data_kind(data_kind_t dk) { kind_ = dk; } void set_op_output_has_nans(bool ohn) { op_output_has_nans_ = ohn; } @@ -69,10 +75,12 @@ struct compare_t { const attr_t &attr, res_t *res) const; private: - // Switch between point-to-point and norm comparison. 
- bool use_norm_ = false; + // If point-to-point comparison fails, allows fallback to norm check. + bool allow_norm_check_ = false; // Threshold for a point-to-point comparison. float trh_ = 0.f; + // Threshold for norm comparison. + float trh_norm_ = 0.f; // The percent value of zeros allowed in the output. float default_zero_trust_percent_ = 30.f; float zero_trust_percent_ = default_zero_trust_percent_; From 1b7e9f86452fd76af2f5fee413d37bfd844dff97 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 13 Mar 2026 11:28:02 -0700 Subject: [PATCH 3/8] benchdnn: compare: don't print failed points if norm check passed --- tests/benchdnn/utils/compare.cpp | 67 ++++++++++++-------------------- tests/benchdnn/utils/compare.hpp | 26 +++++++++++++ 2 files changed, 50 insertions(+), 43 deletions(-) diff --git a/tests/benchdnn/utils/compare.cpp b/tests/benchdnn/utils/compare.cpp index 452e9ebb978..43d0e950c47 100644 --- a/tests/benchdnn/utils/compare.cpp +++ b/tests/benchdnn/utils/compare.cpp @@ -32,28 +32,8 @@ namespace compare { namespace { -struct dump_point_ctx_t { - dump_point_ctx_t(const_dnnl_memory_desc_t md, int64_t l_offset, - float exp_f32, float exp, float got, float diff, float rel_diff) - : md(md) - , l_offset(l_offset) - , exp_f32(exp_f32) - , exp(exp) - , got(got) - , diff(diff) - , rel_diff(rel_diff) {} - - const_dnnl_memory_desc_t md; - int64_t l_offset; - float exp_f32; - float exp; - float got; - float diff; - float rel_diff; -}; - void dump_point_values( - const std::string &kind_str, const dump_point_ctx_t &ctx) { + const std::string &kind_str, const compare_t::dump_point_ctx_t &ctx) { dnnl::impl::stringstream_t ss; dims_t l_dims = md2dims(ctx.md); dims_t dims_idx = off2dims_idx(l_dims, ctx.l_offset); @@ -235,21 +215,14 @@ int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, diff_norm.done(); - // Serial point dump with enabled dumping when needed for nicer output. 
- if (need_dump) { - for (int64_t i = 0; i < nelems; ++i) { - driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_); - dump_point_values(get_kind_str(), - {got_mem.md_, i, args.exp_f32, args.exp, args.got, - args.diff, args.rel_diff}); - } - } - bool ok = diff_norm.rel_diff(norm_t::L2) <= trh_norm_; if (!ok) res->errors = 1; const bool dump = need_dump || !ok; - if (dump) dump_norm_values(diff_norm, get_kind_str()); + if (dump) { + dump_p2p_errors(); + dump_norm_values(diff_norm, get_kind_str()); + } if (res->errors) res->state = FAILED; @@ -321,7 +294,7 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, static struct { struct data_t { int64_t n_errors; - std::vector dumps; + std::vector dumps; }; data_t &get() { @@ -574,20 +547,19 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, } // serial comparison with enabled dumping when needed for nicer output. if (n_errors > 0 || need_dump) { - std::vector dumps; for (auto &d : thread_data.data) { - dumps.insert( - dumps.end(), d.second.dumps.begin(), d.second.dumps.end()); + p2p_dumps_.insert(p2p_dumps_.end(), d.second.dumps.begin(), + d.second.dumps.end()); } - std::sort(dumps.begin(), dumps.end(), - [](const dump_point_ctx_t &a, const dump_point_ctx_t &b) { + std::sort(p2p_dumps_.begin(), p2p_dumps_.end(), + [](const compare_t::dump_point_ctx_t &a, + const compare_t::dump_point_ctx_t &b) { return a.l_offset < b.l_offset; }); - size_t max_dump_size - = (verbose >= 10 || dumps.size() < 10) ? dumps.size() : 10; - for (size_t i = 0; i < max_dump_size; i++) { - dump_point_values(get_kind_str(), dumps[i]); - } + // If norm fallback is allowed, these dumps will be printed there. + // This is done to avoid an output disturbance if p2p check fails but + // norm passes. + if (!allow_norm_check_) dump_p2p_errors(); } // Set state to FAILED in case of any errors. 
@@ -624,6 +596,15 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, return res->state == FAILED ? FAIL : OK; } +void compare_t::dump_p2p_errors() const { + size_t max_dump_size = (verbose >= 10 || p2p_dumps_.size() < 10) + ? p2p_dumps_.size() + : 10; + for (size_t i = 0; i < max_dump_size; i++) { + dump_point_values(get_kind_str(), p2p_dumps_[i]); + } +} + int compare_t::compare(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, const attr_t &attr, res_t *res) const { std::string add_args diff --git a/tests/benchdnn/utils/compare.hpp b/tests/benchdnn/utils/compare.hpp index e64a93f6c91..88ade855461 100644 --- a/tests/benchdnn/utils/compare.hpp +++ b/tests/benchdnn/utils/compare.hpp @@ -43,6 +43,26 @@ struct compare_t { float trh = 0.f; }; + struct dump_point_ctx_t { + dump_point_ctx_t(const_dnnl_memory_desc_t md, int64_t l_offset, + float exp_f32, float exp, float got, float diff, float rel_diff) + : md(md) + , l_offset(l_offset) + , exp_f32(exp_f32) + , exp(exp) + , got(got) + , diff(diff) + , rel_diff(rel_diff) {} + + const_dnnl_memory_desc_t md; + int64_t l_offset; + float exp_f32; + float exp; + float got; + float diff; + float rel_diff; + }; + compare_t() = default; void set_allow_norm_check(bool anc) { allow_norm_check_ = anc; } @@ -102,11 +122,17 @@ struct compare_t { // layout for proper comparison. bool has_prim_ref_ = false; + // Internal members. + // + // `mutable` to preserve `const`antness of compare methods. + mutable std::vector p2p_dumps_; + // Internal validation methods under `compare` interface. 
int compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, const attr_t &attr, res_t *res) const; int compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, const attr_t &attr, res_t *res) const; + void dump_p2p_errors() const; std::string get_kind_str() const { std::string kind_str; From 294f62c9cefed6f7a2f2b734475f337af1fb677a Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 13 Mar 2026 12:41:43 -0700 Subject: [PATCH 4/8] benchdnn: utils: data_kind: move labels to a single structure --- tests/benchdnn/brgemm/brgemm_aux.cpp | 1 - tests/benchdnn/utils/data_kind.cpp | 129 +++++++++++++-------------- tests/benchdnn/utils/data_kind.hpp | 4 +- 3 files changed, 63 insertions(+), 71 deletions(-) diff --git a/tests/benchdnn/brgemm/brgemm_aux.cpp b/tests/benchdnn/brgemm/brgemm_aux.cpp index 3ef9eec4d4e..4a4a0664bfc 100644 --- a/tests/benchdnn/brgemm/brgemm_aux.cpp +++ b/tests/benchdnn/brgemm/brgemm_aux.cpp @@ -36,7 +36,6 @@ dnnl_data_type_t prb_t::get_dt(data_kind_t data_kind) const { case WEI: return wei_dt(); case BIA: return bia_dt; case DST: return dst_dt(); - case ACC: return acc_dt(); default: assert(!"unexpected"); return dnnl_data_type_undef; } } diff --git a/tests/benchdnn/utils/data_kind.cpp b/tests/benchdnn/utils/data_kind.cpp index ce08388e072..57c5a67b8c6 100644 --- a/tests/benchdnn/utils/data_kind.cpp +++ b/tests/benchdnn/utils/data_kind.cpp @@ -23,48 +23,63 @@ #include "utils/data_kind.hpp" struct data_kind_entry_t { - data_kind_t dk; std::vector exec_args; + std::string label; }; -static data_kind_entry_t data_kind_table[] = { - // Important implementation detail: - // `arg` to `kind` conversion is 2-to-1, and transparent. - // Since `kind` to `arg` conversion is 1-to-2, it is done according to - // comparison logic. To maintain it easier, first element of arg vector - // is the one that corresponts to the argument expected in comparison. 
- {SRC, {DNNL_ARG_DIFF_SRC, DNNL_ARG_SRC}}, - {SRC_1, {DNNL_ARG_DIFF_SRC_1, DNNL_ARG_SRC_1}}, - {SRC_2, {DNNL_ARG_DIFF_SRC_2, DNNL_ARG_SRC_2}}, - {SRC_ITER, {DNNL_ARG_DIFF_SRC_ITER, DNNL_ARG_SRC_ITER}}, - {SRC_ITER_C, {DNNL_ARG_DIFF_SRC_ITER_C, DNNL_ARG_SRC_ITER_C}}, - {WEI, {DNNL_ARG_DIFF_WEIGHTS, DNNL_ARG_WEIGHTS}}, - {WEI_ITER, {DNNL_ARG_DIFF_WEIGHTS_ITER, DNNL_ARG_WEIGHTS_ITER}}, - {BIA, {DNNL_ARG_DIFF_BIAS, DNNL_ARG_BIAS}}, - {DST, {DNNL_ARG_DST, DNNL_ARG_DIFF_DST}}, - {DST_ITER, {DNNL_ARG_DST_ITER, DNNL_ARG_DIFF_DST_ITER}}, - {DST_ITER_C, {DNNL_ARG_DST_ITER_C, DNNL_ARG_DIFF_DST_ITER_C}}, - {MEAN, {DNNL_ARG_MEAN}}, - {VAR, {DNNL_ARG_VARIANCE}}, - {SC, {DNNL_ARG_DIFF_SCALE, DNNL_ARG_SCALE}}, - {SH, {DNNL_ARG_DIFF_SHIFT, DNNL_ARG_SHIFT}}, - {AUGRU_ATTENTION, - {DNNL_ARG_DIFF_AUGRU_ATTENTION, DNNL_ARG_AUGRU_ATTENTION}}, - {WEI_PEEPHOLE, - {DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE, DNNL_ARG_WEIGHTS_PEEPHOLE}}, - {WEI_PROJECTION, - {DNNL_ARG_DIFF_WEIGHTS_PROJECTION, - DNNL_ARG_WEIGHTS_PROJECTION}}, - {DROPOUT_MASK, {DNNL_ARG_ATTR_DROPOUT_MASK}}, - {DST_SCALES, {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST}}, - // Always keep this entry the last in the list - {DAT_TOTAL, {DNNL_ARG_UNDEF}}, -}; +const std::map &data_kind_table() { + static const std::map data_kind_table_ { + // Important implementation detail: + // `arg` to `kind` conversion is 2-to-1, and transparent. + // Since `kind` to `arg` conversion is 1-to-2, it is done according + // to comparison logic. To maintain it easier, first element of arg + // vector is the one that corresponds to the argument expected in + // comparison. 
+ {SRC, {{DNNL_ARG_DIFF_SRC, DNNL_ARG_SRC}, "SRC"}}, + {SRC_1, {{DNNL_ARG_DIFF_SRC_1, DNNL_ARG_SRC_1}, "SRC_ADD"}}, + {SRC_2, {{DNNL_ARG_DIFF_SRC_2, DNNL_ARG_SRC_2}, "SRC_2"}}, + {SRC_ITER, + {{DNNL_ARG_DIFF_SRC_ITER, DNNL_ARG_SRC_ITER}, "SRC_ITER"}}, + {SRC_ITER_C, + {{DNNL_ARG_DIFF_SRC_ITER_C, DNNL_ARG_SRC_ITER_C}, + "SRC_ITER_C"}}, + {WEI, {{DNNL_ARG_DIFF_WEIGHTS, DNNL_ARG_WEIGHTS}, "WEI"}}, + {WEI_ITER, + {{DNNL_ARG_DIFF_WEIGHTS_ITER, DNNL_ARG_WEIGHTS_ITER}, + "WEI_ITER"}}, + {BIA, {{DNNL_ARG_DIFF_BIAS, DNNL_ARG_BIAS}, "BIA"}}, + {DST, {{DNNL_ARG_DST, DNNL_ARG_DIFF_DST}, "DST"}}, + {DST_ITER, + {{DNNL_ARG_DST_ITER, DNNL_ARG_DIFF_DST_ITER}, "DST_ITER"}}, + {DST_ITER_C, + {{DNNL_ARG_DST_ITER_C, DNNL_ARG_DIFF_DST_ITER_C}, + "DST_ITER_C"}}, + {MEAN, {{DNNL_ARG_MEAN}, "MEAN"}}, + {VAR, {{DNNL_ARG_VARIANCE}, "VAR"}}, + {SC, {{DNNL_ARG_DIFF_SCALE, DNNL_ARG_SCALE}, "SC"}}, + {SH, {{DNNL_ARG_DIFF_SHIFT, DNNL_ARG_SHIFT}, "SH"}}, + {AUGRU_ATTENTION, + {{DNNL_ARG_DIFF_AUGRU_ATTENTION, DNNL_ARG_AUGRU_ATTENTION}, + "AUGRU_ATTENTION"}}, + {WEI_PEEPHOLE, + {{DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE, + DNNL_ARG_WEIGHTS_PEEPHOLE}, + "WEI_PEEPHOLE"}}, + {WEI_PROJECTION, + {{DNNL_ARG_DIFF_WEIGHTS_PROJECTION, + DNNL_ARG_WEIGHTS_PROJECTION}, + "WEI_PROJECTION"}}, + {DROPOUT_MASK, {{DNNL_ARG_ATTR_DROPOUT_MASK}, "DROPOUT_MASK"}}, + {DST_SCALES, {{DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST}, "DST_SCALES"}}, + {DAT_TOTAL, {{DNNL_ARG_UNDEF}, "incorrect data kind"}}, + }; + return data_kind_table_; +} data_kind_t exec_arg2data_kind(int arg) { - for (const auto &e : data_kind_table) { - for (const auto &a : e.exec_args) { - if (a == arg) return e.dk; + for (const auto &e : data_kind_table()) { + for (const auto &a : e.second.exec_args) { + if (a == arg) return e.first; } } @@ -82,14 +97,13 @@ data_kind_t exec_arg2data_kind(int arg) { && !is_zero_point_arg && !is_dropout_arg) BENCHDNN_PRINT(0, "Error: arg \'%d\' was not recognized\n", arg); - const auto table_size = sizeof(data_kind_table) / 
sizeof(*data_kind_table); - return data_kind_table[table_size - 1].dk; + return DAT_TOTAL; } int data_kind2exec_arg(data_kind_t dk) { - for (const auto &e : data_kind_table) { - // See `data_kind_table` comment. It explains why `0` index is taken. - if (e.dk == dk) return e.exec_args[0]; + for (const auto &e : data_kind_table()) { + // See `data_kind_table()` comment. It explains why `0` index is taken. + if (e.first == dk) return e.second.exec_args[0]; } BENCHDNN_PRINT(0, "Error: data_kind \'%s\' was not recognized\n", @@ -97,31 +111,10 @@ int data_kind2exec_arg(data_kind_t dk) { return DNNL_ARG_UNDEF; } -const char *data_kind2str(data_kind_t kind) { - switch (kind) { - case SRC: return "SRC"; - case SRC_1: return "SRC_ADD"; - case SRC_2: return "SRC_2"; - case WEI: return "WEI"; - case BIA: return "BIA"; - case DST: return "DST"; - case DIFF_DST: return "DIFF_DST"; - case ACC: return "ACC"; - case MEAN: return "MEAN"; - case VAR: return "VAR"; - case SC: return "SC"; - case SH: return "SH"; - case DST_ITER: return "DST_ITER"; - case DST_ITER_C: return "DST_ITER_C"; - case AUGRU_ATTENTION: return "AUGRU_ATTENTION"; - case SRC_ITER: return "SRC_ITER"; - case SRC_ITER_C: return "SRC_ITER_C"; - case WEI_ITER: return "WEI_ITER"; - case WEI_PEEPHOLE: return "WEI_PEEPHOLE"; - case WEI_PROJECTION: return "WEI_PROJECTION"; - case DROPOUT_MASK: return "DROPOUT_MASK"; - case DST_SCALES: return "DST_SCALES"; - default: assert(!"incorrect data kind"); +const char *data_kind2str(data_kind_t dk) { + for (const auto &e : data_kind_table()) { + if (e.first == dk) return e.second.label.c_str(); } - return "incorrect data kind"; + + return data_kind_table().at(DAT_TOTAL).label.c_str(); } diff --git a/tests/benchdnn/utils/data_kind.hpp b/tests/benchdnn/utils/data_kind.hpp index b0886c2775c..c55d3321e95 100644 --- a/tests/benchdnn/utils/data_kind.hpp +++ b/tests/benchdnn/utils/data_kind.hpp @@ -23,7 +23,6 @@ enum data_kind_t { BIA, DST, DIFF_DST, - ACC, // bnorm, lnorm SRC_1, // 
select @@ -48,7 +47,8 @@ enum data_kind_t { // softmax stats DST_1, }; -const char *data_kind2str(data_kind_t kind); + +const char *data_kind2str(data_kind_t dk); // Returns correspondent `data_kind_t` value to a given execution `arg` value. data_kind_t exec_arg2data_kind(int arg); From 61157dcbe80b3efdc6c595186e9ba0e2601ed01f Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 13 Mar 2026 12:52:10 -0700 Subject: [PATCH 5/8] benchdnn: graph: set kind for compare object and adjust its name --- tests/benchdnn/graph/ref_primitive.cpp | 7 ++++--- tests/benchdnn/softmax/softmax.cpp | 2 +- tests/benchdnn/utils/data_kind.cpp | 1 + tests/benchdnn/utils/data_kind.hpp | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/benchdnn/graph/ref_primitive.cpp b/tests/benchdnn/graph/ref_primitive.cpp index bd07a6fc8a4..3b7e13d3d86 100644 --- a/tests/benchdnn/graph/ref_primitive.cpp +++ b/tests/benchdnn/graph/ref_primitive.cpp @@ -323,7 +323,7 @@ void ref_primitive_t::check_correctness( {DNNL_ARG_BIAS, BIA}, {DNNL_ARG_DIFF_BIAS, BIA}, {DNNL_ARG_DST, DST}, - {DNNL_ARG_DST_1, DST_1}, + {DNNL_ARG_DST_1, SDPA_STATS}, {DNNL_ARG_DIFF_SRC_0, DST}, {DNNL_ARG_SRC_1, SRC_1}, {DNNL_ARG_MEAN, MEAN}, @@ -355,8 +355,8 @@ void ref_primitive_t::check_correctness( const auto &mem_dt = args.find(arg); const auto &mem_fp = args_.find(arg); - if (dnnl_arg_2_data_kind_map.find(arg) - == dnnl_arg_2_data_kind_map.end()) { + auto it = dnnl_arg_2_data_kind_map.find(arg); + if (it == dnnl_arg_2_data_kind_map.end()) { BENCHDNN_PRINT(1, "Output arg %d is unsupported!\n", arg); res->state = UNIMPLEMENTED; return; @@ -365,6 +365,7 @@ void ref_primitive_t::check_correctness( attr_t attr; SWITCH_DRIVER(CASE_CHECK_CORRECTNESS, CASE_CUSTOM_CHECK_CORRECTNESS); + cmp.set_data_kind(it->second); cmp.set_has_eltwise_post_op(has_eltwise); cmp.set_op_output_has_nans(has_nans); // `cmp` object has internal knowledge on when this check must be diff --git a/tests/benchdnn/softmax/softmax.cpp 
b/tests/benchdnn/softmax/softmax.cpp index cab9ac98e9d..86586346a41 100644 --- a/tests/benchdnn/softmax/softmax.cpp +++ b/tests/benchdnn/softmax/softmax.cpp @@ -283,7 +283,7 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, const float trh = is_flt_or_dbl || is_relaxed_xf16 ? trh_f32 : 0.f; #endif cmp.set_threshold(trh); - if (driver_name == "graph" && kind == DST_1) { + if (driver_name == "graph" && kind == SDPA_STATS) { // softmax stats is computed with eltwise-log, which has a different // and larger threshold than softmax. So we need to adjust the threshold // for this case. diff --git a/tests/benchdnn/utils/data_kind.cpp b/tests/benchdnn/utils/data_kind.cpp index 57c5a67b8c6..90313d3e765 100644 --- a/tests/benchdnn/utils/data_kind.cpp +++ b/tests/benchdnn/utils/data_kind.cpp @@ -71,6 +71,7 @@ const std::map &data_kind_table() { "WEI_PROJECTION"}}, {DROPOUT_MASK, {{DNNL_ARG_ATTR_DROPOUT_MASK}, "DROPOUT_MASK"}}, {DST_SCALES, {{DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST}, "DST_SCALES"}}, + {SDPA_STATS, {{DNNL_ARG_DST_1}, "SDPA_STATS"}}, {DAT_TOTAL, {{DNNL_ARG_UNDEF}, "incorrect data kind"}}, }; return data_kind_table_; diff --git a/tests/benchdnn/utils/data_kind.hpp b/tests/benchdnn/utils/data_kind.hpp index c55d3321e95..4f9936a566c 100644 --- a/tests/benchdnn/utils/data_kind.hpp +++ b/tests/benchdnn/utils/data_kind.hpp @@ -42,10 +42,10 @@ enum data_kind_t { WEI_PROJECTION, DROPOUT_MASK, DST_SCALES, + // SDPA softmax stats + SDPA_STATS, DAT_TOTAL, - // softmax stats - DST_1, }; const char *data_kind2str(data_kind_t dk); From 31f4166eb900347cf24834b23fbc287378f22ca7 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Mon, 16 Mar 2026 12:37:01 -0700 Subject: [PATCH 6/8] benchdnn: rnn: resolve some false-positive correctness corner cases --- tests/benchdnn/rnn/rnn.cpp | 15 +++++++++++++++ tests/benchdnn/rnn/rnn_aux.cpp | 22 ++++++++++++++++++++++ tests/benchdnn/rnn/rnn_aux.hpp | 1 + tests/benchdnn/utils/compare.cpp | 14 +++++++++----- 
tests/benchdnn/utils/compare.hpp | 4 +++- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp index c2de2b279f7..b0aa7a56c5e 100644 --- a/tests/benchdnn/rnn/rnn.cpp +++ b/tests/benchdnn/rnn/rnn.cpp @@ -1054,6 +1054,21 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, && prb->prop == dnnl_backward) { return args.diff < args.trh; } + + // When a problem uses int computations, DST_ITER(_C) is computed using + // DST_LAYER. However, the library part can compute LAYER and ITER in + // parallel, which can lead to off-by-1 issue for ITER part. + // Reconstruct original DST_LAYER values on got and exp sides and if + // they are off-by-1, let them through. + if (prb->cfg.is_int8() + && (args.dk == rnn_data_kind2data_kind(DST_ITER) + || args.dk == rnn_data_kind2data_kind(DST_ITER_C))) { + const int exp_q = static_cast( + args.exp * prb->data_scale + prb->data_shift); + const int got_q = static_cast( + args.got * prb->data_scale + prb->data_shift); + return abs(got_q - exp_q) <= 1; + } return false; }; cmp.set_driver_check_function(rnn_add_check); diff --git a/tests/benchdnn/rnn/rnn_aux.cpp b/tests/benchdnn/rnn/rnn_aux.cpp index 40fbe0ae680..09403060809 100644 --- a/tests/benchdnn/rnn/rnn_aux.cpp +++ b/tests/benchdnn/rnn/rnn_aux.cpp @@ -565,6 +565,28 @@ rnn_data_kind_t data_kind2rnn_data_kind(data_kind_t data_kind) { return KIND_TOTAL; } +data_kind_t rnn_data_kind2data_kind(rnn_data_kind_t rnn_data_kind) { + switch (rnn_data_kind) { + case rnn_data_kind_t::DST_LAYER: return data_kind_t::DST; + case rnn_data_kind_t::DST_ITER: return data_kind_t::DST_ITER; + case rnn_data_kind_t::DST_ITER_C: return data_kind_t::DST_ITER_C; + case rnn_data_kind_t::DIFF_SRC_LAYER: return data_kind_t::SRC; + case rnn_data_kind_t::DIFF_AUGRU_ATTENTION: + return data_kind_t::AUGRU_ATTENTION; + case rnn_data_kind_t::DIFF_SRC_ITER: return data_kind_t::SRC_ITER; + case rnn_data_kind_t::DIFF_SRC_ITER_C: 
return data_kind_t::SRC_ITER_C; + case rnn_data_kind_t::DIFF_WEIGHTS_LAYER: return data_kind_t::WEI; + case rnn_data_kind_t::DIFF_WEIGHTS_ITER: return data_kind_t::WEI_ITER; + case rnn_data_kind_t::DIFF_WEIGHTS_PEEPHOLE: + return data_kind_t::WEI_PEEPHOLE; + case rnn_data_kind_t::DIFF_WEIGHTS_PROJECTION: + return data_kind_t::WEI_PROJECTION; + case rnn_data_kind_t::DIFF_BIAS: return data_kind_t::BIA; + default: assert(!"unknown data kind"); + } + return DAT_TOTAL; +} + void prb_t::set_qparams(float fp_min, float fp_max) { if (!cfg.is_int8()) { data_shift = 0.; diff --git a/tests/benchdnn/rnn/rnn_aux.hpp b/tests/benchdnn/rnn/rnn_aux.hpp index c1c0c0896a7..0de6c74c5b6 100644 --- a/tests/benchdnn/rnn/rnn_aux.hpp +++ b/tests/benchdnn/rnn/rnn_aux.hpp @@ -121,6 +121,7 @@ void gates_reduction( const prb_t &prb, const float *b_gates_, float *diff_bias_); rnn_data_kind_t data_kind2rnn_data_kind(data_kind_t data_kind); +data_kind_t rnn_data_kind2data_kind(rnn_data_kind_t rnn_data_kind); }; // namespace rnn diff --git a/tests/benchdnn/utils/compare.cpp b/tests/benchdnn/utils/compare.cpp index 43d0e950c47..f172e3a36eb 100644 --- a/tests/benchdnn/utils/compare.cpp +++ b/tests/benchdnn/utils/compare.cpp @@ -145,7 +145,7 @@ bool compare_extreme_values(float a, float b) { compare_t::driver_check_func_args_t::driver_check_func_args_t( const dnn_mem_t &exp_mem, const dnn_mem_t &got_f32, const int64_t i, - const dnnl_data_type_t data_type, const float trh) + const dnnl_data_type_t data_type, const float trh, data_kind_t dk) : dt(data_type) , idx(i) , exp_f32(exp_mem.get_f32_elem(idx)) @@ -153,7 +153,8 @@ compare_t::driver_check_func_args_t::driver_check_func_args_t( , got(got_f32.get_f32_elem(idx)) , diff(fabsf(exp - got)) , rel_diff(diff / (fabsf(exp) > FLT_MIN ? 
fabsf(exp) : 1)) - , trh(trh) {} + , trh(trh) + , dk(dk) {} int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, const attr_t &attr, res_t *res) const { @@ -184,7 +185,8 @@ int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, // Specifiers to keep data accumulated over several `i`. static thread_local diff_norm_t diff_norm_ithr; - driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_norm_); + driver_check_func_args_t args( + exp_mem, got_f32, i, dt, trh_norm_, kind_); if ((std::isnan(args.exp_f32)) || std::isinf(args.exp)) { // Don't include nan inf values into norm as they make it @@ -337,7 +339,8 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, static thread_local driver_check_func_args_t args; for (int z = ok; z < 1; z++) { - args = driver_check_func_args_t(exp_f32, got_f32, i, dt, trh_); + args = driver_check_func_args_t( + exp_f32, got_f32, i, dt, trh_, kind_); if (std::isnan(args.exp_f32) && is_integral_dt(dt)) { // Relax output requirements for this case, since different @@ -502,7 +505,8 @@ int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, if (dump) { // Need to initialize `args` in case they weren't. 
if (args.dt == dnnl_data_type_undef) - args = driver_check_func_args_t(exp_f32, got_f32, i, dt, trh_); + args = driver_check_func_args_t( + exp_f32, got_f32, i, dt, trh_, kind_); out_data.dumps.emplace_back(got_mem.md_, i, args.exp_f32, args.exp, got_val, args.diff, args.rel_diff); diff --git a/tests/benchdnn/utils/compare.hpp b/tests/benchdnn/utils/compare.hpp index 88ade855461..7c4df46b951 100644 --- a/tests/benchdnn/utils/compare.hpp +++ b/tests/benchdnn/utils/compare.hpp @@ -31,7 +31,8 @@ struct compare_t { driver_check_func_args_t() = default; driver_check_func_args_t(const dnn_mem_t &exp_mem, const dnn_mem_t &got_f32, const int64_t i, - const dnnl_data_type_t data_type, const float trh); + const dnnl_data_type_t data_type, const float trh, + data_kind_t dk); dnnl_data_type_t dt = dnnl_data_type_undef; int64_t idx = 0; @@ -41,6 +42,7 @@ struct compare_t { float diff = 0.f; float rel_diff = 0.f; float trh = 0.f; + data_kind_t dk = DAT_TOTAL; }; struct dump_point_ctx_t { From 4bab92ce087876ceb28f30ab1bcd1e29183f0fd8 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 13 Mar 2026 16:05:32 -0700 Subject: [PATCH 7/8] revert: benchdnn: inputs: work around false-positive hits This reverts commit 7e4e9df49c7650e7d593ba0166c5a48afd9b2527. 
--- tests/benchdnn/inputs/rnn/harness_gru_regression | 4 +++- tests/benchdnn/inputs/rnn/shapes_small | 2 +- tests/benchdnn/rnn/rnn.cpp | 10 ---------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tests/benchdnn/inputs/rnn/harness_gru_regression b/tests/benchdnn/inputs/rnn/harness_gru_regression index 38858461e45..3456104a7ec 100644 --- a/tests/benchdnn/inputs/rnn/harness_gru_regression +++ b/tests/benchdnn/inputs/rnn/harness_gru_regression @@ -1,2 +1,4 @@ # int8 SIC != SLC ---reset --trivial-strides=true --prop=FWD_I --alg=VANILLA_GRU --activation=UNDEF --direction=left2right --cfg=u8u8u8f32 l1t32mb100sic128slc256dhc128dic128 +--reset +--trivial-strides=true --prop=FWD_I --alg=VANILLA_GRU --activation=UNDEF +--direction=left2right --cfg=u8u8u8f32 l1t47mb100sic128slc256dhc128dic128 diff --git a/tests/benchdnn/inputs/rnn/shapes_small b/tests/benchdnn/inputs/rnn/shapes_small index e8a242a815e..c0c5398e541 100644 --- a/tests/benchdnn/inputs/rnn/shapes_small +++ b/tests/benchdnn/inputs/rnn/shapes_small @@ -1,7 +1,7 @@ # small shapes l8t3mb12_sic16_n"uniform" -l4t2mb20_sic36_n"uniform:unroll_tail" +l4t3mb20_sic36_n"uniform:unroll_tail" l1t2mb6_sic16_slc32_n"non-uniform:slc_neq_sic" l1t1mb7_sic17_dhc34_n"non-uniform:slc_neq_dhc_tail" l1t1mb3_sic16_slc32_dhc64_n"non-uniform:slc_neq_sic_neq_dhc" diff --git a/tests/benchdnn/rnn/rnn.cpp b/tests/benchdnn/rnn/rnn.cpp index b0aa7a56c5e..77e5a9bdca1 100644 --- a/tests/benchdnn/rnn/rnn.cpp +++ b/tests/benchdnn/rnn/rnn.cpp @@ -993,16 +993,6 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, if (prb->prop == dnnl_backward) acc_dim *= MAX2(bwdd_acc_dim, bwdw_acc_dim); // Here the factor 4 just gives some wiggle room for fp32 testing - // Note: the following process of picking a `trh` is likely fine for - // floating-point problems but doesn't suit well for int8. 
It may happen - // that underlying target implementation will compute DST[i] and DST_ITER[i] - // with small difference around X.5f point ending up rounded differently - // leading to a difference in the output. Turned out, one incorrect point - // leads to norm comparison failure which doesn't make norm validation - // meaningful. - // TODO: consider moving int8 config (DST_ITER only?) on per point check - // with additional verification that underlying sources can have diff_1 - // (though slightly changing shapes can work around failures). float trh = 4 * (1 + (prb->prop == dnnl_backward)) // double wiggle room for bwd * ((prb->direction == dnnl_bidirectional_sum) From 1f693eb84e58080f4110ce315448caf9622ee84d Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 13 Mar 2026 16:42:17 -0700 Subject: [PATCH 8/8] benchdnn: res: update member doc and introduce reset_stats method --- tests/benchdnn/graph/ref_primitive.cpp | 6 ++--- tests/benchdnn/utils/compare.cpp | 9 +------- tests/benchdnn/utils/res.hpp | 32 ++++++++++++++++++++++---- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/tests/benchdnn/graph/ref_primitive.cpp b/tests/benchdnn/graph/ref_primitive.cpp index 3b7e13d3d86..3a401ce2cd2 100644 --- a/tests/benchdnn/graph/ref_primitive.cpp +++ b/tests/benchdnn/graph/ref_primitive.cpp @@ -390,9 +390,9 @@ void ref_primitive_t::check_correctness( // different hardware. cmp.set_threshold_norm(2.5e-3f); dnn_mem_t mem_fp_abx(mem_fp, dnnl_f32, tag::abx, ::get_cpu_engine()); - // Reset `res` counters when more than a single arg is checked. - res->errors = 0; - res->total = 0; + // Clear previous output stats. 
+ auto cur_res_state = res->state; + res->reset_stats(cur_res_state); cmp.compare(mem_fp_abx, mem_dt, attr, res); } } diff --git a/tests/benchdnn/utils/compare.cpp b/tests/benchdnn/utils/compare.cpp index f172e3a36eb..dd5c9da6256 100644 --- a/tests/benchdnn/utils/compare.cpp +++ b/tests/benchdnn/utils/compare.cpp @@ -127,13 +127,6 @@ bool negative_converts_to_zero(const attr_t &attr, dnnl_data_type_t target_dt) { return false; } -void reset_case_stats(res_t *res) { - // TODO: introduce res->stats and ctor to replace just the stats part. - res->state = EXECUTED; - res->total = 0; - res->errors = 0; -} - } // namespace bool compare_extreme_values(float a, float b) { @@ -643,7 +636,7 @@ int compare_t::compare(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem, } if (call_norm_check) { - reset_case_stats(res); + res->reset_stats(EXECUTED); st = compare_norm(exp_mem, got_mem, attr, res); } } diff --git a/tests/benchdnn/utils/res.hpp b/tests/benchdnn/utils/res.hpp index a7554f86e5c..8f1d13b6095 100644 --- a/tests/benchdnn/utils/res.hpp +++ b/tests/benchdnn/utils/res.hpp @@ -108,15 +108,39 @@ struct check_mem_size_args_t { }; struct res_t { - res_state_t state; - size_t errors, total; + // The state of the `res` object. Changes as the flow continues. The typical + // progression starts with UNTESTED and follows steps: + // Creation: -> INITIALIZED/INVALID_ARGUMENTS/UNIMPLEMENTED/SKIPPED; + // Execution: -> EXECUTED; + // Result: -> PASSED/FAILED/MISTRUSTED. + res_state_t state = UNTESTED; + // A short description of the reason for the obtained status. + std::string reason; + // The number of failed points if the case FAILED. + size_t errors = 0; + // The total number of points tested. + size_t total = 0; + // Registered timers during the run. timer::timer_map_t timer_map; + // The implementation name of the validated primitive. std::string impl_name; + // The repro line for a primitive used as a baseline over benchdnn ref. 
std::string prim_ref_repro; - std::string reason; + // The number of bytes of 'i'nput and 'o'utput. // TODO: fuse `ibytes` and `obytes` into `mem_size_args`. - size_t ibytes, obytes; + size_t ibytes = 0; + size_t obytes = 0; + // Detailed information about test case memory requirements. check_mem_size_args_t mem_size_args; + + // Resets the `errors`, `total`, and `reason` fields to their defaults + // and sets `state` to the given `new_state`. + void reset_stats(res_state_t new_state) { + state = new_state; + reason.clear(); + errors = 0; + total = 0; + } }; #endif