From dff6ae3ca88257a29d24cf333616b5af4345c666 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 12:48:23 -0700 Subject: [PATCH 001/239] test --- .../remove-unused-brs_branch-hints.wast | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 test/lit/passes/remove-unused-brs_branch-hints.wast diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast new file mode 100644 index 00000000000..575ae8532fb --- /dev/null +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -0,0 +1,19 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --remove-unused-brs -all -S -o - \ +;; RUN: | filecheck %s + +(module + (func $if-br (param $x i32) + (block $out + ;; The if-br will turn into a br_if. The branch hint should then go on the + ;; br_if, and remain 01. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br $out) + ) + ) + (unreachable) + ) +) From 9c3a8929a68aeb47915dbe571c3e4e3c73efa7ca Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 12:49:24 -0700 Subject: [PATCH 002/239] test --- test/lit/passes/remove-unused-brs_branch-hints.wast | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 575ae8532fb..2edcf450eda 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -3,6 +3,15 @@ ;; RUN: | filecheck %s (module + ;; CHECK: (func $if-br (type $0) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) (func $if-br (param $x i32) (block $out ;; The if-br will turn into a br_if. 
The branch hint should then go on the @@ -12,7 +21,8 @@ (local.get $x) (then (br $out) - ) + ) + ) ) (unreachable) ) From 611d6c6e1cbc2ef42b591b78b1a0d9fcb1f14226 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 12:49:32 -0700 Subject: [PATCH 003/239] work --- src/passes/RemoveUnusedBrs.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 64c68f2354e..10d6c8cd058 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -150,6 +150,21 @@ static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions, return tooCostlyToRunUnconditionally(passOptions, max); } +// Copy the branch hint from one instruction to another. +static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { + auto iter = func->codeAnnotations.find(from); + if (iter == func->codeAnnotations.end()) { + // No annotations at all. + return; + } + auto& annotation = iter->second; + if (!annotation.branchLikely) { + // No branch hint annotation. 
+ return; + } + func->codeAnnotations[to].branchLikely = annotation.branchLikely; +} + struct RemoveUnusedBrs : public WalkerPass> { bool isFunctionParallel() override { return true; } @@ -396,6 +411,7 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->condition, br->value, getPassOptions(), *getModule())) { if (!br->condition) { br->condition = curr->condition; + copyBranchHintTo(curr, br, getFunction()); } else { // In this case we can replace // if (condition1) br_if (condition2) From e22ef6d1fb9ae14b0eae428b0df0332fe781d9bc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 14:19:04 -0700 Subject: [PATCH 004/239] work --- src/passes/RemoveUnusedBrs.cpp | 2 +- .../remove-unused-brs_branch-hints.wast | 33 ++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 10d6c8cd058..5aa92689631 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -411,7 +411,6 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->condition, br->value, getPassOptions(), *getModule())) { if (!br->condition) { br->condition = curr->condition; - copyBranchHintTo(curr, br, getFunction()); } else { // In this case we can replace // if (condition1) br_if (condition2) @@ -445,6 +444,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeSelect(br->condition, curr->condition, zero); } br->finalize(); + copyBranchHintTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 2edcf450eda..4f1cc60629e 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -3,17 +3,19 @@ ;; RUN: | filecheck %s (module - ;; CHECK: (func $if-br (type $0) (param $x i32) + ;; CHECK: (func $if-br (type $0) (param $x i32) 
(param $y i32) ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) - (func $if-br (param $x i32) + (func $if-br (param $x i32) (param $y i32) (block $out + ;; This nop prevents the entire testcase from being trivial. + (nop) ;; The if-br will turn into a br_if. The branch hint should then go on the ;; br_if, and remain 01. (@metadata.code.branch_hint "\01") @@ -24,6 +26,29 @@ ) ) ) - (unreachable) + ) + + ;; CHECK: (func $if-br_if (type $1) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if (param $x i32) + (block $out + (nop) + ;; As above, but the br has a condition. We can merge conditions, and + ;; still move the hint to the br_if. 
+ (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br $out) + ) + ) + ) ) ) From e436b0d2212c81d84c7a41ffca0549cc6373fc4a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 14:21:43 -0700 Subject: [PATCH 005/239] work --- .../remove-unused-brs_branch-hints.wast | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 4f1cc60629e..119ed5a5233 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -3,6 +3,9 @@ ;; RUN: | filecheck %s (module + ;; CHECK: (import "a" "b" (func $i32 (type $1) (result i32))) + (import "a" "b" (func $i32 (result i32))) + ;; CHECK: (func $if-br (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) @@ -28,25 +31,31 @@ ) ) - ;; CHECK: (func $if-br_if (type $1) (param $x i32) + ;; CHECK: (func $if-br_if (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $if-br_if (param $x i32) + (func $if-br_if (param $x i32) (param $y i32) (block $out (nop) - ;; As above, but the br has a condition. We can merge conditions, and - ;; still move the hint to the br_if. + ;; As above, but the br has a condition. We can merge conditions (using a + ;; select), and then move the hint to the br_if. 
(@metadata.code.branch_hint "\01") (if (local.get $x) (then - (br $out) + (br_if $out + (local.get $y) + ) ) ) ) From a0de2269c4ed0f6bf898620c68c60f53a3998f10 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:17:00 -0700 Subject: [PATCH 006/239] work --- src/passes/RemoveUnusedBrs.cpp | 37 +++++++++--- .../remove-unused-brs_branch-hints.wast | 56 +++++++++++++++++++ 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 5aa92689631..68334b2b092 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -150,19 +150,30 @@ static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions, return tooCostlyToRunUnconditionally(passOptions, max); } -// Copy the branch hint from one instruction to another. -static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { - auto iter = func->codeAnnotations.find(from); +// Branch hint utilities. +static std::optional getBranchHint(Expression* expr, Function* func) { + auto iter = func->codeAnnotations.find(expr); if (iter == func->codeAnnotations.end()) { // No annotations at all. - return; + return {}; } - auto& annotation = iter->second; - if (!annotation.branchLikely) { - // No branch hint annotation. - return; + return iter->second.branchLikely; +} + +static void setBranchHint(Expression* expr, bool likely, Function* func) { + func->codeAnnotations[expr].branchLikely = likely; +} + +static void clearBranchHint(Expression* expr, Function* func) { + func->codeAnnotations[expr].branchLikely = {}; +} + +// Copy the branch hint from one instruction to another. 
+static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { + auto fromLikely = getBranchHint(from, func); + if (fromLikely) { + setBranchHint(to, *fromLikely, func); } - func->codeAnnotations[to].branchLikely = annotation.branchLikely; } struct RemoveUnusedBrs : public WalkerPass> { @@ -475,6 +486,14 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); curr->condition = builder.makeSelect( child->condition, curr->condition, builder.makeConst(int32_t(0))); + // A branch hint makes sense if both ifs have one, and they agree + // (otherwise, we don't know if the combined condition is still likely/ + // unlikely). + auto currHint = getBranchHint(curr, getFunction()); + auto childHint = getBranchHint(child, getFunction()); + if (!currHint || currHint != childHint) { + clearBranchHint(curr, getFunction()); + } curr->ifTrue = child->ifTrue; } } diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 119ed5a5233..bf924214d43 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -5,6 +5,8 @@ (module ;; CHECK: (import "a" "b" (func $i32 (type $1) (result i32))) (import "a" "b" (func $i32 (result i32))) + ;; CHECK: (import "a" "b" (func $none (type $2))) + (import "a" "b" (func $none)) ;; CHECK: (func $if-br (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out @@ -31,6 +33,29 @@ ) ) + ;; CHECK: (func $if-br_0 (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_0 (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but a hint of 0. 
+ (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (br $out) + ) + ) + ) + ) + ;; CHECK: (func $if-br_if (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) @@ -60,4 +85,35 @@ ) ) ) + + ;; CHECK: (func $if-if (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-if (param $x i32) (param $y i32) + ;; Both ifs have a hint, so after we merge the ifs the combined condition + ;; remains likely. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ) ) From 9c249c5f89f8089fe32534ba4dd3f599e6234f81 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:18:26 -0700 Subject: [PATCH 007/239] work --- .../remove-unused-brs_branch-hints.wast | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index bf924214d43..0993b58c208 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -86,7 +86,7 @@ ) ) - ;; CHECK: (func $if-if (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-if-1* (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (select @@ -98,8 +98,28 @@ ;; CHECK-NEXT: (call $none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then 
+ ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $if-if (param $x i32) (param $y i32) + (func $if-if-1* (param $x i32) (param $y i32) ;; Both ifs have a hint, so after we merge the ifs the combined condition ;; remains likely. (@metadata.code.branch_hint "\01") @@ -115,5 +135,33 @@ ) ) ) + ;; The outer if still has a hint of 1, but the inner is 0. We emit no hint. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; The outer if still has a hint of 1, but the inner has none. We emit no + ;; hint. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) ) ) From 9d8e1906ee865eadf45ab569ad0b5701ea9d9ccb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:19:58 -0700 Subject: [PATCH 008/239] work --- .../remove-unused-brs_branch-hints.wast | 52 +++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 0993b58c208..72c0bdfcf92 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -120,8 +120,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $if-if-1* (param $x i32) (param $y i32) - ;; Both ifs have a hint, so after we merge the ifs the combined condition - ;; remains likely. + ;; Both ifs have a hint of 1, so after we merge the ifs the combined + ;; condition remains likely. 
(@metadata.code.branch_hint "\01") (if (local.get $x) @@ -150,7 +150,53 @@ ) ) ;; The outer if still has a hint of 1, but the inner has none. We emit no - ;; hint. + ;; hint. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ) + + (func $if-if-0* (param $x i32) (param $y i32) + ;; As above, but now the outer if has hints of 0. + + ;; The hints do not match, so we emit no hint. + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; The hints match, so we emit 0. XXX + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; No hint is emitted (@metadata.code.branch_hint "\01") (if (local.get $x) From 6a2d39a4966da5df7fafcd9f09b0ef24672264d2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:25:00 -0700 Subject: [PATCH 009/239] work --- src/passes/RemoveUnusedBrs.cpp | 8 ++-- .../remove-unused-brs_branch-hints.wast | 37 ++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 68334b2b092..7110880fc9a 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -486,9 +486,11 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); curr->condition = builder.makeSelect( child->condition, curr->condition, builder.makeConst(int32_t(0))); - // A branch hint makes sense if both ifs have one, and they agree - // (otherwise, we don't know if the combined condition is still likely/ - // unlikely). + // If two conditions A and B are likely, then A && B is also likely + // (though, in theory, slightly less likely, but our branch hints are + // close to certainty). If one of them lacks a hint, we know nothing. 
If + // both are unlikely, we can say that A && B is also unlikely (in fact + // it is less likely). auto currHint = getBranchHint(curr, getFunction()); auto childHint = getBranchHint(child, getFunction()); if (!currHint || currHint != childHint) { diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 72c0bdfcf92..0460bc7815f 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -165,6 +165,39 @@ ) ) + ;; CHECK: (func $if-if-0* (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $if-if-0* (param $x i32) (param $y i32) ;; As above, but now the outer if has hints of 0. @@ -182,7 +215,7 @@ ) ) ) - ;; The hints match, so we emit 0. XXX + ;; The hints match, so the combined condition is unlikely. (@metadata.code.branch_hint "\00") (if (local.get $x) @@ -196,7 +229,7 @@ ) ) ) - ;; No hint is emitted + ;; Inner lacks a hint, so we emit nothing. 
(@metadata.code.branch_hint "\01") (if (local.get $x) From da60e55e71f72661c453103c3e2a4269aa092e41 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:25:42 -0700 Subject: [PATCH 010/239] work --- .../remove-unused-brs_branch-hints.wast | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 0460bc7815f..1da2b8b18cb 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -243,4 +243,75 @@ ) ) ) -) + + ;; CHECK: (func $if-if-?* (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-if-?* (param $x i32) (param $y i32) + ;; As above, but now the outer if has no hint. We emit no hints here. 
+ + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + )) From 59c826b784a36708653aaffc62875f4969630d06 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:33:45 -0700 Subject: [PATCH 011/239] prep --- src/passes/RemoveUnusedBrs.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 7110880fc9a..c2354ae703b 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -168,7 +168,6 @@ static void clearBranchHint(Expression* expr, Function* func) { func->codeAnnotations[expr].branchLikely = {}; } -// Copy the branch hint from one instruction to another. static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { auto fromLikely = getBranchHint(from, func); if (fromLikely) { @@ -176,6 +175,12 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } } +static void flipBranchHint(Expression* expr, Function* func) { + if (auto likely = getBranchHint(expr, func)) { + setBranchHint(expr, !*likely, func); + } +} + struct RemoveUnusedBrs : public WalkerPass> { bool isFunctionParallel() override { return true; } @@ -726,6 +731,8 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; + abort(); + flipBranchHint(brIf, getFunction()); return true; } else { // there are elements in the middle, From b1edd9242cb896d6379bc0474553844a6edd2d8e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:40:06 -0700 Subject: [PATCH 012/239] work --- src/passes/RemoveUnusedBrs.cpp | 1 - 
.../remove-unused-brs_branch-hints.wast | 62 ++++++++++++++++++- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index c2354ae703b..163e79ebb0f 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -731,7 +731,6 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; - abort(); flipBranchHint(brIf, getFunction()); return true; } else { diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 1da2b8b18cb..ac65cc6f1dd 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -3,9 +3,9 @@ ;; RUN: | filecheck %s (module - ;; CHECK: (import "a" "b" (func $i32 (type $1) (result i32))) + ;; CHECK: (import "a" "b" (func $i32 (type $2) (result i32))) (import "a" "b" (func $i32 (result i32))) - ;; CHECK: (import "a" "b" (func $none (type $2))) + ;; CHECK: (import "a" "b" (func $none (type $3))) (import "a" "b" (func $none)) ;; CHECK: (func $if-br (type $0) (param $x i32) (param $y i32) @@ -314,4 +314,60 @@ ) ) ) - )) + ) + + ;; CHECK: (func $loop-br_if-flip (type $1) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-flip (param $x i32) + (block $block + (loop $loop + ;; This br_if's condition will flip when it is turned from a break out + ;; of the loop to a continue inside it. The hint should flip too. 
+ (@metadata.code.branch_hint "\00") + (br_if $block + (local.get $x) + ) + (br $loop) + ) + ) + ) + + ;; CHECK: (func $loop-br_if-flip-reverse (type $1) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-flip-reverse (param $x i32) + ;; As above, with a hint of 1, that should flip to 0. + (block $block + (loop $loop + (@metadata.code.branch_hint "\01") + (br_if $block + (local.get $x) + ) + (br $loop) + ) + ) + ) +) From b218d296c352ec061009510a7682c211c2f0ad01 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 26 Jun 2025 17:48:35 -0700 Subject: [PATCH 013/239] work --- src/passes/RemoveUnusedBrs.cpp | 1 + .../remove-unused-brs_branch-hints.wast | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 163e79ebb0f..ebbd109d26c 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -752,6 +752,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeIf(brIf->condition, builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); + copyBranchHintTo(brIf, list[i], getFunction()); block->finalize(); return true; } diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index ac65cc6f1dd..2b2a5d10d1b 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -370,4 +370,38 @@ ) ) ) + + ;; CHECK: (func $loop-br_if-if (type $1) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; 
CHECK-NEXT: (then + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br $loop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-if (param $x i32) + (loop $loop + (block $block + ;; This br_if will turn into an if with the same condition. The hint can + ;; be copied over. + (@metadata.code.branch_hint "\00") + (br_if $block + (local.get $x) + ) + ;; Extra code so simpler optimizations do not kick in. + (drop (i32.const 42)) + (br $loop) + ) + ) + ) ) From 6ecf5695e89063615a61f58710ceb0ae406d28a9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 27 Jun 2025 11:27:33 -0700 Subject: [PATCH 014/239] next --- src/passes/RemoveUnusedBrs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index ebbd109d26c..4b0e70eed8f 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -753,6 +753,9 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); copyBranchHintTo(brIf, list[i], getFunction()); + // next: benchnarj without binarye + // later: fuzz this: instrument "i am guessing at loc X" and "it was true/it was false", then fuzzz that we don't decreaes times we are right. + // maybe: turn hints into asserts? not for fuzzing, but testing. if hint is 1, assert the value is not 0, etc. 
block->finalize(); return true; } From 7ef75cd0b62e8ab195eea5220eb0210ad7ff4b57 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 15:09:15 -0700 Subject: [PATCH 015/239] notes --- src/passes/InstrumentBranchHints.cpp | 78 ++++++++++++++++++++++++++++ src/passes/RemoveUnusedBrs.cpp | 2 +- 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 src/passes/InstrumentBranchHints.cpp diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp new file mode 100644 index 00000000000..6e5b6855fc0 --- /dev/null +++ b/src/passes/InstrumentBranchHints.cpp @@ -0,0 +1,78 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Instruments branch hints and their targets, adding logging that allows us to +// see if the hints were valid or not. We turn +// +// @metadata.branch.hint B +// if (condition) { +// A; +// } else { +// B; +// } +// +// into +// +// @metadata.branch.hint B +// if (temp = condition; log("if on line 123 predicts B"; temp) { +// log("if on line 123 ended up true"); +// A; +// } else { +// B; +// } +// +// That is, the logging identifies the if, logs the prediction (0 or 1) for that +// if, and then if the if were true, we log that, so by scanning all the +// loggings, we can see both the hint and what actually executed. 
Similarly, for +// br_if: +// +// @metadata.branch.hint B +// (br_if $target (condition)) +// +// into +// +// @metadata.branch.hint B +// (br_if $target (temp = condition; log("br_if on line 456 predicts B"; temp)) +// log("if on line 123 ended up false"); +// +// Note how in this case it is simpler to add the logging on the "false" case, +// since it is right after the br_if. +// +// The motivation for this pass is to fuzz branch hint updates: given a fuzz +// case, we can instrument it and view the loggings, then optimize the original, +// instrument that, and view those loggings. The amount of wrong predictions +// should not decrease (the amount of right ones might, since an if might be +// eliminated entirely by the optimizer). +// + +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +struct InstrumentBranchHints : public WalkerPass> { + void visitIf(If* curr) { + } + + void visitBreak(Break* curr) { + } +}; + +Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } + +} // namespace wasm diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 4b0e70eed8f..07f83e77405 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -753,8 +753,8 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); copyBranchHintTo(brIf, list[i], getFunction()); - // next: benchnarj without binarye // later: fuzz this: instrument "i am guessing at loc X" and "it was true/it was false", then fuzzz that we don't decreaes times we are right. + // "guess #17: true", then on one arm "guess #7 was true" and "false" on the other; match them upp (ok if guess vanishes - hints can be eliminated) // maybe: turn hints into asserts? not for fuzzing, but testing. if hint is 1, assert the value is not 0, etc. 
block->finalize(); return true; From 67511760e9f29c09ea576312112ee580cffae18e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 15:20:50 -0700 Subject: [PATCH 016/239] work --- src/passes/CMakeLists.txt | 1 + src/passes/InstrumentBranchHints.cpp | 31 ++++++++++++++++++++++++++++ src/passes/pass.cpp | 4 ++++ src/passes/passes.h | 1 + 4 files changed, 37 insertions(+) diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 6fab09b1bc2..714c4b35df0 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -52,6 +52,7 @@ set(passes_SOURCES HeapStoreOptimization.cpp I64ToI32Lowering.cpp Inlining.cpp + InstrumentBranchHints.cpp InstrumentLocals.cpp InstrumentMemory.cpp Intrinsics.cpp diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 6e5b6855fc0..82671e79932 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -66,10 +66,41 @@ namespace wasm { struct InstrumentBranchHints : public WalkerPass> { + Name LOG_GUESS("log_guess"); + Name LOG_TRUE("log_true"); + Name LOG_FALSE("log_false"); + + Index branchId = 0; + void visitIf(If* curr) { + if (auto likely = getFunction()->codeAnnotations[curr].likely) { + Builder builder(*getModule()); + + // Pick an ID for this branch. + auto id = branchId++; + + // Instrument the condition to add a logging of the guess. + auto temp = builder.addVar(getFunction(), Type::i32); + auto* set = builder.makeLocalTee(temp, curr->condition); + auto* idc = builder.makeConst(Literal(int32_t(id))); + auto* guess = builder.makeConst(Literal(int32_t(*likely))); + auto* logGuess = builder.makeCall(LOG_GUESS, { idc, guess }); + auto* get = builder.makeLocalGet(temp, Type::i32); + curr->condition = builder.makeBlock({ set, logGuess, get }); + + // Log the true branch. 
+ auto* idc2 = builder.makeConst(Literal(int32_t(id))); + auto* logTrue = builder.makeCall(LOG_TRUE, { idc2 }); + curr->ifTrue = builder.makeSequence({ logTrue, curr->ifTrue }); + } } void visitBreak(Break* curr) { + // tidoo + } + + void visitModule(Module* curr) { + // Add imports } }; diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2042bc71d3a..2294c7917d8 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -259,6 +259,10 @@ void PassRegistry::registerPasses() { "trace-calls", "instrument the build with code to intercept specific function calls", createTraceCallsPass); + registerPass( + "instrument-branch-hints", + "instrument branch hints so we can see which guessed right", + createInstrumentBranchHintsPass); registerPass( "instrument-locals", "instrument the build with code to intercept all loads and stores", diff --git a/src/passes/passes.h b/src/passes/passes.h index e051e466e72..d2ef24e3dcb 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -79,6 +79,7 @@ Pass* createLocalSubtypingPass(); Pass* createLogExecutionPass(); Pass* createIntrinsicLoweringPass(); Pass* createTraceCallsPass(); +Pass* createInstrumentBranchHintsPass(); Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); Pass* createLLVMMemoryCopyFillLoweringPass(); From eadb27f5689e0bfb0a57966b60e49d3844ea9318 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 15:29:21 -0700 Subject: [PATCH 017/239] work --- src/passes/InstrumentBranchHints.cpp | 38 ++++++++++++++++------------ 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 82671e79932..f58cb4bff28 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -66,32 +66,27 @@ namespace wasm { struct InstrumentBranchHints : public WalkerPass> { - Name LOG_GUESS("log_guess"); - Name LOG_TRUE("log_true"); - Name LOG_FALSE("log_false"); + Name LOG_GUESS = 
"log_guess"; + Name LOG_TRUE = "log_true"; + Name LOG_FALSE = "log_false"; Index branchId = 0; void visitIf(If* curr) { - if (auto likely = getFunction()->codeAnnotations[curr].likely) { + if (auto likely = getFunction()->codeAnnotations[curr].branchLikely) { Builder builder(*getModule()); - // Pick an ID for this branch. + // Pick an ID for this branch and a temp local. + auto temp = builder.addVar(getFunction(), Type::i32); auto id = branchId++; - // Instrument the condition to add a logging of the guess. - auto temp = builder.addVar(getFunction(), Type::i32); - auto* set = builder.makeLocalTee(temp, curr->condition); - auto* idc = builder.makeConst(Literal(int32_t(id))); - auto* guess = builder.makeConst(Literal(int32_t(*likely))); - auto* logGuess = builder.makeCall(LOG_GUESS, { idc, guess }); - auto* get = builder.makeLocalGet(temp, Type::i32); - curr->condition = builder.makeBlock({ set, logGuess, get }); + // Instrument the condition and the true branch. + instrumentCondition(curr->condition, temp, id, *likely); // Log the true branch. 
- auto* idc2 = builder.makeConst(Literal(int32_t(id))); - auto* logTrue = builder.makeCall(LOG_TRUE, { idc2 }); - curr->ifTrue = builder.makeSequence({ logTrue, curr->ifTrue }); + auto* idc = builder.makeConst(Literal(int32_t(id))); + auto* logTrue = builder.makeCall(LOG_TRUE, { idc }, Type::none); + curr->ifTrue = builder.makeSequence(logTrue, curr->ifTrue); } } @@ -99,6 +94,17 @@ struct InstrumentBranchHints : public WalkerPass Date: Mon, 30 Jun 2025 16:56:12 -0700 Subject: [PATCH 018/239] work --- src/passes/InstrumentBranchHints.cpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index f58cb4bff28..fb4dc4f6145 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -83,7 +83,8 @@ struct InstrumentBranchHints : public WalkerPasscondition, temp, id, *likely); - // Log the true branch. + // Log the true branch, which we can easily do by prepending in the ifTrue + // arm. auto* idc = builder.makeConst(Literal(int32_t(id))); auto* logTrue = builder.makeCall(LOG_TRUE, { idc }, Type::none); curr->ifTrue = builder.makeSequence(logTrue, curr->ifTrue); @@ -91,7 +92,22 @@ struct InstrumentBranchHints : public WalkerPasscodeAnnotations[curr].branchLikely) { + Builder builder(*getModule()); + + // Pick an ID for this branch and a temp local. + auto temp = builder.addVar(getFunction(), Type::i32); + auto id = branchId++; + + // Instrument the condition and the true branch. + instrumentCondition(curr->condition, temp, id, *likely); + + // Log the false branch, which we can easily do by appending right after + // the break. + auto* idc = builder.makeConst(Literal(int32_t(id))); + auto* logFalse = builder.makeCall(LOG_FALSE, { idc }, Type::none); + replaceCurrent(builder.makeSequence(logFalse, curr)); + } } // Given the condition of a branch, modify it in place, adding proper logging. 
From 3f2fcdbb8e420c4c0d9c76d8e195a79c6ffa7934 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 17:04:02 -0700 Subject: [PATCH 019/239] almost --- src/passes/InstrumentBranchHints.cpp | 13 +++- test/lit/passes/instrument-branch-hints.wast | 68 ++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 test/lit/passes/instrument-branch-hints.wast diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index fb4dc4f6145..bb7b10caba8 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -122,7 +122,18 @@ struct InstrumentBranchHints : public WalkerPassaddFunction(Builder::makeFunction(LOG_GUESS, Signature({Type::i32, Type::i32}, Type::none), {})); + auto* logTrue = + curr->addFunction(Builder::makeFunction(LOG_TRUE, Signature(Type::i32, Type::none), {})); + auto* logFalse = + curr->addFunction(Builder::makeFunction(LOG_FALSE, Signature(Type::i32, Type::none), {})); + + for (auto* func : {logGuess, logTrue, logFalse}) { + func->module = "fuzzing-support"; + func->base = func->name; + } } }; diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast new file mode 100644 index 00000000000..45e5834997d --- /dev/null +++ b/test/lit/passes/instrument-branch-hints.wast @@ -0,0 +1,68 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. 
+ +;; RUN: foreach %s %t wasm-opt --instrument-branch-hints -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32))) + + ;; CHECK: (type $2 (func (param i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log_guess" (func $log_guess (param i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log_true" (func $log_true (param i32))) + + ;; CHECK: (import "fuzzing-support" "log_false" (func $log_false (param i32))) + + ;; CHECK: (func $if + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + ) + + ;; CHECK: (func $br + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br + (block $out + (br_if $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + (drop (i32.const 99)) + ) +) From 6f2355a8aba3e7966a75f87e85243c759ee9d257 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 17:08:51 -0700 Subject: [PATCH 020/239] work --- test/lit/passes/instrument-branch-hints.wast | 185 ++++++++++++++++++- 1 file changed, 179 insertions(+), 6 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 45e5834997d..9916969049d 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -17,9 +17,25 @@ ;; CHECK: (import "fuzzing-support" "log_false" (func $log_false (param i32))) ;; 
CHECK: (func $if + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $log_true + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) @@ -30,8 +46,75 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $log_true + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: 
(i32.const 2) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $log_true + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $if + ;; An if with a 0 hint, a 1 hint, and no hint. + (@metadata.code.branch_hint "\00") (if (i32.const 42) (then @@ -41,28 +124,118 @@ (drop (i32.const 99)) ) ) + (@metadata.code.branch_hint "\01") + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + ;; Another hint of 0, for more coverage (ensure hint value differs from + ;; break id). + (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) ) ;; CHECK: (func $br + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (block $out - ;; CHECK-NEXT: (br_if $out - ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (call $log_false + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: (block $out1 + ;; CHECK-NEXT: (block + ;; 
CHECK-NEXT: (call $log_false + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out1 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out2 + ;; CHECK-NEXT: (br_if $out2 + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $br + ;; As above, with br_if. (block $out + (@metadata.code.branch_hint "\00") (br_if $out (i32.const 42) ) (drop (i32.const 1337)) ) - (drop (i32.const 99)) + (block $out1 + (@metadata.code.branch_hint "\01") + (br_if $out1 + (i32.const 142) + ) + (drop (i32.const 11337)) + ) + (block $out2 + (br_if $out2 + (i32.const 242) + ) + (drop (i32.const 21337)) + ) ) ) From 7f41f7453b2484bd026c78a34ad923b8e0ff1e11 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 17:16:34 -0700 Subject: [PATCH 021/239] done --- src/passes/InstrumentBranchHints.cpp | 12 +++- test/lit/passes/instrument-branch-hints.wast | 63 +++++++++++++++++--- 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index bb7b10caba8..04c0cb82a80 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -106,7 +106,17 @@ struct InstrumentBranchHints : public WalkerPasstype.isConcrete()) { + // We must stash the result, log the false, then return the result, + // using another temp var. 
+ auto tempValue = builder.addVar(getFunction(), curr->type); + auto* set = builder.makeLocalSet(tempValue, curr); + auto* get = builder.makeLocalGet(tempValue, curr->type); + replaceCurrent(builder.makeBlock({ set, logFalse, get })); + } else { + // No return value to bother with, so this is simple. + replaceCurrent(builder.makeSequence(curr, logFalse)); + } } } diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 9916969049d..e012f7754e2 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -8,7 +8,9 @@ ;; CHECK: (type $1 (func (param i32))) - ;; CHECK: (type $2 (func (param i32 i32))) + ;; CHECK: (type $2 (func (result f64))) + + ;; CHECK: (type $3 (func (param i32 i32))) ;; CHECK: (import "fuzzing-support" "log_guess" (func $log_guess (param i32 i32))) @@ -162,9 +164,6 @@ ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (call $log_false - ;; CHECK-NEXT: (i32.const 3) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (block (result i32) @@ -178,6 +177,9 @@ ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_false + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 1337) @@ -185,9 +187,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block $out1 ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (call $log_false - ;; CHECK-NEXT: (i32.const 4) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out1 ;; CHECK-NEXT: (block (result i32) @@ -201,6 +200,9 @@ ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_false + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 11337) @@ -238,4 +240,51 @@ (drop (i32.const 21337)) ) ) + + ;; 
CHECK: (func $br_value (result f64) + ;; CHECK-NEXT: (local $scratch f64) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 f64) + ;; CHECK-NEXT: (block $out (result f64) + ;; CHECK-NEXT: (local.set $scratch + ;; CHECK-NEXT: (block (result f64) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log_false + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $scratch) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_value (result f64) + ;; As above, but now with a value. We need to stash it to a local. 
+ (block $out (result f64) + (@metadata.code.branch_hint "\00") + (br_if $out + (f64.const 3.14159) + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) ) From 11d45ed725402aab1615fc10b54cf879f830d9b9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 17:16:43 -0700 Subject: [PATCH 022/239] format --- src/passes/InstrumentBranchHints.cpp | 30 ++++++++++++++++------------ src/passes/RemoveUnusedBrs.cpp | 11 +++++++--- src/passes/pass.cpp | 7 +++---- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 04c0cb82a80..4398aa80855 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -65,7 +65,8 @@ namespace wasm { -struct InstrumentBranchHints : public WalkerPass> { +struct InstrumentBranchHints + : public WalkerPass> { Name LOG_GUESS = "log_guess"; Name LOG_TRUE = "log_true"; Name LOG_FALSE = "log_false"; @@ -86,7 +87,7 @@ struct InstrumentBranchHints : public WalkerPassifTrue = builder.makeSequence(logTrue, curr->ifTrue); } } @@ -105,14 +106,14 @@ struct InstrumentBranchHints : public WalkerPasstype.isConcrete()) { // We must stash the result, log the false, then return the result, // using another temp var. auto tempValue = builder.addVar(getFunction(), curr->type); auto* set = builder.makeLocalSet(tempValue, curr); auto* get = builder.makeLocalGet(tempValue, curr->type); - replaceCurrent(builder.makeBlock({ set, logFalse, get })); + replaceCurrent(builder.makeBlock({set, logFalse, get})); } else { // No return value to bother with, so this is simple. 
replaceCurrent(builder.makeSequence(curr, logFalse)); @@ -121,24 +122,27 @@ struct InstrumentBranchHints : public WalkerPassaddFunction(Builder::makeFunction(LOG_GUESS, Signature({Type::i32, Type::i32}, Type::none), {})); - auto* logTrue = - curr->addFunction(Builder::makeFunction(LOG_TRUE, Signature(Type::i32, Type::none), {})); - auto* logFalse = - curr->addFunction(Builder::makeFunction(LOG_FALSE, Signature(Type::i32, Type::none), {})); + auto* logGuess = curr->addFunction(Builder::makeFunction( + LOG_GUESS, Signature({Type::i32, Type::i32}, Type::none), {})); + auto* logTrue = curr->addFunction( + Builder::makeFunction(LOG_TRUE, Signature(Type::i32, Type::none), {})); + auto* logFalse = curr->addFunction( + Builder::makeFunction(LOG_FALSE, Signature(Type::i32, Type::none), {})); for (auto* func : {logGuess, logTrue, logFalse}) { func->module = "fuzzing-support"; diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 07f83e77405..cb6eddf5931 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -753,9 +753,14 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); copyBranchHintTo(brIf, list[i], getFunction()); - // later: fuzz this: instrument "i am guessing at loc X" and "it was true/it was false", then fuzzz that we don't decreaes times we are right. - // "guess #17: true", then on one arm "guess #7 was true" and "false" on the other; match them upp (ok if guess vanishes - hints can be eliminated) - // maybe: turn hints into asserts? not for fuzzing, but testing. if hint is 1, assert the value is not 0, etc. + // later: fuzz this: instrument "i am guessing at loc X" and "it + // was true/it was false", then fuzzz that we don't decreaes times + // we are right. 
+ // "guess #17: true", then on one arm "guess #7 was true" and + // "false" on the other; match them upp (ok if guess vanishes - + // hints can be eliminated) + // maybe: turn hints into asserts? not for fuzzing, but testing. + // if hint is 1, assert the value is not 0, etc. block->finalize(); return true; } diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2294c7917d8..0907428d1c7 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -259,10 +259,9 @@ void PassRegistry::registerPasses() { "trace-calls", "instrument the build with code to intercept specific function calls", createTraceCallsPass); - registerPass( - "instrument-branch-hints", - "instrument branch hints so we can see which guessed right", - createInstrumentBranchHintsPass); + registerPass("instrument-branch-hints", + "instrument branch hints so we can see which guessed right", + createInstrumentBranchHintsPass); registerPass( "instrument-locals", "instrument the build with code to intercept all loads and stores", From 0df46b4c9ab401bba8a2a5bacb9d71d1cde2ff3c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 30 Jun 2025 17:21:17 -0700 Subject: [PATCH 023/239] work --- scripts/fuzz_opt.py | 49 +++++++++++++++++++++++++++++++++++++++++++ scripts/fuzz_shell.js | 18 +++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8f832187f5c..84dc3d26294 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1844,6 +1844,54 @@ def get_relevant_lines(wat): compare(get_relevant_lines(original), get_relevant_lines(processed), 'Preserve') +# Test that we preserve branch hints properly. +class BranchHintPreservation(TestCaseHandler): + frequency = 0.1 + + def handle(self, wasm): + # TODO + ''' + # We will later verify that no imports or exports changed, by comparing + # to the unprocessed original text. 
+ original = run([in_bin('wasm-opt'), wasm] + FEATURE_OPTS + ['--print']) + + # We leave if the module has (ref exn) in struct fields (because we have + # no way to generate an exn in a non-function context, and if we picked + # that struct for a global, we'd end up needing a (ref exn) in the + # global scope, which is impossible). The fuzzer is designed to be + # careful not to emit that in testcases, but after the optimizer runs, + # we may end up with struct fields getting refined to that, so we need + # this extra check (which should be hit very rarely). + structs = [line for line in original.split('\n') if '(struct ' in line] + if '(ref exn)' in '\n'.join(structs): + note_ignored_vm_run('has non-nullable exn in struct') + return + + # Generate some random input data. + data = abspath('preserve_input.dat') + make_random_input(random_size(), data) + + # Process the existing wasm file. + processed = run([in_bin('wasm-opt'), data] + FEATURE_OPTS + [ + '-ttf', + '--fuzz-preserve-imports-exports', + '--initial-fuzz=' + wasm, + '--print', + ]) + + def get_relevant_lines(wat): + # Imports and exports are relevant. + lines = [line for line in wat.splitlines() if '(export ' in line or '(import ' in line] + + # Ignore type names, which may vary (e.g. one file may have $5 and + # another may call the same type $17). + lines = [re.sub(r'[(]type [$][0-9a-zA-Z_$]+[)]', '', line) for line in lines] + + return '\n'.join(lines) + + compare(get_relevant_lines(original), get_relevant_lines(processed), 'Preserve')''' + + # The global list of all test case handlers testcase_handlers = [ FuzzExec(), @@ -1859,6 +1907,7 @@ def get_relevant_lines(wat): ClusterFuzz(), Two(), PreserveImportsExports(), + BranchHintPreservation(), ] diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index 3f201b3c812..7487f2c4f11 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -261,8 +261,11 @@ function oneIn(n) { return (randomBits() % n) == 0; } -// Set up the imports. 
+// Import helpers. var tempRet0; +var branchHints = {}; + +// Set up the imports. var imports = { 'fuzzing-support': { // Logging. @@ -353,6 +356,19 @@ var imports = { // how many time units to wait). }); }, + + 'log_guess': (id, expected) => { + // We are about to execute branch #id, and we expect its condition to + // either be true or false. Stash that expectation. + branchHints[id] = expected; + }, + 'log_true': (id) => { + var expected = branchHints[id]; + assert(expected !== undefined); + // unify the two, add param... + }, + 'log_false': (id) => { + }, }, // Emscripten support. 'env': { From bf929d5d7467de9fde4871b88673ca9b486d3195 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 11:09:39 -0700 Subject: [PATCH 024/239] work --- src/passes/InstrumentBranchHints.cpp | 184 +++++----- test/lit/passes/instrument-branch-hints.wast | 355 +++++++++++++++---- 2 files changed, 383 insertions(+), 156 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 4398aa80855..658e439ffcf 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -20,45 +20,65 @@ // // @metadata.branch.hint B // if (condition) { -// A; +// X // } else { -// B; +// Y // } // // into // // @metadata.branch.hint B -// if (temp = condition; log("if on line 123 predicts B"; temp) { -// log("if on line 123 ended up true"); -// A; +// ;; log the ID of the condition (123), the prediction (B), and the actual +// ;; runtime result (temp == condition). +// if (temp = condition; log(123, B, temp); temp) { +// X // } else { -// B; +// Y // } // -// That is, the logging identifies the if, logs the prediction (0 or 1) for that -// if, and then if the if were true, we log that, so by scanning all the -// loggings, we can see both the hint and what actually executed. 
Similarly, for -// br_if: +// The motivation for this pass is to fuzz branch hint updates: given a fuzz +// case, we can instrument it and view the loggings, then optimize the original, +// instrument that, and view those loggings. Imagine, for example, that we flip +// the condition but forget to flip the hint: // // @metadata.branch.hint B -// (br_if $target (condition)) +// if (!(temp = condition; log(123, B, temp); temp)) { ;; added a ! +// Y ;; this moved +// } else { +// X ;; this moved +// } // -// into +// The logging before would be 123,B,C (where C is 0 or 1 - the hint might be +// wrong or right, in a fuzz testcase), and the logging after will remain the +// same, so this did not help us yet (because the ! is not the entire condition, +// not just |condition|). But if we run this instrumentation again, we get this: // // @metadata.branch.hint B -// (br_if $target (temp = condition; log("br_if on line 456 predicts B"; temp)) -// log("if on line 123 ended up false"); +// if (temp2 = ( +// !(temp = condition; log(123, B, temp); temp) +// ); log(123, B, temp2); temp2)) { +// Y +// } else { +// X +// } // -// Note how in this case it is simpler to add the logging on the "false" case, -// since it is right after the br_if. +// Note how the full !-ed condition is nested inside another instrumentation +// with another temp local. Also, we inferred the same ID (123) in both cases, +// by scanning the inside of the condition. Using that, the new logging will be +// 123,B,C followed by 123,B,!C. We can therefore find pairs of loggings with +// same ID, and consider the predicted and actual values: // -// The motivation for this pass is to fuzz branch hint updates: given a fuzz -// case, we can instrument it and view the loggings, then optimize the original, -// instrument that, and view those loggings. The amount of wrong predictions -// should not decrease (the amount of right ones might, since an if might be -// eliminated entirely by the optimizer). 
+// [id,0,0], [id,0,0] - nothing changed: good +// [id,0,0], [id,0,1] - the actual result changed but not the prediction: bad +// [id,0,0], [id,1,0] - prediction changed but not actual result: bad +// [id,0,0], [id,1,1] - actual and predicted both changed: good +// etc. +// +// Regardless of whether the hint was right or wrong, it should change in tandem +// with the actual result. // +#include "ir/find_all.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" @@ -67,87 +87,77 @@ namespace wasm { struct InstrumentBranchHints : public WalkerPass> { - Name LOG_GUESS = "log_guess"; - Name LOG_TRUE = "log_true"; - Name LOG_FALSE = "log_false"; + Name MODULE = "fuzzing-support"; + Name LOG_BRANCH = "log-branch"; + + // Our logging function for branches. + Function* logBranch = nullptr; + // The branch id, which increments as we go. Index branchId = 0; void visitIf(If* curr) { - if (auto likely = getFunction()->codeAnnotations[curr].branchLikely) { - Builder builder(*getModule()); - - // Pick an ID for this branch and a temp local. - auto temp = builder.addVar(getFunction(), Type::i32); - auto id = branchId++; - - // Instrument the condition and the true branch. - instrumentCondition(curr->condition, temp, id, *likely); - - // Log the true branch, which we can easily do by prepending in the ifTrue - // arm. - auto* idc = builder.makeConst(Literal(int32_t(id))); - auto* logTrue = builder.makeCall(LOG_TRUE, {idc}, Type::none); - curr->ifTrue = builder.makeSequence(logTrue, curr->ifTrue); - } + processCondition(curr); } void visitBreak(Break* curr) { - if (auto likely = getFunction()->codeAnnotations[curr].branchLikely) { - Builder builder(*getModule()); - - // Pick an ID for this branch and a temp local. - auto temp = builder.addVar(getFunction(), Type::i32); - auto id = branchId++; - - // Instrument the condition and the true branch. 
- instrumentCondition(curr->condition, temp, id, *likely); - - // Log the false branch, which we can easily do by appending right after - // the break. - auto* idc = builder.makeConst(Literal(int32_t(id))); - auto* logFalse = builder.makeCall(LOG_FALSE, {idc}, Type::none); - if (curr->type.isConcrete()) { - // We must stash the result, log the false, then return the result, - // using another temp var. - auto tempValue = builder.addVar(getFunction(), curr->type); - auto* set = builder.makeLocalSet(tempValue, curr); - auto* get = builder.makeLocalGet(tempValue, curr->type); - replaceCurrent(builder.makeBlock({set, logFalse, get})); - } else { - // No return value to bother with, so this is simple. - replaceCurrent(builder.makeSequence(curr, logFalse)); - } + if (curr->condition) { + processCondition(curr); } } - // Given the condition of a branch, modify it in place, adding proper logging. - void instrumentCondition(Expression*& condition, - Index tempLocal, - Index id, - bool likely) { + template + void processCondition(T* curr) { + auto likely = getFunction()->codeAnnotations[curr].branchLikely; + if (!likely) { + return; + } + Builder builder(*getModule()); - auto* set = builder.makeLocalSet(tempLocal, condition); + + // Pick an ID for this branch. If we see a nested logging (see above), we + // copy that id. + Index id = -1; + for (auto* call : FindAll(curr->condition).list) { + if (call->target == LOG_BRANCH) { + if (id != Index(-1)) { + // We have seen another before, so give up. + id = -1; + break; + } + // This is the first one we see. Use it. + assert(call->operands.size() == 3); + id = call->operands[0]->cast()->value.geti32(); + } + } + // We never found one, or we gave up. + if (id == Index(-1)) { + id = branchId++; + } + + // Instrument the condition. 
+ auto tempLocal = builder.addVar(getFunction(), Type::i32); + auto* set = builder.makeLocalSet(tempLocal, curr->condition); auto* idc = builder.makeConst(Literal(int32_t(id))); - auto* guess = builder.makeConst(Literal(int32_t(likely))); - auto* logGuess = builder.makeCall(LOG_GUESS, {idc, guess}, Type::none); - auto* get = builder.makeLocalGet(tempLocal, Type::i32); - condition = builder.makeBlock({set, logGuess, get}); + auto* guess = builder.makeConst(Literal(int32_t(*likely))); + auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); + auto* logBranch = builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); + auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); + curr->condition = builder.makeBlock({set, logBranch, get2}); } - void visitModule(Module* curr) { - // Add imports. - auto* logGuess = curr->addFunction(Builder::makeFunction( - LOG_GUESS, Signature({Type::i32, Type::i32}, Type::none), {})); - auto* logTrue = curr->addFunction( - Builder::makeFunction(LOG_TRUE, Signature(Type::i32, Type::none), {})); - auto* logFalse = curr->addFunction( - Builder::makeFunction(LOG_FALSE, Signature(Type::i32, Type::none), {})); - - for (auto* func : {logGuess, logTrue, logFalse}) { - func->module = "fuzzing-support"; - func->base = func->name; + void doWalkModule(Module* module) { + // Find our import, if we were already run on this module. + auto* logBranch = module->getFunctionOrNull(LOG_BRANCH); + if (!logBranch) { + logBranch = module->addFunction(Builder::makeFunction( + LOG_BRANCH, Signature({Type::i32, Type::i32, Type::i32}, Type::none), {})); + logBranch->module = MODULE; + logBranch->base = logBranch->name; } + + // Walk normally, using logBranch as we go. 
+ WalkerPass>::doWalkModule(module); } }; diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index e012f7754e2..6beef392515 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -3,20 +3,18 @@ ;; RUN: foreach %s %t wasm-opt --instrument-branch-hints -S -o - | filecheck %s +;; Also test the results of running again. When a condition is instrumented +;; twice, we should reuse the id. +;; RUN: foreach %s %t wasm-opt --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE + (module ;; CHECK: (type $0 (func)) - ;; CHECK: (type $1 (func (param i32))) - - ;; CHECK: (type $2 (func (result f64))) - - ;; CHECK: (type $3 (func (param i32 i32))) + ;; CHECK: (type $1 (func (result f64))) - ;; CHECK: (import "fuzzing-support" "log_guess" (func $log_guess (param i32 i32))) + ;; CHECK: (type $2 (func (param i32 i32 i32))) - ;; CHECK: (import "fuzzing-support" "log_true" (func $log_true (param i32))) - - ;; CHECK: (import "fuzzing-support" "log_false" (func $log_false (param i32))) + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (param i32 i32 i32))) ;; CHECK: (func $if ;; CHECK-NEXT: (local $0 i32) @@ -28,16 +26,14 @@ ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (call $log-branch ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (call $log_true - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) @@ -54,16 +50,14 @@ ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 142) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (call $log-branch ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 1) + ;; 
CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (call $log_true - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 11337) ;; CHECK-NEXT: ) @@ -93,16 +87,14 @@ ;; CHECK-NEXT: (local.set $2 ;; CHECK-NEXT: (i32.const 342) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess + ;; CHECK-NEXT: (call $log-branch ;; CHECK-NEXT: (i32.const 2) ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (call $log_true - ;; CHECK-NEXT: (i32.const 2) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.const 31337) ;; CHECK-NEXT: ) @@ -114,6 +106,137 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; TWICE: (type $0 (func)) + + ;; TWICE: (type $1 (func (result f64))) + + ;; TWICE: (type $2 (func (param i32 i32 i32))) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $log-branch (param i32 i32 i32))) + + ;; TWICE: (func $if + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (local $4 i32) + ;; TWICE-NEXT: (local $5 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; 
TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 99) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $4 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 11337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 199) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 21337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 299) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $5 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (i32.const 342) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 2) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 2) + ;; 
TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 31337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 399) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) (func $if ;; An if with a 0 hint, a 1 hint, and no hint. (@metadata.code.branch_hint "\00") @@ -163,22 +286,18 @@ ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (block $out - ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") - ;; CHECK-NEXT: (br_if $out - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $0 - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess - ;; CHECK-NEXT: (i32.const 3) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_false - ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -186,22 +305,18 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block $out1 - ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (br_if $out1 - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $1 - ;; CHECK-NEXT: (i32.const 142) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess - ;; CHECK-NEXT: (i32.const 4) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out1 + ;; CHECK-NEXT: (block (result 
i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_false - ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -217,6 +332,78 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; TWICE: (func $br + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (block $out + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $out + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 3) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 3) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (block $out1 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (br_if $out1 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 4) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call 
$log-branch + ;; TWICE-NEXT: (i32.const 4) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 11337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (block $out2 + ;; TWICE-NEXT: (br_if $out2 + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 21337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) (func $br ;; As above, with br_if. (block $out @@ -244,30 +431,22 @@ ;; CHECK: (func $br_value (result f64) ;; CHECK-NEXT: (local $scratch f64) ;; CHECK-NEXT: (local $1 i32) - ;; CHECK-NEXT: (local $2 f64) ;; CHECK-NEXT: (block $out (result f64) ;; CHECK-NEXT: (local.set $scratch - ;; CHECK-NEXT: (block (result f64) - ;; CHECK-NEXT: (local.set $2 - ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") - ;; CHECK-NEXT: (br_if $out - ;; CHECK-NEXT: (f64.const 3.14159) - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $1 - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_guess - ;; CHECK-NEXT: (i32.const 5) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log_false - ;; CHECK-NEXT: (i32.const 5) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -276,6 +455,44 @@ ;; CHECK-NEXT: (local.get $scratch) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; TWICE: (func $br_value (result 
f64) + ;; TWICE-NEXT: (local $scratch f64) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (block $out (result f64) + ;; TWICE-NEXT: (local.set $scratch + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $out + ;; TWICE-NEXT: (f64.const 3.14159) + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 5) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 5) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $scratch) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) (func $br_value (result f64) ;; As above, but now with a value. We need to stash it to a local. (block $out (result f64) From cd82f50cf6ab14e2a152adbc0402b8c3f33232f6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 11:31:43 -0700 Subject: [PATCH 025/239] work --- scripts/fuzz_opt.py | 102 ++++++++++++++++++++++++++---------------- scripts/fuzz_shell.js | 13 +----- 2 files changed, 65 insertions(+), 50 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 84dc3d26294..f2673e9f368 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1846,54 +1846,76 @@ def get_relevant_lines(wat): # Test that we preserve branch hints properly. 
class BranchHintPreservation(TestCaseHandler): - frequency = 0.1 + frequency = 1 # XXX def handle(self, wasm): - # TODO - ''' - # We will later verify that no imports or exports changed, by comparing - # to the unprocessed original text. - original = run([in_bin('wasm-opt'), wasm] + FEATURE_OPTS + ['--print']) + opts = get_random_opts() - # We leave if the module has (ref exn) in struct fields (because we have - # no way to generate an exn in a non-function context, and if we picked - # that struct for a global, we'd end up needing a (ref exn) in the - # global scope, which is impossible). The fuzzer is designed to be - # careful not to emit that in testcases, but after the optimizer runs, - # we may end up with struct fields getting refined to that, so we need - # this extra check (which should be hit very rarely). - structs = [line for line in original.split('\n') if '(struct ' in line] - if '(ref exn)' in '\n'.join(structs): - note_ignored_vm_run('has non-nullable exn in struct') - return + # Instrument the wasm with branch hints, optimize, and instrument again. + opts = ['--instrument-branch-hints'] + opts + ['--instrument-branch-hints'] + instrumented = wasm + '.ibh.wasm' + run([in_bin('wasm-opt'), wasm] + opts + ['-o', instrumented]) - # Generate some random input data. - data = abspath('preserve_input.dat') - make_random_input(random_size(), data) + # Run. + out = run_d8_wasm(instrumented) - # Process the existing wasm file. - processed = run([in_bin('wasm-opt'), data] + FEATURE_OPTS + [ - '-ttf', - '--fuzz-preserve-imports-exports', - '--initial-fuzz=' + wasm, - '--print', - ]) - - def get_relevant_lines(wat): - # Imports and exports are relevant. - lines = [line for line in wat.splitlines() if '(export ' in line or '(import ' in line] - - # Ignore type names, which may vary (e.g. one file may have $5 and - # another may call the same type $17). 
- lines = [re.sub(r'[(]type [$][0-9a-zA-Z_$]+[)]', '', line) for line in lines] - - return '\n'.join(lines) + # Process the output. We look at the lines like this: + # + # log-branch: hint 123 of 1 and actual 0 + # + # Each line reports a branch id, the hint for its condition, and the + # actual result (if the condition was true). + # + # It is fine for hints to not match expectations, in a fuzz testcase - + # that should happen half the time. What is not fine is if the hint and + # the actual result get out of sync, for which we track pairs from the + # double instrumentation, matched by id: + # + # log-branch: hint 123 of 1 and actual 0 + # log-branch: hint 123 of 1 and actual 1 + # + pairs = [] + for line in out.splitlines(): + if line.startswith('log-branch: hint'): + # Add this as the beginning of a possible pair, if there is + # nothing before us, or a complete pair. + if (not pairs) or len(pairs[-1]) == 2: + pairs.append([line]) + continue - compare(get_relevant_lines(original), get_relevant_lines(processed), 'Preserve')''' + # This may complete a pair. + last = pairs[-1] + assert len(last) == 1 + last_id = last[0].split(' ')[2] + line_id = line[0].split(' ')[2] + if last_id == curr_id: + last.append(line) + else: + # They do not match. It is ok if a pair is not found, as the + # optimizer may remove a branch hint or a logging. Start a + # new pair. + pairs.append([line]) + + # Check the pairs. Consider: + # + # log-branch: hint 123 of 1 and actual 0 + # log-branch: hint 123 of 1 and actual 1 + # + # A pair like that is suspect: the actual result shifted - perhaps an + # optimization flipped the condition together with the arms - but the + # hint did not flip with it. That is, we want the pair's hint and actual + # to remain in sync (even if the hint is wrong). 
+ for first, second in pairs: + _, _, first_id, _, first_hint, _, _, first_actual = first[0].split(' ') + _, _, second_id, _, second_hint, _, _, second_actual = second[0].split(' ') + assert first_id == second_id + first_alignment = (first_hint != first_actual) + second_alignment = (second_hint != second_actual) + assert first_alignment == second_alignment # The global list of all test case handlers -testcase_handlers = [ +''' FuzzExec(), CompareVMs(), CheckDeterminism(), @@ -1907,6 +1929,8 @@ def get_relevant_lines(wat): ClusterFuzz(), Two(), PreserveImportsExports(), +''' +testcase_handlers = [ BranchHintPreservation(), ] diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index 7487f2c4f11..c2700e39000 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -357,17 +357,8 @@ var imports = { }); }, - 'log_guess': (id, expected) => { - // We are about to execute branch #id, and we expect its condition to - // either be true or false. Stash that expectation. - branchHints[id] = expected; - }, - 'log_true': (id) => { - var expected = branchHints[id]; - assert(expected !== undefined); - // unify the two, add param... - }, - 'log_false': (id) => { + 'log-branch': (id, expected, actual) => { + console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); }, }, // Emscripten support. From 23480407a000f6a69f1bef382793af7d320242a9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 11:42:33 -0700 Subject: [PATCH 026/239] fuzz --- scripts/fuzz_opt.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index f2673e9f368..d31ccebee07 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1854,7 +1854,7 @@ def handle(self, wasm): # Instrument the wasm with branch hints, optimize, and instrument again. 
opts = ['--instrument-branch-hints'] + opts + ['--instrument-branch-hints'] instrumented = wasm + '.ibh.wasm' - run([in_bin('wasm-opt'), wasm] + opts + ['-o', instrumented]) + run([in_bin('wasm-opt'), wasm] + opts + ['-o', instrumented] + FEATURE_OPTS) # Run. out = run_d8_wasm(instrumented) @@ -1884,12 +1884,12 @@ def handle(self, wasm): continue # This may complete a pair. - last = pairs[-1] - assert len(last) == 1 - last_id = last[0].split(' ')[2] - line_id = line[0].split(' ')[2] - if last_id == curr_id: - last.append(line) + last_pair = pairs[-1] + assert len(last_pair) == 1 + last_id = last_pair[0].split(' ')[2] + line_id = line.split(' ')[2] + if last_id == line_id: + last_pair.append(line) else: # They do not match. It is ok if a pair is not found, as the # optimizer may remove a branch hint or a logging. Start a @@ -1905,14 +1905,21 @@ def handle(self, wasm): # optimization flipped the condition together with the arms - but the # hint did not flip with it. That is, we want the pair's hint and actual # to remain in sync (even if the hint is wrong). - for first, second in pairs: - _, _, first_id, _, first_hint, _, _, first_actual = first[0].split(' ') - _, _, second_id, _, second_hint, _, _, second_actual = second[0].split(' ') + for pair in pairs: + if len(pair) != 2: + continue + first, second = pair + _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') + _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') assert first_id == second_id first_alignment = (first_hint != first_actual) second_alignment = (second_hint != second_actual) assert first_alignment == second_alignment + def can_run_on_wasm(self, wasm): + # Avoid things d8 cannot fully run. 
+ return all_disallowed(['shared-everything', 'strings']) + # The global list of all test case handlers ''' From f1cda780966441c6b454f7f6055b40ef9d59efec Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 12:42:30 -0700 Subject: [PATCH 027/239] work --- src/passes/CMakeLists.txt | 1 + src/passes/RandomizeBranchHints.cpp | 69 +++++++++++++++++++++++++++++ src/passes/pass.cpp | 3 ++ 3 files changed, 73 insertions(+) create mode 100644 src/passes/RandomizeBranchHints.cpp diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 714c4b35df0..16891caade3 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -103,6 +103,7 @@ set(passes_SOURCES Strip.cpp StripTargetFeatures.cpp TraceCalls.cpp + RandomizeBranchHints.cpp RedundantSetElimination.cpp RemoveImports.cpp RemoveMemoryInit.cpp diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp new file mode 100644 index 00000000000..2a243b110d8 --- /dev/null +++ b/src/passes/RandomizeBranchHints.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Apply random branch hints. This is really only useful for fuzzing. 
+// + +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +struct RandomizeBranchHints + : public WalkerPass>> { + + uint64_t hash = 42; + + void visitExpression(Expression* curr) { + // Add some deterministic randomness as we go. + std::cout << "hash: " << hash << '\n'; + deterministic_hash_combine(hash, curr->_id); + } + + void visitIf(If* curr) { + deterministic_hash_combine(hash, 1337); + processCondition(curr); + } + + void visitBreak(Break* curr) { + deterministic_hash_combine(hash, 99999); + if (curr->condition) { + processCondition(curr); + } + } + + template + void processCondition(T* curr) { + auto& likely = getFunction()->codeAnnotations[curr].branchLikely; + switch (hash % 3) { + case 0: + likely = true; + break; + case 1: + likely = false; + break; + case 2: + likely = {}; + break; + } + } +}; + +Pass* createRandomizeBranchHintsPass() { return new RandomizeBranchHints(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 0907428d1c7..83085bae007 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -412,6 +412,9 @@ void PassRegistry::registerPasses() { registerPass("propagate-globals-globally", "propagate global values to other globals (useful for tests)", createPropagateGlobalsGloballyPass); + registerPass("randomize-branch-hints", + "randomize branch hints (for fuzzing)", + createRandomizeBranchHintsPass); registerPass("remove-non-js-ops", "removes operations incompatible with js", createRemoveNonJSOpsPass); From 74e623033d8b9ce42b65184c88ab29addd072353 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 12:47:46 -0700 Subject: [PATCH 028/239] test --- src/passes/passes.h | 1 + test/lit/passes/randomize-branch-hints.wast | 309 ++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 test/lit/passes/randomize-branch-hints.wast diff --git a/src/passes/passes.h b/src/passes/passes.h index d2ef24e3dcb..e0c03bad8d7 100644 --- a/src/passes/passes.h 
+++ b/src/passes/passes.h @@ -131,6 +131,7 @@ Pass* createPrintCallGraphPass(); Pass* createPrintFeaturesPass(); Pass* createPrintFunctionMapPass(); Pass* createPropagateGlobalsGloballyPass(); +Pass* createRandomizeBranchHintsPass(); Pass* createRemoveNonJSOpsPass(); Pass* createRemoveImportsPass(); Pass* createRemoveMemoryInitPass(); diff --git a/test/lit/passes/randomize-branch-hints.wast b/test/lit/passes/randomize-branch-hints.wast new file mode 100644 index 00000000000..95fff9c9ce8 --- /dev/null +++ b/test/lit/passes/randomize-branch-hints.wast @@ -0,0 +1,309 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt --randomize-branch-hints -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (result f64))) + + ;; CHECK: (func $if + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) 
+ ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if + ;; We should see various branch hints appear, both true and false, and also + ;; some instructions with no hint. + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) + ) + + ;; CHECK: (func $if-existing + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; 
CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-existing + ;; We do not error on existing hints, and trample/remove them. + (@metadata.code.branch_hint "\01") + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (@metadata.code.branch_hint "\00") + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) + ) + + ;; CHECK: (func $br_if + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out1 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out2 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out2 + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_if + ;; As above, with br_if. 
+ (block $out + (br_if $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + (block $out1 + (br_if $out1 + (i32.const 142) + ) + (drop (i32.const 11337)) + ) + (block $out2 + ;; Existing hint. + (@metadata.code.branch_hint "\01") + (br_if $out2 + (i32.const 242) + ) + (drop (i32.const 21337)) + ) + ) + + ;; CHECK: (func $br_value (result f64) + ;; CHECK-NEXT: (local $scratch f64) + ;; CHECK-NEXT: (block $out (result f64) + ;; CHECK-NEXT: (local.set $scratch + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $scratch) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_value (result f64) + ;; As above, but now with a value. We should not error. + (block $out (result f64) + (br_if $out + (f64.const 3.14159) + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) + + ;; CHECK: (func $br + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br $out) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br + ;; As above, but now without a condition. We should not error. 
+ (block $out + (br $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) +) From a35ceb0d908eae217dab546a3ef93d9c8d1319df Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 12:49:50 -0700 Subject: [PATCH 029/239] FUZZ --- scripts/fuzz_opt.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index d31ccebee07..74e403bbdb2 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1849,10 +1849,17 @@ class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX def handle(self, wasm): - opts = get_random_opts() + opts = [ + # Add random branch hints (so we have something to work with). + '--randomize-branch-hints', + # Instrument them for our fuzzing, then optimize. + '--instrument-branch-hints', + ] + get_random_opts() + [ + # Instrument again, so our fuzzing can see if the optimizations + # messed anything up. + '--instrument-branch-hints', + ] - # Instrument the wasm with branch hints, optimize, and instrument again. - opts = ['--instrument-branch-hints'] + opts + ['--instrument-branch-hints'] instrumented = wasm + '.ibh.wasm' run([in_bin('wasm-opt'), wasm] + opts + ['-o', instrumented] + FEATURE_OPTS) From 5f90bc0f64a2640bd0cad10e2a70e95cb6ad991f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 12:52:47 -0700 Subject: [PATCH 030/239] format --- src/passes/InstrumentBranchHints.cpp | 14 +++++++------- src/passes/RandomizeBranchHints.cpp | 7 ++++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 658e439ffcf..d5db29128e1 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -96,9 +96,7 @@ struct InstrumentBranchHints // The branch id, which increments as we go. 
Index branchId = 0; - void visitIf(If* curr) { - processCondition(curr); - } + void visitIf(If* curr) { processCondition(curr); } void visitBreak(Break* curr) { if (curr->condition) { @@ -106,8 +104,7 @@ struct InstrumentBranchHints } } - template - void processCondition(T* curr) { + template void processCondition(T* curr) { auto likely = getFunction()->codeAnnotations[curr].branchLikely; if (!likely) { return; @@ -141,7 +138,8 @@ struct InstrumentBranchHints auto* idc = builder.makeConst(Literal(int32_t(id))); auto* guess = builder.makeConst(Literal(int32_t(*likely))); auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); - auto* logBranch = builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); + auto* logBranch = + builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); } @@ -151,7 +149,9 @@ struct InstrumentBranchHints auto* logBranch = module->getFunctionOrNull(LOG_BRANCH); if (!logBranch) { logBranch = module->addFunction(Builder::makeFunction( - LOG_BRANCH, Signature({Type::i32, Type::i32, Type::i32}, Type::none), {})); + LOG_BRANCH, + Signature({Type::i32, Type::i32, Type::i32}, Type::none), + {})); logBranch->module = MODULE; logBranch->base = logBranch->name; } diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 2a243b110d8..01436639d52 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -25,7 +25,9 @@ namespace wasm { struct RandomizeBranchHints - : public WalkerPass>> { + : public WalkerPass< + PostWalker>> { uint64_t hash = 42; @@ -47,8 +49,7 @@ struct RandomizeBranchHints } } - template - void processCondition(T* curr) { + template void processCondition(T* curr) { auto& likely = getFunction()->codeAnnotations[curr].branchLikely; switch (hash % 3) { case 0: From 82f5e688c913623b4d2a949b609fb2fc3366bac3 Mon Sep 17 00:00:00 2001 From: Alon 
Zakai Date: Tue, 1 Jul 2025 13:01:08 -0700 Subject: [PATCH 031/239] start --- scripts/fuzz_opt.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 74e403bbdb2..4d20df2f210 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1849,22 +1849,35 @@ class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX def handle(self, wasm): - opts = [ - # Add random branch hints (so we have something to work with). - '--randomize-branch-hints', - # Instrument them for our fuzzing, then optimize. - '--instrument-branch-hints', + # Generate the middle wasm, which has the first round of instrumentation, + # then the final one with optimizations as well. We only run the final + # one, but the middle one is useful to compare when debugging an error. + middle = wasm + '.mid.wasm' + run([ + in_bin('wasm-opt'), + wasm, + # Add random branch hints (so we have something to work with). + '--randomize-branch-hints', + # Instrument them for our fuzzing, then optimize. + '--instrument-branch-hints', + '-o', middle, + '-g', + ] + FEATURE_OPTS) + + final = wasm + '.final.wasm' + run([ + in_bin('wasm-opt'), + middle, ] + get_random_opts() + [ - # Instrument again, so our fuzzing can see if the optimizations - # messed anything up. - '--instrument-branch-hints', - ] - - instrumented = wasm + '.ibh.wasm' - run([in_bin('wasm-opt'), wasm] + opts + ['-o', instrumented] + FEATURE_OPTS) + # Instrument again after opts, so our fuzzing can see if the opts + # messed anything up. + '--instrument-branch-hints', + '-o', final, + '-g', + ] + FEATURE_OPTS) # Run. - out = run_d8_wasm(instrumented) + out = run_d8_wasm(final) # Process the output. 
We look at the lines like this: # From 44cbc468d27e5e6b19212eea8e2524ef4846925b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:10:31 -0700 Subject: [PATCH 032/239] work --- scripts/fuzz_opt.py | 8 +++-- src/passes/InstrumentBranchHints.cpp | 22 +++++++----- test/lit/passes/instrument-branch-hints.wast | 38 ++++++++++---------- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 4d20df2f210..49ce890b27c 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1892,8 +1892,10 @@ def handle(self, wasm): # double instrumentation, matched by id: # # log-branch: hint 123 of 1 and actual 0 - # log-branch: hint 123 of 1 and actual 1 + # log-branch: hint -123 of 1 and actual 1 # + # The second phase of instrumentation adds negative ids, so here we + # would match 123 with -123. pairs = [] for line in out.splitlines(): if line.startswith('log-branch: hint'): @@ -1908,7 +1910,7 @@ def handle(self, wasm): assert len(last_pair) == 1 last_id = last_pair[0].split(' ')[2] line_id = line.split(' ')[2] - if last_id == line_id: + if last_id >= 0 and last_id == -line_id: last_pair.append(line) else: # They do not match. It is ok if a pair is not found, as the @@ -1931,7 +1933,7 @@ def handle(self, wasm): first, second = pair _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') - assert first_id == second_id + assert first_id >= 0 and first_id == -second_id first_alignment = (first_hint != first_actual) second_alignment = (second_hint != second_actual) assert first_alignment == second_alignment diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index d5db29128e1..74e95861820 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -74,8 +74,13 @@ // [id,0,0], [id,1,1] - actual and predicted both changed: good // etc. 
// +// To make it easy to pair the results, the ID is negative in subsequent +// instrumentations. That is we will match an id of 42 in the first +// instrumentation with an id of -42 in the last (that avoids us matching two +// from the first, if e.g. a branch happens twice in a loop). +// // Regardless of whether the hint was right or wrong, it should change in tandem -// with the actual result. +// with the actual result, see script/fuzz_opt.py's BranchHintPreservation. // #include "ir/find_all.h" @@ -94,7 +99,7 @@ struct InstrumentBranchHints Function* logBranch = nullptr; // The branch id, which increments as we go. - Index branchId = 0; + int branchId = 1; void visitIf(If* curr) { processCondition(curr); } @@ -114,21 +119,22 @@ struct InstrumentBranchHints // Pick an ID for this branch. If we see a nested logging (see above), we // copy that id. - Index id = -1; + int id = 0; for (auto* call : FindAll(curr->condition).list) { if (call->target == LOG_BRANCH) { - if (id != Index(-1)) { + if (id) { // We have seen another before, so give up. - id = -1; + id = 0; break; } - // This is the first one we see. Use it. + // This is the first one we see. Use it, negated to indicate it is from + // the second instrumentation. assert(call->operands.size() == 3); - id = call->operands[0]->cast()->value.geti32(); + id = -call->operands[0]->cast()->value.geti32(); } } // We never found one, or we gave up. - if (id == Index(-1)) { + if (!id) { id = branchId++; } diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 6beef392515..6e4ab32078c 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -4,7 +4,7 @@ ;; RUN: foreach %s %t wasm-opt --instrument-branch-hints -S -o - | filecheck %s ;; Also test the results of running again. When a condition is instrumented -;; twice, we should reuse the id. +;; twice, we should reuse the id, but emit it negated. 
;; RUN: foreach %s %t wasm-opt --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE (module @@ -27,7 +27,7 @@ ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) @@ -51,7 +51,7 @@ ;; CHECK-NEXT: (i32.const 142) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) @@ -88,7 +88,7 @@ ;; CHECK-NEXT: (i32.const 342) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) @@ -130,7 +130,7 @@ ;; TWICE-NEXT: (i32.const 42) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (i32.const 1) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $0) ;; TWICE-NEXT: ) @@ -138,7 +138,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (i32.const -1) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $3) ;; TWICE-NEXT: ) @@ -164,7 +164,7 @@ ;; TWICE-NEXT: (i32.const 142) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const 2) ;; TWICE-NEXT: (i32.const 1) ;; TWICE-NEXT: (local.get $1) ;; TWICE-NEXT: ) @@ -172,7 +172,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const -2) ;; TWICE-NEXT: (i32.const 1) ;; TWICE-NEXT: (local.get $4) ;; TWICE-NEXT: ) @@ -211,7 +211,7 @@ ;; TWICE-NEXT: (i32.const 342) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 2) + ;; TWICE-NEXT: (i32.const 3) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $2) ;; 
TWICE-NEXT: ) @@ -219,7 +219,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 2) + ;; TWICE-NEXT: (i32.const -3) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) @@ -293,7 +293,7 @@ ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 4) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) @@ -312,7 +312,7 @@ ;; CHECK-NEXT: (i32.const 142) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 5) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) @@ -347,7 +347,7 @@ ;; TWICE-NEXT: (i32.const 42) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 3) + ;; TWICE-NEXT: (i32.const 4) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $0) ;; TWICE-NEXT: ) @@ -355,7 +355,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 3) + ;; TWICE-NEXT: (i32.const -4) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) @@ -376,7 +376,7 @@ ;; TWICE-NEXT: (i32.const 142) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 4) + ;; TWICE-NEXT: (i32.const 5) ;; TWICE-NEXT: (i32.const 1) ;; TWICE-NEXT: (local.get $1) ;; TWICE-NEXT: ) @@ -384,7 +384,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 4) + ;; TWICE-NEXT: (i32.const -5) ;; TWICE-NEXT: (i32.const 1) ;; TWICE-NEXT: (local.get $3) ;; TWICE-NEXT: ) @@ -441,7 +441,7 @@ ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch - ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) @@ -471,7 +471,7 @@ ;; TWICE-NEXT: (i32.const 42) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; 
TWICE-NEXT: (i32.const 5) + ;; TWICE-NEXT: (i32.const 6) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $1) ;; TWICE-NEXT: ) @@ -479,7 +479,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 5) + ;; TWICE-NEXT: (i32.const -6) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) From 738be0de609594fb3b331af160177d6a406e6021 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:12:16 -0700 Subject: [PATCH 033/239] fix --- scripts/fuzz_opt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 49ce890b27c..97536e9a312 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1908,8 +1908,8 @@ def handle(self, wasm): # This may complete a pair. last_pair = pairs[-1] assert len(last_pair) == 1 - last_id = last_pair[0].split(' ')[2] - line_id = line.split(' ')[2] + last_id = int(last_pair[0].split(' ')[2]) + line_id = int(line.split(' ')[2]) if last_id >= 0 and last_id == -line_id: last_pair.append(line) else: @@ -1933,7 +1933,7 @@ def handle(self, wasm): first, second = pair _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') - assert first_id >= 0 and first_id == -second_id + assert second_id == '-' + first_id first_alignment = (first_hint != first_actual) second_alignment = (second_hint != second_actual) assert first_alignment == second_alignment From ce8a47582ae2d6a0cdf511b8ad781ab27c763875 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:22:07 -0700 Subject: [PATCH 034/239] fix --- src/passes/InstrumentBranchHints.cpp | 5 +++++ src/passes/RandomizeBranchHints.cpp | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 74e95861820..7da4c236aca 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ 
b/src/passes/InstrumentBranchHints.cpp @@ -110,6 +110,11 @@ struct InstrumentBranchHints } template void processCondition(T* curr) { + if (curr->condition->type == Type::unreachable) { + // This branch is not even reached. + return; + } + auto likely = getFunction()->codeAnnotations[curr].branchLikely; if (!likely) { return; diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 01436639d52..3064557aed5 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -33,7 +33,6 @@ struct RandomizeBranchHints void visitExpression(Expression* curr) { // Add some deterministic randomness as we go. - std::cout << "hash: " << hash << '\n'; deterministic_hash_combine(hash, curr->_id); } From 20bb9c11f948127dee4c87935d2d1c4404c01656 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:33:07 -0700 Subject: [PATCH 035/239] fix --- src/passes/InstrumentBranchHints.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 7da4c236aca..d2cf9e58c4f 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -84,6 +84,7 @@ // #include "ir/find_all.h" +#include "ir/names.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" @@ -92,11 +93,12 @@ namespace wasm { struct InstrumentBranchHints : public WalkerPass> { + // The module and base names of our import. Name MODULE = "fuzzing-support"; - Name LOG_BRANCH = "log-branch"; + Name BASE = "log-branch"; - // Our logging function for branches. - Function* logBranch = nullptr; + // The internal name of our import. + Name LOG_BRANCH; // The branch id, which increments as we go. int branchId = 1; @@ -157,14 +159,20 @@ struct InstrumentBranchHints void doWalkModule(Module* module) { // Find our import, if we were already run on this module. 
- auto* logBranch = module->getFunctionOrNull(LOG_BRANCH); - if (!logBranch) { - logBranch = module->addFunction(Builder::makeFunction( - LOG_BRANCH, + for (auto& func : module->functions) { + if (func->module == MODULE && func->base == BASE) { + LOG_BRANCH = func->name; + break; + } + } + if (!LOG_BRANCH) { + auto* func = module->addFunction(Builder::makeFunction( + Names::getValidFunctionName(*module, BASE), Signature({Type::i32, Type::i32, Type::i32}, Type::none), {})); - logBranch->module = MODULE; - logBranch->base = logBranch->name; + func->module = MODULE; + func->base = BASE; + LOG_BRANCH = func->name; } // Walk normally, using logBranch as we go. From 88d6fd87c521ee5a559568eb00581b094992a08e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:40:55 -0700 Subject: [PATCH 036/239] test --- test/lit/passes/instrument-branch-hints.wast | 81 ++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 6e4ab32078c..22bb3b89378 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -505,3 +505,84 @@ ) ) ) + +;; This module has our import, but with a minified internal name. We should +;; still use it. 
+(module + ;; CHECK: (type $0 (func (param i32 i32 i32))) + + ;; CHECK: (type $1 (func)) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + ;; TWICE: (type $0 (func (param i32 i32 i32))) + + ;; TWICE: (type $1 (func)) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + + ;; CHECK: (func $if + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $if + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -1) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $if + (@metadata.code.branch_hint "\01") + (if + (i32.const 42) + (then + 
(drop (i32.const 1337)) + ) + ) + ) +) From 6e5abd229525857d34760237caca612f98a1dfaf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 13:44:53 -0700 Subject: [PATCH 037/239] nicer --- scripts/fuzz_opt.py | 1 + scripts/fuzz_shell.js | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 97536e9a312..920bb9e7c30 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1930,6 +1930,7 @@ def handle(self, wasm): for pair in pairs: if len(pair) != 2: continue + print(pair) first, second = pair _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index c2700e39000..cafce709ed1 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -263,7 +263,6 @@ function oneIn(n) { // Import helpers. var tempRet0; -var branchHints = {}; // Set up the imports. var imports = { @@ -358,6 +357,10 @@ var imports = { }, 'log-branch': (id, expected, actual) => { + // We only care about truthiness of the expected and actual values, when + // fuzzing. 
+ expected = +!!expected; + actual = +!!actual; console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); }, }, From 7d9f8ff88a332610d5a5f041e04188ec429acee3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 14:09:11 -0700 Subject: [PATCH 038/239] fix --- scripts/fuzz_opt.py | 23 +++++++++++++++-------- src/passes/InstrumentBranchHints.cpp | 7 ++++++- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 920bb9e7c30..d25c17dc0a6 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1930,7 +1930,7 @@ def handle(self, wasm): for pair in pairs: if len(pair) != 2: continue - print(pair) + print(pair) # XXX first, second = pair _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') @@ -1980,12 +1980,17 @@ def test_one(random_input, given_wasm): # if given a wasm file we want to use it as is, but we also want to # apply properties like not having any NaNs, which the original fuzz # wasm had applied. that is, we need to preserve properties like not - # having nans through reduction. - try: - run([in_bin('wasm-opt'), given_wasm, '-o', abspath('a.wasm')] + GEN_ARGS + FEATURE_OPTS) - except Exception as e: - print("Internal error in fuzzer! Could not run given wasm") - raise e + # having nans through reduction. still, in some cases we must trust the + # given wasm blindly, without modifications, so we have an env var for + # that. + if os.environ.get('BINARYEN_TRUST_GIVEN_WASM'): + shutil.copyfile(given_wasm, abspath('a.wasm')) + else: + try: + run([in_bin('wasm-opt'), given_wasm, '-o', abspath('a.wasm')] + GEN_ARGS + FEATURE_OPTS) + except Exception as e: + print("Internal error in fuzzer! 
Could not run given wasm") + raise e else: # emit the target features section so that reduction can work later, # without needing to specify the features @@ -2454,7 +2459,9 @@ def get_random_opts(): (If it does not, then one possible issue is that the fuzzer fails to write a valid binary. If so, you can print the output of the fuzzer's first command (using -ttf / --translate-to-fuzz) in text form and run the reduction from that, -passing --text to the reducer.) +passing --text to the reducer. Another possible fix is to avoid re-processing +the wasm for fuzzing in each iteration, by adding +BINARYEN_TRUST_GIVEN_WASM=1 in the env.) You can also read "%(reduce_sh)s" which has been filled out for you and includes docs and suggestions. diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index d2cf9e58c4f..b66cae47874 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -111,6 +111,10 @@ struct InstrumentBranchHints } } + // Note all the calls we add to our imports. They are definitely not calls + // from an earlier instrumentation. + std::unordered_set addedCalls; + template void processCondition(T* curr) { if (curr->condition->type == Type::unreachable) { // This branch is not even reached. @@ -128,7 +132,7 @@ struct InstrumentBranchHints // copy that id. int id = 0; for (auto* call : FindAll(curr->condition).list) { - if (call->target == LOG_BRANCH) { + if (call->target == LOG_BRANCH && !addedCalls.count(call)) { if (id) { // We have seen another before, so give up. 
id = 0; @@ -153,6 +157,7 @@ struct InstrumentBranchHints auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); auto* logBranch = builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); + addedCalls.insert(logBranch); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); } From bfef580db4f84fc478b2ec6e6d7cb18176dbbdb8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 14:20:48 -0700 Subject: [PATCH 039/239] work --- src/passes/InstrumentBranchHints.cpp | 11 +- test/lit/passes/instrument-branch-hints.wast | 204 +++++++++++++++---- 2 files changed, 166 insertions(+), 49 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index b66cae47874..138f4d25644 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -91,6 +91,11 @@ namespace wasm { +namespace { +// The branch id, which increments as we go. +int branchId = 1; +} + struct InstrumentBranchHints : public WalkerPass> { // The module and base names of our import. @@ -100,9 +105,6 @@ struct InstrumentBranchHints // The internal name of our import. Name LOG_BRANCH; - // The branch id, which increments as we go. - int branchId = 1; - void visitIf(If* curr) { processCondition(curr); } void visitBreak(Break* curr) { @@ -134,7 +136,8 @@ struct InstrumentBranchHints for (auto* call : FindAll(curr->condition).list) { if (call->target == LOG_BRANCH && !addedCalls.count(call)) { if (id) { - // We have seen another before, so give up. + // We have seen another before, so give up (it is not worth the effort + // to figure out what belongs to what). 
id = 0; break; } diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 22bb3b89378..f88b31d45e8 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -504,78 +504,192 @@ (drop (i32.const 1337)) ) ) -) - -;; This module has our import, but with a minified internal name. We should -;; still use it. -(module - ;; CHECK: (type $0 (func (param i32 i32 i32))) - - ;; CHECK: (type $1 (func)) - - ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - ;; TWICE: (type $0 (func (param i32 i32 i32))) - ;; TWICE: (type $1 (func)) - - ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - - ;; CHECK: (func $if + ;; CHECK: (func $nested ;; CHECK-NEXT: (local $0 i32) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $0 - ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 9) 
+ ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $if + ;; TWICE: (func $nested ;; TWICE-NEXT: (local $0 i32) ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (local $4 i32) + ;; TWICE-NEXT: (local $5 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") ;; TWICE-NEXT: (if ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (local.set $5 ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if (result i32) + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 7) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -7) + ;; TWICE-NEXT: (i32.const 1) + ;; 
TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 9) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const -1) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 10) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $4 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 342) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 8) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -8) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) + (func $nested + ;; Do not reuse branch hint ids in the 
same instrumentation: even if we have + ;; nested conditions, we can see which calls we added, and not reuse their + ;; ids. Only TWICE should ever reuse ids in our output. + (@metadata.code.branch_hint "\00") + (if + (@metadata.code.branch_hint "\01") + (if (result i32) + (i32.const 42) + (then + (i32.const 142) + ) + (else + (i32.const 242) + ) + ) + (then + (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 1337)) + ) + ) + ) + ) + ) +) + +;; This module has our import, but with a minified internal name. We should +;; still use it. +(;;module + (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + (func $if (@metadata.code.branch_hint "\01") (if @@ -585,4 +699,4 @@ ) ) ) -) +;;) From a8e6c09160336a4eb5cd7a6c8d05df86da17c255 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 15:19:00 -0700 Subject: [PATCH 040/239] fix --- src/passes/InstrumentBranchHints.cpp | 9 ++ test/lit/passes/instrument-branch-hints.wast | 141 ++++++++++++++++--- 2 files changed, 128 insertions(+), 22 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 138f4d25644..037e68ece6e 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -83,6 +83,7 @@ // with the actual result, see script/fuzz_opt.py's BranchHintPreservation. // +#include "ir/eh-utils.h" #include "ir/find_all.h" #include "ir/names.h" #include "pass.h" @@ -165,6 +166,14 @@ struct InstrumentBranchHints curr->condition = builder.makeBlock({set, logBranch, get2}); } + void visitFunction(Function* func) { + // Our added blocks may have caused nested pops. + if (!addedCalls.empty()) { + EHUtils::handleBlockNestedPops(func, *getModule()); + addedCalls.clear(); + } + } + void doWalkModule(Module* module) { // Find our import, if we were already run on this module. 
for (auto& func : module->functions) { diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index f88b31d45e8..fa17f70b60c 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -1,22 +1,38 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. -;; RUN: foreach %s %t wasm-opt --instrument-branch-hints -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints -S -o - | filecheck %s ;; Also test the results of running again. When a condition is instrumented ;; twice, we should reuse the id, but emit it negated. -;; RUN: foreach %s %t wasm-opt --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE +;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE (module ;; CHECK: (type $0 (func)) - ;; CHECK: (type $1 (func (result f64))) + ;; CHECK: (type $1 (func (param i32))) - ;; CHECK: (type $2 (func (param i32 i32 i32))) + ;; CHECK: (type $2 (func (result f64))) - ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (param i32 i32 i32))) + ;; CHECK: (type $3 (func (param i32 i32 i32))) - ;; CHECK: (func $if + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) + + ;; CHECK: (tag $i32 (type $1) (param i32)) + ;; TWICE: (type $0 (func)) + + ;; TWICE: (type $1 (func (param i32))) + + ;; TWICE: (type $2 (func (result f64))) + + ;; TWICE: (type $3 (func (param i32 i32 i32))) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) + + ;; TWICE: (tag $i32 (type $1) (param i32)) + (tag $i32 (param i32)) + + ;; CHECK: (func $if (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local 
$1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -106,15 +122,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (type $0 (func)) - - ;; TWICE: (type $1 (func (result f64))) - - ;; TWICE: (type $2 (func (param i32 i32 i32))) - - ;; TWICE: (import "fuzzing-support" "log-branch" (func $log-branch (param i32 i32 i32))) - - ;; TWICE: (func $if + ;; TWICE: (func $if (type $0) ;; TWICE-NEXT: (local $0 i32) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (local $2 i32) @@ -282,7 +290,7 @@ ) ) - ;; CHECK: (func $br + ;; CHECK: (func $br (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (block $out @@ -332,7 +340,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $br + ;; TWICE: (func $br (type $0) ;; TWICE-NEXT: (local $0 i32) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (local $2 i32) @@ -428,7 +436,7 @@ ) ) - ;; CHECK: (func $br_value (result f64) + ;; CHECK: (func $br_value (type $2) (result f64) ;; CHECK-NEXT: (local $scratch f64) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (block $out (result f64) @@ -455,7 +463,7 @@ ;; CHECK-NEXT: (local.get $scratch) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $br_value (result f64) + ;; TWICE: (func $br_value (type $2) (result f64) ;; TWICE-NEXT: (local $scratch f64) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (local $2 i32) @@ -505,7 +513,7 @@ ) ) - ;; CHECK: (func $nested + ;; CHECK: (func $nested (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -564,7 +572,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $nested + ;; TWICE: (func $nested (type $0) ;; TWICE-NEXT: (local $0 i32) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (local $2 i32) @@ -617,7 +625,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 10) + ;; TWICE-NEXT: (i32.const 11) ;; TWICE-NEXT: (i32.const 0) ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) @@ -683,6 +691,95 @@ 
) ) ) + + ;; CHECK: (func $eh-pop (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (block $label + ;; CHECK-NEXT: (try + ;; CHECK-NEXT: (do + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (catch $i32 + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (pop i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $label + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $eh-pop (type $0) + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (block $label + ;; TWICE-NEXT: (try + ;; TWICE-NEXT: (do + ;; TWICE-NEXT: (nop) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (catch $i32 + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (pop i32) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $label + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 10) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -10) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $eh-pop + (block $label + 
(try + (do + (nop) + ) + (catch $i32 + (@metadata.code.branch_hint "\00") + (br_if $label + ;; This pop will end up in a block after our instrumentation, which + ;; requires fixups. + (pop i32) + ) + ) + ) + ) + ) ) ;; This module has our import, but with a minified internal name. We should From 85c7557d178d527440b6004c755e97186a3c9bcc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 15:19:40 -0700 Subject: [PATCH 041/239] format --- src/passes/InstrumentBranchHints.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 037e68ece6e..ea25d2172b0 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -95,7 +95,7 @@ namespace wasm { namespace { // The branch id, which increments as we go. int branchId = 1; -} +} // namespace struct InstrumentBranchHints : public WalkerPass> { From 70ab2e09cd88718d39869a5974e233806b15155b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 15:20:07 -0700 Subject: [PATCH 042/239] format --- src/passes/InstrumentBranchHints.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index ea25d2172b0..fc282289dab 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -93,9 +93,9 @@ namespace wasm { namespace { + // The branch id, which increments as we go. 
int branchId = 1; -} // namespace struct InstrumentBranchHints : public WalkerPass> { @@ -197,6 +197,8 @@ struct InstrumentBranchHints } }; +} // anonymous namespace + Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } } // namespace wasm From cbf01f6e87a5d02161a03afb1c2cd5abda563d62 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 15:26:48 -0700 Subject: [PATCH 043/239] hunt known bugs --- src/passes/RemoveUnusedBrs.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index cb6eddf5931..2bb922821e0 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -460,7 +460,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeSelect(br->condition, curr->condition, zero); } br->finalize(); - copyBranchHintTo(curr, br, getFunction()); + //copyBranchHintTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } @@ -499,7 +499,7 @@ struct RemoveUnusedBrs : public WalkerPass> { auto currHint = getBranchHint(curr, getFunction()); auto childHint = getBranchHint(child, getFunction()); if (!currHint || currHint != childHint) { - clearBranchHint(curr, getFunction()); + //clearBranchHint(curr, getFunction()); } curr->ifTrue = child->ifTrue; } @@ -731,7 +731,7 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; - flipBranchHint(brIf, getFunction()); + //flipBranchHint(brIf, getFunction()); return true; } else { // there are elements in the middle, @@ -752,7 +752,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeIf(brIf->condition, builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); - copyBranchHintTo(brIf, list[i], getFunction()); + //copyBranchHintTo(brIf, list[i], getFunction()); // later: fuzz this: 
instrument "i am guessing at loc X" and "it // was true/it was false", then fuzzz that we don't decreaes times // we are right. From 3630e4a493a8fec6679333586e8595cf74ba9809 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 15:27:45 -0700 Subject: [PATCH 044/239] hunt known bugs --- src/passes/RemoveUnusedBrs.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 2bb922821e0..0bbd0f7646b 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -161,11 +161,11 @@ static std::optional getBranchHint(Expression* expr, Function* func) { } static void setBranchHint(Expression* expr, bool likely, Function* func) { - func->codeAnnotations[expr].branchLikely = likely; +// func->codeAnnotations[expr].branchLikely = likely; } static void clearBranchHint(Expression* expr, Function* func) { - func->codeAnnotations[expr].branchLikely = {}; +// func->codeAnnotations[expr].branchLikely = {}; } static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { @@ -460,7 +460,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeSelect(br->condition, curr->condition, zero); } br->finalize(); - //copyBranchHintTo(curr, br, getFunction()); + copyBranchHintTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } @@ -499,7 +499,7 @@ struct RemoveUnusedBrs : public WalkerPass> { auto currHint = getBranchHint(curr, getFunction()); auto childHint = getBranchHint(child, getFunction()); if (!currHint || currHint != childHint) { - //clearBranchHint(curr, getFunction()); + clearBranchHint(curr, getFunction()); } curr->ifTrue = child->ifTrue; } @@ -731,7 +731,7 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; - //flipBranchHint(brIf, getFunction()); + 
flipBranchHint(brIf, getFunction()); return true; } else { // there are elements in the middle, @@ -752,7 +752,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeIf(brIf->condition, builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); - //copyBranchHintTo(brIf, list[i], getFunction()); + copyBranchHintTo(brIf, list[i], getFunction()); // later: fuzz this: instrument "i am guessing at loc X" and "it // was true/it was false", then fuzzz that we don't decreaes times // we are right. From 305bbe61964eae41ef5c6e61c9004533068c88ee Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 16:06:13 -0700 Subject: [PATCH 045/239] fix --- scripts/fuzz_shell.js | 7 ++- scripts/test/fuzzing.py | 2 + src/passes/InstrumentBranchHints.cpp | 18 +++++- test/lit/passes/instrument-branch-hints.wast | 59 ++++++++------------ 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index cafce709ed1..8bc7078faf5 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -357,10 +357,13 @@ var imports = { }, 'log-branch': (id, expected, actual) => { - // We only care about truthiness of the expected and actual values, when - // fuzzing. + // We only care about truthiness of the expected and actual values. expected = +!!expected; actual = +!!actual; + // Log out the expected and actual outcomes. This is useful for fuzzing, + // see fuzz_opt.py. For testing that expectations actually match reality + // (i.e. that branch hints are correct), you can adjust the logic here to + // throw on expected != actual . 
console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); }, }, diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index c4b6ae23865..e318f840015 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -129,6 +129,8 @@ 'string-lifting-section.wast', # TODO: fuzzer support for uninhabitable imported globals 'exact-references.wast', + # We cannot re-instrument such code (see the pass). + 'instrument-branch-hints.wast', ] diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index fc282289dab..808ad407564 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -106,6 +106,11 @@ struct InstrumentBranchHints // The internal name of our import. Name LOG_BRANCH; + // Whether we are the second pass of instrumentation. If so, we only add + // logic to parallel existing hints (for each such hint, we emit one with a + // negative ID, so they can be paired). + bool secondInstrumentation = false; + void visitIf(If* curr) { processCondition(curr); } void visitBreak(Break* curr) { @@ -148,9 +153,17 @@ struct InstrumentBranchHints id = -call->operands[0]->cast()->value.geti32(); } } - // We never found one, or we gave up. if (!id) { + // We never found one, or we gave up. + if (secondInstrumentation) { + // We do not add new things in this case. + return; + } id = branchId++; + } else { + // We found an existing ID. This should only happen in the second + // instrumentation. + assert(secondInstrumentation); } // Instrument the condition. @@ -179,6 +192,9 @@ struct InstrumentBranchHints for (auto& func : module->functions) { if (func->module == MODULE && func->base == BASE) { LOG_BRANCH = func->name; + // The logging function existed before, so this is the second + // instrumentation. 
+ secondInstrumentation = true; break; } } diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index fa17f70b60c..2c2685d2eaf 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -578,58 +578,47 @@ ;; TWICE-NEXT: (local $2 i32) ;; TWICE-NEXT: (local $3 i32) ;; TWICE-NEXT: (local $4 i32) - ;; TWICE-NEXT: (local $5 i32) ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") ;; TWICE-NEXT: (if ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $5 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if (result i32) + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $3 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 7) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -7) + ;; TWICE-NEXT: (i32.const 7) ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: (local.get $0) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (i32.const 142) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: (local.get $0) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -7) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: 
(local.get $3) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 9) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (i32.const 242) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 11) + ;; TWICE-NEXT: (i32.const 9) ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (then ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") From 0b3b4d44163d924a989494ce91699b2c1369c3fd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 16:16:06 -0700 Subject: [PATCH 046/239] fix --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index d25c17dc0a6..e91df0640fc 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1941,7 +1941,7 @@ def handle(self, wasm): def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. - return all_disallowed(['shared-everything', 'strings']) + return all_disallowed(['shared-everything', 'strings', 'custom-descriptors']) # The global list of all test case handlers From 0838d5236d0f2f824f06258b5d1052fd737445c1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 16:27:01 -0700 Subject: [PATCH 047/239] nicer --- src/passes/InstrumentBranchHints.cpp | 63 +++++++++++++--------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 808ad407564..999ce997daa 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -75,9 +75,11 @@ // etc. 
// // To make it easy to pair the results, the ID is negative in subsequent -// instrumentations. That is we will match an id of 42 in the first +// instrumentations. That is, we will match an ID of 42 in the first // instrumentation with an id of -42 in the last (that avoids us matching two -// from the first, if e.g. a branch happens twice in a loop). +// from the first, if e.g. a branch happens twice in a loop). Thus, the first +// instrumentations adds positive IDs, and the second adds negative, which makes +// it trivial to differentiate them. // // Regardless of whether the hint was right or wrong, it should change in tandem // with the actual result, see script/fuzz_opt.py's BranchHintPreservation. @@ -108,7 +110,7 @@ struct InstrumentBranchHints // Whether we are the second pass of instrumentation. If so, we only add // logic to parallel existing hints (for each such hint, we emit one with a - // negative ID, so they can be paired). + // negative ID, so they can be paired, as mentioned above). bool secondInstrumentation = false; void visitIf(If* curr) { processCondition(curr); } @@ -119,9 +121,7 @@ struct InstrumentBranchHints } } - // Note all the calls we add to our imports. They are definitely not calls - // from an earlier instrumentation. - std::unordered_set addedCalls; + bool added = false; template void processCondition(T* curr) { if (curr->condition->type == Type::unreachable) { @@ -136,34 +136,29 @@ struct InstrumentBranchHints Builder builder(*getModule()); - // Pick an ID for this branch. If we see a nested logging (see above), we - // copy that id. + // Pick an ID for this branch. int id = 0; - for (auto* call : FindAll(curr->condition).list) { - if (call->target == LOG_BRANCH && !addedCalls.count(call)) { - if (id) { - // We have seen another before, so give up (it is not worth the effort - // to figure out what belongs to what). - id = 0; - break; - } - // This is the first one we see. 
Use it, negated to indicate it is from - // the second instrumentation. - assert(call->operands.size() == 3); - id = -call->operands[0]->cast()->value.geti32(); - } - } - if (!id) { - // We never found one, or we gave up. - if (secondInstrumentation) { - // We do not add new things in this case. - return; - } + if (!secondInstrumentation) { + // This is the first instrumentation. We instrument everything, using a + // new positive ID for each. id = branchId++; } else { - // We found an existing ID. This should only happen in the second - // instrumentation. - assert(secondInstrumentation); + // In the second instrumentation we find existing calls and add paired + // ones to them. + for (auto* call : FindAll(curr->condition).list) { + if (call->target == LOG_BRANCH) { + if (id) { + // We have seen another before, so give up (it is not worth the + // effort to figure out what belongs to what). + return; + } + // Use this ID, which must be from the first instrumentation. + assert(call->operands.size() == 3); + id = call->operands[0]->cast()->value.geti32(); + // We will use it negated. + id = -id; + } + } } // Instrument the condition. @@ -174,16 +169,16 @@ struct InstrumentBranchHints auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); auto* logBranch = builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); - addedCalls.insert(logBranch); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); + added = true; } void visitFunction(Function* func) { // Our added blocks may have caused nested pops. 
- if (!addedCalls.empty()) { + if (added) { EHUtils::handleBlockNestedPops(func, *getModule()); - addedCalls.clear(); + added = false; } } From 5a64e6727961ecb2ed17eb53d70ae4fce073640d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 16:50:50 -0700 Subject: [PATCH 048/239] fix --- src/passes/InstrumentBranchHints.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 999ce997daa..b6521578340 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -159,6 +159,10 @@ struct InstrumentBranchHints id = -id; } } + if (!id) { + // No call found. + return; + } } // Instrument the condition. From 5b54e6d796e5407f47f24ba472d6bb262a46571a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 1 Jul 2025 16:56:35 -0700 Subject: [PATCH 049/239] test --- test/lit/passes/instrument-branch-hints.wast | 87 ++++++++++++++++++-- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 2c2685d2eaf..956c126c85b 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -772,17 +772,94 @@ ) ;; This module has our import, but with a minified internal name. We should -;; still use it. -(;;module +;; still use it, and assume we are doing the second instrumentation. That is, +;; CHECK does the second instrumentation here, and TWICE would do a third, but +;; we add nothing there, so CHECK and TWICE are equal here. 
+(module + ;; CHECK: (type $0 (func (param i32 i32 i32))) + + ;; CHECK: (type $1 (func (param i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) + ;; TWICE: (type $0 (func (param i32 i32 i32))) + + ;; TWICE: (type $1 (func (param i32))) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - (func $if + ;; CHECK: (func $if (type $1) (param $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $if (type $1) (param $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + 
;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $if (param $x i32) (@metadata.code.branch_hint "\01") (if - (i32.const 42) + (block (result i32) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (local.get $x) + ) (then (drop (i32.const 1337)) ) ) ) -;;) +) From 4933339e86d5fb59d678966fe181c537ee60424a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 09:39:39 -0700 Subject: [PATCH 050/239] fix --- src/passes/InstrumentBranchHints.cpp | 108 +++++++++++++++---- test/lit/passes/instrument-branch-hints.wast | 59 +++++----- 2 files changed, 124 insertions(+), 43 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index b6521578340..557c12859b8 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -86,8 +86,9 @@ // #include "ir/eh-utils.h" -#include "ir/find_all.h" +#include "ir/local-graph.h" #include "ir/names.h" +#include "ir/properties.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" @@ -101,6 +102,9 @@ int branchId = 1; struct InstrumentBranchHints : public WalkerPass> { + + using Super = WalkerPass>; + // The module and base names of our import. Name MODULE = "fuzzing-support"; Name BASE = "log-branch"; @@ -113,6 +117,8 @@ struct InstrumentBranchHints // negative ID, so they can be paired, as mentioned above). bool secondInstrumentation = false; + std::unique_ptr localGraph; + void visitIf(If* curr) { processCondition(curr); } void visitBreak(Break* curr) { @@ -121,6 +127,23 @@ struct InstrumentBranchHints } } + // Track existing calls to our logging, and their gets, so that we can + // identify them and add the second instrumentation properly. This map stores + // gets that map to such calls, specifically their actual values (the same + // value used in the branch, which we want to instrument). 
+ std::unordered_map getsOfPriorInstrumentation; + + void visitCall(Call* curr) { + if (curr->target != LOG_BRANCH) { + return; + } + // Our logging has 3 fields: id, expected, actual. + assert(curr->operands.size() == 3); + if (auto* get = curr->operands[2]->cast()) { + getsOfPriorInstrumentation[get] = curr; + } + } + bool added = false; template void processCondition(T* curr) { @@ -143,24 +166,64 @@ struct InstrumentBranchHints // new positive ID for each. id = branchId++; } else { - // In the second instrumentation we find existing calls and add paired - // ones to them. - for (auto* call : FindAll(curr->condition).list) { - if (call->target == LOG_BRANCH) { - if (id) { - // We have seen another before, so give up (it is not worth the - // effort to figure out what belongs to what). - return; - } - // Use this ID, which must be from the first instrumentation. - assert(call->operands.size() == 3); - id = call->operands[0]->cast()->value.geti32(); - // We will use it negated. - id = -id; + // In the second instrumentation we find existing instrumentation and add + // paired ones to them. To find the existing ones, we look for this + // condition being a local.get that is used in a call to our import, that + // is, something like the pattern we emit below: + // + // (local.set $temp ..) + // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (if + // (local.get $temp) ;; and used in condition + // + // We also consider the fallthrough, for the nested case: + // + // (if + // (block + // (local.set $temp ..) + // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (local.get $temp) ;; and used in condition + // ) + // + // TODO tee? 
+ auto* fallthrough = Properties::getFallthrough(curr->condition, getPassOptions(), *getModule()); + auto* get = fallthrough->template dynCast(); + if (!get) { + return; + } + auto& sets = localGraph->getSets(get); + if (sets.size() != 1) { + return; + } + auto* set = *sets.begin(); + // The set should have two gets: the get in the condition we began at, and + // another. + auto& gets = localGraph->getSetInfluences(set); + if (gets.size() != 2) { + return; + } + LocalGet* otherGet = nullptr; + for (auto* get2 : gets) { + if (get2 != get) { + otherGet = get2; } } - if (!id) { - // No call found. + assert(otherGet); + // See if that other get is used in a logging. + auto iter = getsOfPriorInstrumentation.find(otherGet); + if (iter == getsOfPriorInstrumentation.end()) { + return; + } + // Great, this is indeed a prior instrumentation! Add a second + // instrumentation for it, using the old ID (negated). + auto* call = iter->second; + assert(call->operands.size() == 3); + id = -call->operands[0]->template cast()->value.geti32(); + if (id > 0) { + // The seen ID was already negated, so we negated it again to be + // positive. That means the existing instrumentation was a second + // instrumentation, and we should only operate on positive IDs and emit + // negative ones. return; } } @@ -178,7 +241,14 @@ struct InstrumentBranchHints added = true; } - void visitFunction(Function* func) { + void doWalkFunction(Function* func) { + if (secondInstrumentation) { + localGraph = std::make_unique(func, getModule()); + localGraph->computeSetInfluences(); + } + + Super::doWalkFunction(func); + // Our added blocks may have caused nested pops. if (added) { EHUtils::handleBlockNestedPops(func, *getModule()); @@ -208,7 +278,7 @@ struct InstrumentBranchHints } // Walk normally, using logBranch as we go. 
- WalkerPass>::doWalkModule(module); + Super::doWalkModule(module); } }; diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 956c126c85b..9785e3b2b2b 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -578,47 +578,58 @@ ;; TWICE-NEXT: (local $2 i32) ;; TWICE-NEXT: (local $3 i32) ;; TWICE-NEXT: (local $4 i32) + ;; TWICE-NEXT: (local $5 i32) ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") ;; TWICE-NEXT: (if ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if (result i32) - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (local.set $5 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if (result i32) ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 7) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 7) + ;; TWICE-NEXT: (i32.const -7) ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: (local.get $3) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (i32.const 242) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -7) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $3) - ;; 
TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (i32.const 142) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 9) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 9) + ;; TWICE-NEXT: (i32.const -9) ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: (local.get $5) ;; TWICE-NEXT: ) ;; TWICE-NEXT: (then ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") From 6d69bff02d2d59582bc7f222d263932425e1e5d8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 09:42:44 -0700 Subject: [PATCH 051/239] work --- test/lit/passes/instrument-branch-hints.wast | 22 +++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 9785e3b2b2b..d7111364982 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -789,23 +789,27 @@ (module ;; CHECK: (type $0 (func (param i32 i32 i32))) - ;; CHECK: (type $1 (func (param i32))) + ;; CHECK: (type $1 (func)) ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) ;; TWICE: (type $0 (func (param i32 i32 i32))) - ;; TWICE: (type $1 (func (param i32))) + ;; TWICE: (type $1 (func)) ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - ;; CHECK: (func $if (type $1) (param $x i32) + ;; CHECK: (func $if (type $1) + ;; CHECK-NEXT: (local $x i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") 
;; CHECK-NEXT: (if ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $min ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: (i32.const 1) @@ -828,13 +832,17 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $if (type $1) (param $x i32) + ;; TWICE: (func $if (type $1) + ;; TWICE-NEXT: (local $x i32) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") ;; TWICE-NEXT: (if ;; TWICE-NEXT: (block (result i32) ;; TWICE-NEXT: (local.set $1 ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) ;; TWICE-NEXT: (call $min ;; TWICE-NEXT: (i32.const 42) ;; TWICE-NEXT: (i32.const 1) @@ -857,10 +865,14 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) - (func $if (param $x i32) + (func $if + (local $x i32) (@metadata.code.branch_hint "\01") (if (block (result i32) + (local.set $x + (i32.const 42) + ) (call $min (i32.const 42) (i32.const 1) From 525ad6b0c2e12ba46dfd280d12dc6a95dbe2b51f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 09:43:54 -0700 Subject: [PATCH 052/239] work --- test/lit/passes/instrument-branch-hints.wast | 101 +++++++++++++++++-- 1 file changed, 93 insertions(+), 8 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index d7111364982..c2ac7b65bab 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -787,19 +787,19 @@ ;; CHECK does the second instrumentation here, and TWICE would do a third, but ;; we add nothing there, so CHECK and TWICE are equal here. 
(module - ;; CHECK: (type $0 (func (param i32 i32 i32))) + ;; CHECK: (type $0 (func)) - ;; CHECK: (type $1 (func)) + ;; CHECK: (type $1 (func (param i32 i32 i32))) - ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) - ;; TWICE: (type $0 (func (param i32 i32 i32))) + ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) + ;; TWICE: (type $0 (func)) - ;; TWICE: (type $1 (func)) + ;; TWICE: (type $1 (func (param i32 i32 i32))) - ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) + ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - ;; CHECK: (func $if (type $1) + ;; CHECK: (func $if (type $0) ;; CHECK-NEXT: (local $x i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") @@ -832,7 +832,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $if (type $1) + ;; TWICE: (func $if (type $0) ;; TWICE-NEXT: (local $x i32) ;; TWICE-NEXT: (local $1 i32) ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") @@ -885,4 +885,89 @@ ) ) ) + + ;; CHECK: (func $optimized (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: 
(i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized + (local $x i32) + ;; As above, but now the existing instrumentation looks like it was + ;; optimized a little: the local.set and call were moved out of the if + ;; (something that merge-blocks would do). We should still add the second + ;; instrumentation. 
+ (local.set $x + (i32.const 42) + ) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + ) ) From f3f56e95420fbe7c594a35a88c56ed940bb11fce Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 09:52:29 -0700 Subject: [PATCH 053/239] work --- src/passes/InstrumentBranchHints.cpp | 63 ++++++++++----- test/lit/passes/instrument-branch-hints.wast | 82 ++++++++++++++++++++ 2 files changed, 125 insertions(+), 20 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 557c12859b8..eebc34c8ff5 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -130,8 +130,9 @@ struct InstrumentBranchHints // Track existing calls to our logging, and their gets, so that we can // identify them and add the second instrumentation properly. This map stores // gets that map to such calls, specifically their actual values (the same - // value used in the branch, which we want to instrument). + // value used in the branch, which we want to instrument). We also map tees. std::unordered_map getsOfPriorInstrumentation; + std::unordered_map teesOfPriorInstrumentation; void visitCall(Call* curr) { if (curr->target != LOG_BRANCH) { @@ -139,8 +140,10 @@ struct InstrumentBranchHints } // Our logging has 3 fields: id, expected, actual. assert(curr->operands.size() == 3); - if (auto* get = curr->operands[2]->cast()) { + if (auto* get = curr->operands[2]->dynCast()) { getsOfPriorInstrumentation[get] = curr; + } else if (auto* tee = curr->operands[2]->dynCast()) { + teesOfPriorInstrumentation[tee] = curr; } } @@ -185,7 +188,12 @@ struct InstrumentBranchHints // (local.get $temp) ;; and used in condition // ) // - // TODO tee? 
+ // We also consider a tee: + // + // (call LOG_BRANCH (local.tee $temp (..))) ;; used in logging + // (if + // (local.get $temp) ;; and used in condition + // auto* fallthrough = Properties::getFallthrough(curr->condition, getPassOptions(), *getModule()); auto* get = fallthrough->template dynCast(); if (!get) { @@ -196,27 +204,42 @@ struct InstrumentBranchHints return; } auto* set = *sets.begin(); - // The set should have two gets: the get in the condition we began at, and - // another. auto& gets = localGraph->getSetInfluences(set); - if (gets.size() != 2) { - return; - } - LocalGet* otherGet = nullptr; - for (auto* get2 : gets) { - if (get2 != get) { - otherGet = get2; + Call* call = nullptr; + if (gets.size() == 2) { + // The set has two gets: the get in the condition we began at, and + // another. + LocalGet* otherGet = nullptr; + for (auto* get2 : gets) { + if (get2 != get) { + otherGet = get2; + } } - } - assert(otherGet); - // See if that other get is used in a logging. - auto iter = getsOfPriorInstrumentation.find(otherGet); - if (iter == getsOfPriorInstrumentation.end()) { + assert(otherGet); + // See if that other get is used in a logging. + auto iter = getsOfPriorInstrumentation.find(otherGet); + if (iter == getsOfPriorInstrumentation.end()) { + return; + } + // Great, this is indeed a prior instrumentation. Add a second + // instrumentation for it, using the old ID (negated). + call = iter->second; + } else if (gets.size() == 1) { + // The set has only one get, but it might be a tee that flows into a + // call. + auto iter = teesOfPriorInstrumentation.find(set); + if (iter == teesOfPriorInstrumentation.end()) { + return; + } + // Great, this is indeed a prior instrumentation! Add a second + // instrumentation for it, using the old ID (negated). + call = iter->second; + } else { + // The set has more uses; give up. return; } - // Great, this is indeed a prior instrumentation! Add a second - // instrumentation for it, using the old ID (negated).
- auto* call = iter->second; + + // We found a call from a prior instrumentation. assert(call->operands.size() == 3); id = -call->operands[0]->template cast()->value.geti32(); if (id > 0) { diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index c2ac7b65bab..f10b3496fa8 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -970,4 +970,86 @@ ) ) ) + + ;; CHECK: (func $optimized-moar (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized-moar (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.tee $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; 
TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized-moar + (local $x i32) + ;; As above, but optimized further, now using a tee. We should still add the + ;; second instrumentation. + (call $min + (i32.const 42) + (i32.const 1) + (local.tee $x + (i32.const 42) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + ) + + ;; TODO: test with 3 uses of the local.set, and fail ) From 03c092fff6a63552cd687eb915395f2f74826fe1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 09:53:19 -0700 Subject: [PATCH 054/239] work --- test/lit/passes/instrument-branch-hints.wast | 70 +++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index f10b3496fa8..cf06bf56b8d 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -1051,5 +1051,73 @@ ) ) - ;; TODO: test with 3 uses of the local.set, and fail + ;; CHECK: (func $optimized-bad (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized-bad (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: 
(i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized-bad + (local $x i32) + ;; As above, but the set has another use later, so we give up as the pattern + ;; is unfamiliar. + (local.set $x + (i32.const 42) + ) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + (drop + (local.get $x) ;; extra use + ) + ) ) From 44baed3337ba733b03dca88bbc87c2007c008471 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:16:35 -0700 Subject: [PATCH 055/239] wokr --- src/passes/RandomizeBranchHints.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 3064557aed5..93eb5878ca4 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -15,7 +15,8 @@ */ // -// Apply random branch hints. This is really only useful for fuzzing. +// Apply random branch hints. This is really only useful for fuzzing. The +// randomness here is deterministic, so that reducing can work. 
// #include "pass.h" From c437559e3b4956c1f5dc3c452e9bdb8ab391d52e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:17:01 -0700 Subject: [PATCH 056/239] format --- src/passes/InstrumentBranchHints.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index eebc34c8ff5..854fc599ef0 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -194,7 +194,8 @@ struct InstrumentBranchHints // (if // (local.get $temp) ;; and used in condition // - auto* fallthrough = Properties::getFallthrough(curr->condition, getPassOptions(), *getModule()); + auto* fallthrough = Properties::getFallthrough( + curr->condition, getPassOptions(), *getModule()); auto* get = fallthrough->template dynCast(); if (!get) { return; From 41d167cd967ae21971e6b7a413445bc5c240ee47 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:24:05 -0700 Subject: [PATCH 057/239] comments --- src/passes/InstrumentBranchHints.cpp | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 854fc599ef0..2a5631f7836 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -36,7 +36,25 @@ // Y // } // -// The motivation for this pass is to fuzz branch hint updates: given a fuzz +// Concretely, we emit calls to this logging function: +// +// (import "fuzzing-support" "log-branch" +// (func $log-branch (param i32 i32 i32)) ;; ID, prediction, actual +// ) +// +// This can be used to verify that branch hints are accurate, by implementing +// the import like this for example: +// +// imports['fuzzing-support']['log-branch'] = (id, expected, actual) => { +// // We only care about truthiness of the expected and actual values.
+// expected = +!!expected; +// actual = +!!actual; +// // Throw if the hint said this branch would be taken, but it was not, or +// // vice versa. +// if (expected != actual) throw `Bad branch hint! (${id})`; +// }; +// +// Another use case for this pass is to fuzz branch hint updates: given a fuzz // case, we can instrument it and view the loggings, then optimize the original, // instrument that, and view those loggings. Imagine, for example, that we flip // the condition but forget to flip the hint: @@ -66,7 +84,7 @@ // with another temp local. Also, we inferred the same ID (123) in both cases, // by scanning the inside of the condition. Using that, the new logging will be // 123,B,C followed by 123,B,!C. We can therefore find pairs of loggings with -// same ID, and consider the predicted and actual values: +// the same ID, and consider the predicted and actual values: // // [id,0,0], [id,0,0] - nothing changed: good // [id,0,0], [id,0,1] - the actual result changed but not the prediction: bad @@ -76,13 +94,11 @@ // // To make it easy to pair the results, the ID is negative in subsequent // instrumentations. That is, we will match an ID of 42 in the first -// instrumentation with an id of -42 in the last (that avoids us matching two +// instrumentation with an id of -42 in the second (that avoids us matching two // from the first, if e.g. a branch happens twice in a loop). Thus, the first // instrumentations adds positive IDs, and the second adds negative, which makes -// it trivial to differentiate them. -// -// Regardless of whether the hint was right or wrong, it should change in tandem -// with the actual result, see script/fuzz_opt.py's BranchHintPreservation. +// it trivial to differentiate them. (See script/fuzz_opt.py's +// BranchHintPreservation for more details.) 
// #include "ir/eh-utils.h" From ec8836871f0fcab3929224f88774bb09f8af6ca5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:26:46 -0700 Subject: [PATCH 058/239] [Branch Hinting] Add useful passes to generate, test, and fuzz branch hints --- scripts/test/fuzzing.py | 2 + src/passes/CMakeLists.txt | 2 + src/passes/pass.cpp | 6 + src/passes/passes.h | 2 + test/lit/passes/instrument-branch-hints.wast | 1123 ++++++++++++++++++ test/lit/passes/randomize-branch-hints.wast | 309 +++++ 6 files changed, 1444 insertions(+) create mode 100644 test/lit/passes/instrument-branch-hints.wast create mode 100644 test/lit/passes/randomize-branch-hints.wast diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index 4a610946935..f922ac4e16b 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -132,6 +132,8 @@ 'string-lifting-section.wast', # TODO: fuzzer support for uninhabitable imported globals 'exact-references.wast', + # We cannot re-instrument such code (see the pass). 
+ 'instrument-branch-hints.wast', ] diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 6fab09b1bc2..16891caade3 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -52,6 +52,7 @@ set(passes_SOURCES HeapStoreOptimization.cpp I64ToI32Lowering.cpp Inlining.cpp + InstrumentBranchHints.cpp InstrumentLocals.cpp InstrumentMemory.cpp Intrinsics.cpp @@ -102,6 +103,7 @@ set(passes_SOURCES Strip.cpp StripTargetFeatures.cpp TraceCalls.cpp + RandomizeBranchHints.cpp RedundantSetElimination.cpp RemoveImports.cpp RemoveMemoryInit.cpp diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2042bc71d3a..83085bae007 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -259,6 +259,9 @@ void PassRegistry::registerPasses() { "trace-calls", "instrument the build with code to intercept specific function calls", createTraceCallsPass); + registerPass("instrument-branch-hints", + "instrument branch hints so we can see which guessed right", + createInstrumentBranchHintsPass); registerPass( "instrument-locals", "instrument the build with code to intercept all loads and stores", @@ -409,6 +412,9 @@ void PassRegistry::registerPasses() { registerPass("propagate-globals-globally", "propagate global values to other globals (useful for tests)", createPropagateGlobalsGloballyPass); + registerPass("randomize-branch-hints", + "randomize branch hints (for fuzzing)", + createRandomizeBranchHintsPass); registerPass("remove-non-js-ops", "removes operations incompatible with js", createRemoveNonJSOpsPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e051e466e72..e0c03bad8d7 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -79,6 +79,7 @@ Pass* createLocalSubtypingPass(); Pass* createLogExecutionPass(); Pass* createIntrinsicLoweringPass(); Pass* createTraceCallsPass(); +Pass* createInstrumentBranchHintsPass(); Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); Pass* 
createLLVMMemoryCopyFillLoweringPass(); @@ -130,6 +131,7 @@ Pass* createPrintCallGraphPass(); Pass* createPrintFeaturesPass(); Pass* createPrintFunctionMapPass(); Pass* createPropagateGlobalsGloballyPass(); +Pass* createRandomizeBranchHintsPass(); Pass* createRemoveNonJSOpsPass(); Pass* createRemoveImportsPass(); Pass* createRemoveMemoryInitPass(); diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast new file mode 100644 index 00000000000..cf06bf56b8d --- /dev/null +++ b/test/lit/passes/instrument-branch-hints.wast @@ -0,0 +1,1123 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints -S -o - | filecheck %s + +;; Also test the results of running again. When a condition is instrumented +;; twice, we should reuse the id, but emit it negated. 
+;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32))) + + ;; CHECK: (type $2 (func (result f64))) + + ;; CHECK: (type $3 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) + + ;; CHECK: (tag $i32 (type $1) (param i32)) + ;; TWICE: (type $0 (func)) + + ;; TWICE: (type $1 (func (param i32))) + + ;; TWICE: (type $2 (func (result f64))) + + ;; TWICE: (type $3 (func (param i32 i32 i32))) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) + + ;; TWICE: (tag $i32 (type $1) (param i32)) + (tag $i32 (param i32)) + + ;; CHECK: (func $if (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; 
CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $if (type $0) + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (local $4 i32) + ;; TWICE-NEXT: (local $5 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -1) + ;; TWICE-NEXT: (i32.const 
0) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 99) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $4 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 2) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -2) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 11337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 199) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 21337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 299) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $5 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (i32.const 342) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 3) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; 
TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -3) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 31337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 399) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $if + ;; An if with a 0 hint, a 1 hint, and no hint. + (@metadata.code.branch_hint "\00") + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + ;; Another hint of 0, for more coverage (ensure hint value differs from + ;; break id). 
+ (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) + ) + + ;; CHECK: (func $br (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out1 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out1 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out2 + ;; CHECK-NEXT: (br_if $out2 + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $br (type $0) + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (block $out + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $out + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + 
;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 4) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -4) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (block $out1 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (br_if $out1 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 5) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -5) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 11337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (block $out2 + ;; TWICE-NEXT: (br_if $out2 + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 21337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $br + ;; As above, with br_if. 
+ (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + (block $out1 + (@metadata.code.branch_hint "\01") + (br_if $out1 + (i32.const 142) + ) + (drop (i32.const 11337)) + ) + (block $out2 + (br_if $out2 + (i32.const 242) + ) + (drop (i32.const 21337)) + ) + ) + + ;; CHECK: (func $br_value (type $2) (result f64) + ;; CHECK-NEXT: (local $scratch f64) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (block $out (result f64) + ;; CHECK-NEXT: (local.set $scratch + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $scratch) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $br_value (type $2) (result f64) + ;; TWICE-NEXT: (local $scratch f64) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (block $out (result f64) + ;; TWICE-NEXT: (local.set $scratch + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $out + ;; TWICE-NEXT: (f64.const 3.14159) + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 6) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -6) + ;; TWICE-NEXT: 
(i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $scratch) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $br_value (result f64) + ;; As above, but now with a value. We need to stash it to a local. + (block $out (result f64) + (@metadata.code.branch_hint "\00") + (br_if $out + (f64.const 3.14159) + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) + + ;; CHECK: (func $nested (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 9) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $nested (type $0) + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (local $3 i32) + ;; TWICE-NEXT: (local $4 i32) + ;; TWICE-NEXT: (local $5 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $5 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if (result i32) + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $3 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 7) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -7) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $3) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (i32.const 142) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (else + ;; TWICE-NEXT: (i32.const 242) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 9) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -9) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $5) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + 
;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $4 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (i32.const 342) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 8) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const -8) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $4) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $nested + ;; Do not reuse branch hint ids in the same instrumentation: even if we have + ;; nested conditions, we can see which calls we added, and not reuse their + ;; ids. Only TWICE should ever reuse ids in our output. 
+ (@metadata.code.branch_hint "\00") + (if + (@metadata.code.branch_hint "\01") + (if (result i32) + (i32.const 42) + (then + (i32.const 142) + ) + (else + (i32.const 242) + ) + ) + (then + (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 1337)) + ) + ) + ) + ) + ) + + ;; CHECK: (func $eh-pop (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (block $label + ;; CHECK-NEXT: (try + ;; CHECK-NEXT: (do + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (catch $i32 + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (pop i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $label + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log-branch + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $eh-pop (type $0) + ;; TWICE-NEXT: (local $0 i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local $2 i32) + ;; TWICE-NEXT: (block $label + ;; TWICE-NEXT: (try + ;; TWICE-NEXT: (do + ;; TWICE-NEXT: (nop) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (catch $i32 + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (pop i32) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") + ;; TWICE-NEXT: (br_if $label + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $2 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $0 + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $log-branch + ;; TWICE-NEXT: (i32.const 10) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $0) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call 
$log-branch + ;; TWICE-NEXT: (i32.const -10) + ;; TWICE-NEXT: (i32.const 0) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $2) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $eh-pop + (block $label + (try + (do + (nop) + ) + (catch $i32 + (@metadata.code.branch_hint "\00") + (br_if $label + ;; This pop will end up in a block after our instrumentation, which + ;; requires fixups. + (pop i32) + ) + ) + ) + ) + ) +) + +;; This module has our import, but with a minified internal name. We should +;; still use it, and assume we are doing the second instrumentation. That is, +;; CHECK does the second instrumentation here, and TWICE would do a third, but +;; we add nothing there, so CHECK and TWICE are equal here. +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) + ;; TWICE: (type $0 (func)) + + ;; TWICE: (type $1 (func (param i32 i32 i32))) + + ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + + ;; CHECK: (func $if (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get 
$1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $if (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $if + (local $x i32) + (@metadata.code.branch_hint "\01") + (if + (block (result i32) + (local.set $x + (i32.const 42) + ) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (local.get $x) + ) + (then + (drop (i32.const 1337)) + ) + ) + ) + + ;; CHECK: (func $optimized (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: 
(local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized + (local $x i32) + ;; As above, but now the existing instrumentation looks like it was + ;; optimized a little: the local.set and call were moved out of the if + ;; (something that merge-blocks would do). We should still add the second + ;; instrumentation. 
+ (local.set $x + (i32.const 42) + ) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + ) + + ;; CHECK: (func $optimized-moar (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized-moar (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local $1 i32) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.tee $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (block (result i32) + ;; TWICE-NEXT: (local.set $1 + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const -42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (local.get $1) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized-moar + (local $x i32) + ;; As above, but optimized further, now using a tee. 
We should still add the + ;; second instrumentation. + (call $min + (i32.const 42) + (i32.const 1) + (local.tee $x + (i32.const 42) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + ) + + ;; CHECK: (func $optimized-bad (type $0) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; TWICE: (func $optimized-bad (type $0) + ;; TWICE-NEXT: (local $x i32) + ;; TWICE-NEXT: (local.set $x + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (call $min + ;; TWICE-NEXT: (i32.const 42) + ;; TWICE-NEXT: (i32.const 1) + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") + ;; TWICE-NEXT: (if + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: (then + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (i32.const 1337) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: (drop + ;; TWICE-NEXT: (local.get $x) + ;; TWICE-NEXT: ) + ;; TWICE-NEXT: ) + (func $optimized-bad + (local $x i32) + ;; As above, but the set has another use later, so we give up as the pattern + ;; is unfamiliar. 
+ (local.set $x + (i32.const 42) + ) + (call $min + (i32.const 42) + (i32.const 1) + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (drop (i32.const 1337)) + ) + ) + (drop + (local.get $x) ;; extra use + ) + ) +) diff --git a/test/lit/passes/randomize-branch-hints.wast b/test/lit/passes/randomize-branch-hints.wast new file mode 100644 index 00000000000..95fff9c9ce8 --- /dev/null +++ b/test/lit/passes/randomize-branch-hints.wast @@ -0,0 +1,309 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt --randomize-branch-hints -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (result f64))) + + ;; CHECK: (func $if + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: 
(i32.const 342) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if + ;; We should see various branch hints appear, both true and false, and also + ;; some instructions with no hint. + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) + ) + + ;; CHECK: (func $if-existing + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 199) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 299) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 342) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; 
CHECK-NEXT: (i32.const 31337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 399) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-existing + ;; We do not error on existing hints, and trample/remove them. + (@metadata.code.branch_hint "\01") + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (@metadata.code.branch_hint "\00") + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (i32.const 242) + (then + (drop (i32.const 21337)) + ) + (else + (drop (i32.const 299)) + ) + ) + (@metadata.code.branch_hint "\00") + (if + (i32.const 342) + (then + (drop (i32.const 31337)) + ) + (else + (drop (i32.const 399)) + ) + ) + ) + + ;; CHECK: (func $br_if + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out1 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out1 + ;; CHECK-NEXT: (i32.const 142) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 11337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $out2 + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out2 + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_if + ;; As above, with br_if. + (block $out + (br_if $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + (block $out1 + (br_if $out1 + (i32.const 142) + ) + (drop (i32.const 11337)) + ) + (block $out2 + ;; Existing hint. 
+ (@metadata.code.branch_hint "\01") + (br_if $out2 + (i32.const 242) + ) + (drop (i32.const 21337)) + ) + ) + + ;; CHECK: (func $br_value (result f64) + ;; CHECK-NEXT: (local $scratch f64) + ;; CHECK-NEXT: (block $out (result f64) + ;; CHECK-NEXT: (local.set $scratch + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $scratch) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_value (result f64) + ;; As above, but now with a value. We should not error. + (block $out (result f64) + (br_if $out + (f64.const 3.14159) + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) + + ;; CHECK: (func $br + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br $out) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br + ;; As above, but now without a condition. We should not error. 
+ (block $out + (br $out + (i32.const 42) + ) + (drop (i32.const 1337)) + ) + ) +) From 848dadfbf977431b6a5b7038b01403203c1bbedf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:34:52 -0700 Subject: [PATCH 059/239] passes --- src/passes/InstrumentBranchHints.cpp | 329 +++++++++++++++++++++++++++ src/passes/RandomizeBranchHints.cpp | 70 ++++++ 2 files changed, 399 insertions(+) create mode 100644 src/passes/InstrumentBranchHints.cpp create mode 100644 src/passes/RandomizeBranchHints.cpp diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp new file mode 100644 index 00000000000..2a5631f7836 --- /dev/null +++ b/src/passes/InstrumentBranchHints.cpp @@ -0,0 +1,329 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Instruments branch hints and their targets, adding logging that allows us to +// see if the hints were valid or not. We turn +// +// @metadata.branch.hint B +// if (condition) { +// X +// } else { +// Y +// } +// +// into +// +// @metadata.branch.hint B +// ;; log the ID of the condition (123), the prediction (B), and the actual +// ;; runtime result (temp == condition). 
+// if (temp = condition; log(123, B, temp); temp) { +// X +// } else { +// Y +// } +// +// Concretely, we emit calls to this logging function: +// +// (import "fuzzing-support" "log-branch" +// (func $log-branch (param i32 i32 i32)) ;; ID, prediction, actual +// ) +// +// This can be used to verify that branch hints are accurate, by implementing +// the import like this for example: +// +// imports['fuzzing-support']['log-branch'] = (id, expected, actual) => { +// // We only care about truthiness of the expected and actual values. +// expected = +!!expected; +// actual = +!!actual; +// // Throw if the hint said this branch would be taken, but it was not, or +// // vice versa. +// if (expected != actual) throw `Bad branch hint! (${id})`; +// }; +// +// Another use case for this pass is to fuzz branch hint updates: given a fuzz +// case, we can instrument it and view the loggings, then optimize the original, +// instrument that, and view those loggings. Imagine, for example, that we flip +// the condition but forget to flip the hint: +// +// @metadata.branch.hint B +// if (!(temp = condition; log(123, B, temp); temp)) { ;; added a ! +// Y ;; this moved +// } else { +// X ;; this moved +// } +// +// The logging before would be 123,B,C (where C is 0 or 1 - the hint might be +// wrong or right, in a fuzz testcase), and the logging after will remain the +// same, so this did not help us yet (because we log only |condition|, not the +// full !-ed condition). But if we run this instrumentation again, we get this: +// +// @metadata.branch.hint B +// if (temp2 = ( +// !(temp = condition; log(123, B, temp); temp) +// ); log(123, B, temp2); temp2) { +// Y +// } else { +// X +// } +// +// Note how the full !-ed condition is nested inside another instrumentation +// with another temp local. Also, we inferred the same ID (123) in both cases, +// by scanning the inside of the condition. Using that, the new logging will be +// 123,B,C followed by 123,B,!C. 
We can therefore find pairs of loggings with +// the same ID, and consider the predicted and actual values: +// +// [id,0,0], [id,0,0] - nothing changed: good +// [id,0,0], [id,0,1] - the actual result changed but not the prediction: bad +// [id,0,0], [id,1,0] - prediction changed but not actual result: bad +// [id,0,0], [id,1,1] - actual and predicted both changed: good +// etc. +// +// To make it easy to pair the results, the ID is negative in subsequent +// instrumentations. That is, we will match an ID of 42 in the first +// instrumentation with an id of -42 in the second (that avoids us matching two +// from the first, if e.g. a branch happens twice in a loop). Thus, the first +// instrumentations adds positive IDs, and the second adds negative, which makes +// it trivial to differentiate them. (See script/fuzz_opt.py's +// BranchHintPreservation for more details.) +// + +#include "ir/eh-utils.h" +#include "ir/local-graph.h" +#include "ir/names.h" +#include "ir/properties.h" +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +namespace { + +// The branch id, which increments as we go. +int branchId = 1; + +struct InstrumentBranchHints + : public WalkerPass> { + + using Super = WalkerPass>; + + // The module and base names of our import. + Name MODULE = "fuzzing-support"; + Name BASE = "log-branch"; + + // The internal name of our import. + Name LOG_BRANCH; + + // Whether we are the second pass of instrumentation. If so, we only add + // logic to parallel existing hints (for each such hint, we emit one with a + // negative ID, so they can be paired, as mentioned above). + bool secondInstrumentation = false; + + std::unique_ptr localGraph; + + void visitIf(If* curr) { processCondition(curr); } + + void visitBreak(Break* curr) { + if (curr->condition) { + processCondition(curr); + } + } + + // Track existing calls to our logging, and their gets, so that we can + // identify them and add the second instrumentation properly. 
This map stores + // gets that map to such calls, specifically their actual values (the same + // value used in the branch, which we want to instrument). We also map tees. + std::unordered_map getsOfPriorInstrumentation; + std::unordered_map teesOfPriorInstrumentation; + + void visitCall(Call* curr) { + if (curr->target != LOG_BRANCH) { + return; + } + // Our logging has 3 fields: id, expected, actual. + assert(curr->operands.size() == 3); + if (auto* get = curr->operands[2]->dynCast()) { + getsOfPriorInstrumentation[get] = curr; + } else if (auto* tee = curr->operands[2]->dynCast()) { + teesOfPriorInstrumentation[tee] = curr; + } + } + + bool added = false; + + template void processCondition(T* curr) { + if (curr->condition->type == Type::unreachable) { + // This branch is not even reached. + return; + } + + auto likely = getFunction()->codeAnnotations[curr].branchLikely; + if (!likely) { + return; + } + + Builder builder(*getModule()); + + // Pick an ID for this branch. + int id = 0; + if (!secondInstrumentation) { + // This is the first instrumentation. We instrument everything, using a + // new positive ID for each. + id = branchId++; + } else { + // In the second instrumentation we find existing instrumentation and add + // paired ones to them. To find the existing ones, we look for this + // condition being a local.get that is used in a call to our import, that + // is, something like the pattern we emit below: + // + // (local.set $temp ..) + // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (if + // (local.get $temp) ;; and used in condition + // + // We also consider the fallthrough, for the nested case: + // + // (if + // (block + // (local.set $temp ..) 
+ // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (local.get $temp) ;; and used in condition + // ) + // + // We also consider a tee: + // + // (call LOG_BRANCH (local.tee $temp (..))) ;; used in logging + // (if + // (local.get $temp) ;; and used in condition + // + auto* fallthrough = Properties::getFallthrough( + curr->condition, getPassOptions(), *getModule()); + auto* get = fallthrough->template dynCast(); + if (!get) { + return; + } + auto& sets = localGraph->getSets(get); + if (sets.size() != 1) { + return; + } + auto* set = *sets.begin(); + auto& gets = localGraph->getSetInfluences(set); + Call* call = nullptr; + if (gets.size() == 2) { + // The set has two gets: the get in the condition we began at, and + // another. + LocalGet* otherGet = nullptr; + for (auto* get2 : gets) { + if (get2 != get) { + otherGet = get2; + } + } + assert(otherGet); + // See if that other get is used in a logging. + auto iter = getsOfPriorInstrumentation.find(otherGet); + if (iter == getsOfPriorInstrumentation.end()) { + return; + } + // Great, this is indeed a prior instrumentation. Add a second + // instrumentation for it, using the old ID (negated). + call = iter->second; + } else if (gets.size() == 1) { + // The set has only one get, but it might be a tee that flows into a + // call. + auto iter = teesOfPriorInstrumentation.find(set); + if (iter == teesOfPriorInstrumentation.end()) { + return; + } + // Great, this is indeed a prior instrumentation! Add a second + // instrumentation for it, using the old ID (negated). + call = iter->second; + } else { + // The get has more uses; give up. + return; + } + + // We found a call from a prior instrumentation. + assert(call->operands.size() == 3); + id = -call->operands[0]->template cast()->value.geti32(); + if (id > 0) { + // The seen ID was already negated, so we negated it again to be + // positive. 
That means the existing instrumentation was a second + // instrumentation, and we should only operate on positive IDs and emit + // negative ones. + return; + } + } + + // Instrument the condition. + auto tempLocal = builder.addVar(getFunction(), Type::i32); + auto* set = builder.makeLocalSet(tempLocal, curr->condition); + auto* idc = builder.makeConst(Literal(int32_t(id))); + auto* guess = builder.makeConst(Literal(int32_t(*likely))); + auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); + auto* logBranch = + builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); + auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); + curr->condition = builder.makeBlock({set, logBranch, get2}); + added = true; + } + + void doWalkFunction(Function* func) { + if (secondInstrumentation) { + localGraph = std::make_unique(func, getModule()); + localGraph->computeSetInfluences(); + } + + Super::doWalkFunction(func); + + // Our added blocks may have caused nested pops. + if (added) { + EHUtils::handleBlockNestedPops(func, *getModule()); + added = false; + } + } + + void doWalkModule(Module* module) { + // Find our import, if we were already run on this module. + for (auto& func : module->functions) { + if (func->module == MODULE && func->base == BASE) { + LOG_BRANCH = func->name; + // The logging function existed before, so this is the second + // instrumentation. + secondInstrumentation = true; + break; + } + } + if (!LOG_BRANCH) { + auto* func = module->addFunction(Builder::makeFunction( + Names::getValidFunctionName(*module, BASE), + Signature({Type::i32, Type::i32, Type::i32}, Type::none), + {})); + func->module = MODULE; + func->base = BASE; + LOG_BRANCH = func->name; + } + + // Walk normally, using logBranch as we go. 
+ Super::doWalkModule(module); + } +}; + +} // anonymous namespace + +Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } + +} // namespace wasm diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp new file mode 100644 index 00000000000..93eb5878ca4 --- /dev/null +++ b/src/passes/RandomizeBranchHints.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Apply random branch hints. This is really only useful for fuzzing. The +// randomness here is deterministic, so that reducing can work. +// + +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +struct RandomizeBranchHints + : public WalkerPass< + PostWalker>> { + + uint64_t hash = 42; + + void visitExpression(Expression* curr) { + // Add some deterministic randomness as we go. 
+ deterministic_hash_combine(hash, curr->_id); + } + + void visitIf(If* curr) { + deterministic_hash_combine(hash, 1337); + processCondition(curr); + } + + void visitBreak(Break* curr) { + deterministic_hash_combine(hash, 99999); + if (curr->condition) { + processCondition(curr); + } + } + + template void processCondition(T* curr) { + auto& likely = getFunction()->codeAnnotations[curr].branchLikely; + switch (hash % 3) { + case 0: + likely = true; + break; + case 1: + likely = false; + break; + case 2: + likely = {}; + break; + } + } +}; + +Pass* createRandomizeBranchHintsPass() { return new RandomizeBranchHints(); } + +} // namespace wasm From 28cd6290e8eda8e45ae3618ba01002b864b4d9b7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:40:12 -0700 Subject: [PATCH 060/239] comments --- src/passes/InstrumentBranchHints.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 2a5631f7836..a643fe6f453 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -238,8 +238,7 @@ struct InstrumentBranchHints if (iter == getsOfPriorInstrumentation.end()) { return; } - // Great, this is indeed a prior instrumentation. Add a second - // instrumentation for it, using the old ID (negated). + // Great, this is indeed a prior instrumentation. call = iter->second; } else if (gets.size() == 1) { // The set has only one get, but it might be a tee that flows into a @@ -248,15 +247,16 @@ struct InstrumentBranchHints if (iter == teesOfPriorInstrumentation.end()) { return; } - // Great, this is indeed a prior instrumentation! Add a second - // instrumentation for it, using the old ID (negated). + // Great, this is indeed a prior instrumentation. call = iter->second; } else { - // The get has more uses; give up. + // The get has more uses; give up, as the pattern is not what we + // expect. 
return; } - // We found a call from a prior instrumentation. + // We found a call from a prior instrumentation. Emit one to pair with it, + // with negated ID. assert(call->operands.size() == 3); id = -call->operands[0]->template cast()->value.geti32(); if (id > 0) { From cd5d8b0043364846a5fa177b1eb09d979e2dbf85 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:42:03 -0700 Subject: [PATCH 061/239] comments --- test/lit/passes/instrument-branch-hints.wast | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index cf06bf56b8d..0b7cb156940 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -276,8 +276,7 @@ (drop (i32.const 299)) ) ) - ;; Another hint of 0, for more coverage (ensure hint value differs from - ;; break id). + ;; Another hint of 0, for more coverage. (@metadata.code.branch_hint "\00") (if (i32.const 342) @@ -502,7 +501,7 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) (func $br_value (result f64) - ;; As above, but now with a value. We need to stash it to a local. + ;; As above, but now with a value. 
(block $out (result f64) (@metadata.code.branch_hint "\00") (br_if $out From ca6b924dc0180efd4d4b57b1091b899451076ab5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:43:40 -0700 Subject: [PATCH 062/239] comments --- test/lit/passes/instrument-branch-hints.wast | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 0b7cb156940..691a86b10c4 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -665,8 +665,9 @@ ;; TWICE-NEXT: ) (func $nested ;; Do not reuse branch hint ids in the same instrumentation: even if we have - ;; nested conditions, we can see which calls we added, and not reuse their - ;; ids. Only TWICE should ever reuse ids in our output. + ;; nested conditions, the first instrumentation should not think its output + ;; is from. Only TWICE should ever reuse ids in our output (that is, emit + ;; negated IDs of existing ones). (@metadata.code.branch_hint "\00") (if (@metadata.code.branch_hint "\01") From c15ecb71c0105580f2af3b8470cab11ec8db7d7b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 10:44:24 -0700 Subject: [PATCH 063/239] comments --- test/lit/passes/instrument-branch-hints.wast | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 691a86b10c4..2af5e73d969 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -664,10 +664,10 @@ ;; TWICE-NEXT: ) ;; TWICE-NEXT: ) (func $nested - ;; Do not reuse branch hint ids in the same instrumentation: even if we have + ;; Do not be confused by our own output, in nested code: even if we have ;; nested conditions, the first instrumentation should not think its output - ;; is from. 
Only TWICE should ever reuse ids in our output (that is, emit - ;; negated IDs of existing ones). + ;; is from prior instrumentation. Only TWICE should ever emit negated IDs of + ;; existing ones. (@metadata.code.branch_hint "\00") (if (@metadata.code.branch_hint "\01") From 96cfbabe7446a1c48aa0133c25534321719ee394 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:02:30 -0700 Subject: [PATCH 064/239] fix --- src/passes/InstrumentBranchHints.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index a643fe6f453..d0d9b7e5e16 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -255,10 +255,15 @@ struct InstrumentBranchHints return; } - // We found a call from a prior instrumentation. Emit one to pair with it, - // with negated ID. + // We found a potential call from a prior instrumentation. It should have + // a const ID. assert(call->operands.size() == 3); - id = -call->operands[0]->template cast()->value.geti32(); + auto* c = call->operands[0]->template cast(); + if (!c) { + return; + } + // Emit logging to pair with it, with negated ID. + id = -c->value.geti32(); if (id > 0) { // The seen ID was already negated, so we negated it again to be // positive. That means the existing instrumentation was a second From 03e91b7e58c23626044474c95b9e6d7ac0f9f9ca Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:05:29 -0700 Subject: [PATCH 065/239] fix --- src/passes/InstrumentBranchHints.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index d0d9b7e5e16..26875a2f789 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -258,7 +258,7 @@ struct InstrumentBranchHints // We found a potential call from a prior instrumentation. It should have // a const ID. 
assert(call->operands.size() == 3); - auto* c = call->operands[0]->template cast(); + auto* c = call->operands[0]->template dynCast(); if (!c) { return; } From 9b2209ae1ed5ba70515971156f8d1f8ad723c6f8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:27:19 -0700 Subject: [PATCH 066/239] note --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index e91df0640fc..e95ddfac14f 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1937,7 +1937,7 @@ def handle(self, wasm): assert second_id == '-' + first_id first_alignment = (first_hint != first_actual) second_alignment = (second_hint != second_actual) - assert first_alignment == second_alignment + assert first_alignment == second_alignment, 'branch hints must change properly' def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. From 92167a184745a1a1bacc3879f3226f983fb29c22 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:55:12 -0700 Subject: [PATCH 067/239] feedback --- src/passes/InstrumentBranchHints.cpp | 43 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 26875a2f789..e549237731f 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -122,11 +122,11 @@ struct InstrumentBranchHints using Super = WalkerPass>; // The module and base names of our import. - Name MODULE = "fuzzing-support"; - Name BASE = "log-branch"; + static Name MODULE = "fuzzing-support"; + static Name BASE = "log-branch"; // The internal name of our import. - Name LOG_BRANCH; + Name logBranch; // Whether we are the second pass of instrumentation. 
If so, we only add // logic to parallel existing hints (for each such hint, we emit one with a @@ -151,16 +151,19 @@ struct InstrumentBranchHints std::unordered_map teesOfPriorInstrumentation; void visitCall(Call* curr) { - if (curr->target != LOG_BRANCH) { + if (curr->target != logBranch) { return; } // Our logging has 3 fields: id, expected, actual. - assert(curr->operands.size() == 3); - if (auto* get = curr->operands[2]->dynCast()) { - getsOfPriorInstrumentation[get] = curr; - } else if (auto* tee = curr->operands[2]->dynCast()) { - teesOfPriorInstrumentation[tee] = curr; + if (curr->operands.size() == 3) { + if (auto* get = curr->operands[2]->dynCast()) { + getsOfPriorInstrumentation[get] = curr; + } else if (auto* tee = curr->operands[2]->dynCast()) { + teesOfPriorInstrumentation[tee] = curr; + } } + // Anything else is a pattern we don't recognize (perhaps this is a fuzzer- + // modified testcase), and we skip. } bool added = false; @@ -191,7 +194,7 @@ struct InstrumentBranchHints // is, something like the pattern we emit below: // // (local.set $temp ..) - // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (call logBranch (local.get $temp)) ;; used in logging // (if // (local.get $temp) ;; and used in condition // @@ -200,13 +203,13 @@ struct InstrumentBranchHints // (if // (block // (local.set $temp ..) - // (call LOG_BRANCH (local.get $temp)) ;; used in logging + // (call logBranch (local.get $temp)) ;; used in logging // (local.get $temp) ;; and used in condition // ) // // We also consider a tee: // - // (call LOG_BRANCH (local.tee $temp (..))) ;; used in logging + // (call logBranch (local.tee $temp (..))) ;; used in logging // (if // (local.get $temp) ;; and used in condition // @@ -255,9 +258,11 @@ struct InstrumentBranchHints return; } - // We found a potential call from a prior instrumentation. It should have - // a const ID. - assert(call->operands.size() == 3); + // We found a potential call from a prior instrumentation. 
It should be in + // the proper form, and have a const ID. + if (call->operands.size() != 3) { + return; + } auto* c = call->operands[0]->template dynCast(); if (!c) { return; @@ -280,7 +285,7 @@ struct InstrumentBranchHints auto* guess = builder.makeConst(Literal(int32_t(*likely))); auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); auto* logBranch = - builder.makeCall(LOG_BRANCH, {idc, guess, get1}, Type::none); + builder.makeCall(logBranch, {idc, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); added = true; @@ -305,21 +310,21 @@ struct InstrumentBranchHints // Find our import, if we were already run on this module. for (auto& func : module->functions) { if (func->module == MODULE && func->base == BASE) { - LOG_BRANCH = func->name; + logBranch = func->name; // The logging function existed before, so this is the second // instrumentation. secondInstrumentation = true; break; } } - if (!LOG_BRANCH) { + if (!logBranch) { auto* func = module->addFunction(Builder::makeFunction( Names::getValidFunctionName(*module, BASE), Signature({Type::i32, Type::i32, Type::i32}, Type::none), {})); func->module = MODULE; func->base = BASE; - LOG_BRANCH = func->name; + logBranch = func->name; } // Walk normally, using logBranch as we go. From 835bfcf161b26ac4fa50c75daddf4b625006db61 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:55:59 -0700 Subject: [PATCH 068/239] feedback --- src/passes/InstrumentBranchHints.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index e549237731f..97c7a81202c 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -166,7 +166,7 @@ struct InstrumentBranchHints // modified testcase), and we skip. 
} - bool added = false; + bool addedInstrumentation = false; template void processCondition(T* curr) { if (curr->condition->type == Type::unreachable) { @@ -288,7 +288,7 @@ struct InstrumentBranchHints builder.makeCall(logBranch, {idc, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); - added = true; + addedInstrumentation = true; } void doWalkFunction(Function* func) { @@ -300,9 +300,9 @@ struct InstrumentBranchHints Super::doWalkFunction(func); // Our added blocks may have caused nested pops. - if (added) { + if (addedInstrumentation) { EHUtils::handleBlockNestedPops(func, *getModule()); - added = false; + addedInstrumentation = false; } } From cb950fedce44627077bd3b6a240379d8f17e02a7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:57:52 -0700 Subject: [PATCH 069/239] feedback --- src/passes/InstrumentBranchHints.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 97c7a81202c..461b0ca8ebe 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -207,12 +207,6 @@ struct InstrumentBranchHints // (local.get $temp) ;; and used in condition // ) // - // We also consider a tee: - // - // (call logBranch (local.tee $temp (..))) ;; used in logging - // (if - // (local.get $temp) ;; and used in condition - // auto* fallthrough = Properties::getFallthrough( curr->condition, getPassOptions(), *getModule()); auto* get = fallthrough->template dynCast(); @@ -245,7 +239,12 @@ struct InstrumentBranchHints call = iter->second; } else if (gets.size() == 1) { // The set has only one get, but it might be a tee that flows into a - // call. 
+ // call: + // + // (call logBranch (local.tee $temp (..))) ;; used in logging + // (if + // (local.get $temp) ;; and used in condition + // auto iter = teesOfPriorInstrumentation.find(set); if (iter == teesOfPriorInstrumentation.end()) { return; From e3a7d216d262340f48638b33de2d8f91e01d1322 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 13:59:01 -0700 Subject: [PATCH 070/239] feedback --- src/passes/InstrumentBranchHints.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 461b0ca8ebe..d31e9148748 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -238,8 +238,8 @@ struct InstrumentBranchHints // Great, this is indeed a prior instrumentation. call = iter->second; } else if (gets.size() == 1) { - // The set has only one get, but it might be a tee that flows into a - // call: + // The set has only one get, but the set might be a tee that flows into + // a call: // // (call logBranch (local.tee $temp (..))) ;; used in logging // (if From facedf9507575978743b058c5586359ef10d639d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:01:24 -0700 Subject: [PATCH 071/239] feedback --- src/passes/InstrumentBranchHints.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index d31e9148748..a5bc3dc052a 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -266,15 +266,15 @@ struct InstrumentBranchHints if (!c) { return; } - // Emit logging to pair with it, with negated ID. - id = -c->value.geti32(); - if (id > 0) { - // The seen ID was already negated, so we negated it again to be - // positive. That means the existing instrumentation was a second - // instrumentation, and we should only operate on positive IDs and emit - // negative ones. 
+ // Emit logging to pair with it. + id = c->value.geti32(); + if (id < 0) { + // The seen ID is negative, but we should only operate on positive + // ones that we can be certain came from the first instrumentation. return; } + // The second logging we add is with a negated ID. + id = -id; } // Instrument the condition. From fab9d078b45e1cc9150bcd8846298d9d94f4e18f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:02:49 -0700 Subject: [PATCH 072/239] feedback --- src/passes/InstrumentBranchHints.cpp | 4 ++-- src/passes/RandomizeBranchHints.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index a5bc3dc052a..79289281ddd 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -280,11 +280,11 @@ struct InstrumentBranchHints // Instrument the condition. auto tempLocal = builder.addVar(getFunction(), Type::i32); auto* set = builder.makeLocalSet(tempLocal, curr->condition); - auto* idc = builder.makeConst(Literal(int32_t(id))); + auto* idConst = builder.makeConst(Literal(int32_t(id))); auto* guess = builder.makeConst(Literal(int32_t(*likely))); auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); auto* logBranch = - builder.makeCall(logBranch, {idc, guess, get1}, Type::none); + builder.makeCall(logBranch, {idConst, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, logBranch, get2}); addedInstrumentation = true; diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 93eb5878ca4..20610ba8ff3 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -20,6 +20,7 @@ // #include "pass.h" +#Include "support/hash.h" #include "wasm-builder.h" #include "wasm.h" From aabd6bce651709cbc774426451b2cf427d948198 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 
14:03:33 -0700 Subject: [PATCH 073/239] feedback --- src/passes/pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 83085bae007..5e98ef5d086 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -412,9 +412,9 @@ void PassRegistry::registerPasses() { registerPass("propagate-globals-globally", "propagate global values to other globals (useful for tests)", createPropagateGlobalsGloballyPass); - registerPass("randomize-branch-hints", - "randomize branch hints (for fuzzing)", - createRandomizeBranchHintsPass); + registerTestPass("randomize-branch-hints", + "randomize branch hints (for fuzzing)", + createRandomizeBranchHintsPass); registerPass("remove-non-js-ops", "removes operations incompatible with js", createRemoveNonJSOpsPass); From a27c9a489b3728bd3ef1414d3b1df021ccbb4d1d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:05:47 -0700 Subject: [PATCH 074/239] feedback --- test/lit/passes/randomize-branch-hints.wast | 1 - 1 file changed, 1 deletion(-) diff --git a/test/lit/passes/randomize-branch-hints.wast b/test/lit/passes/randomize-branch-hints.wast index 95fff9c9ce8..b423f848b9a 100644 --- a/test/lit/passes/randomize-branch-hints.wast +++ b/test/lit/passes/randomize-branch-hints.wast @@ -1,5 +1,4 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. 
;; RUN: foreach %s %t wasm-opt --randomize-branch-hints -S -o - | filecheck %s From 57fa279eb3450ef389dee70426475d01ebeebffa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:07:58 -0700 Subject: [PATCH 075/239] fix compiler error --- src/passes/InstrumentBranchHints.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 79289281ddd..e43e8eedff0 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -122,8 +122,8 @@ struct InstrumentBranchHints using Super = WalkerPass>; // The module and base names of our import. - static Name MODULE = "fuzzing-support"; - static Name BASE = "log-branch"; + const Name MODULE = "fuzzing-support"; + const Name BASE = "log-branch"; // The internal name of our import. Name logBranch; From 613c532a57828512bb9fb5749640cec67a212e26 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:08:57 -0700 Subject: [PATCH 076/239] fix duplicate name --- src/passes/InstrumentBranchHints.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index e43e8eedff0..1a448c8fd16 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -283,10 +283,10 @@ struct InstrumentBranchHints auto* idConst = builder.makeConst(Literal(int32_t(id))); auto* guess = builder.makeConst(Literal(int32_t(*likely))); auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); - auto* logBranch = + auto* log = builder.makeCall(logBranch, {idConst, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); - curr->condition = builder.makeBlock({set, logBranch, get2}); + curr->condition = builder.makeBlock({set, log, get2}); addedInstrumentation = true; } From baa4d2b04c9a635d7c94ad651c0f772715a6685c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 
2025 14:09:37 -0700 Subject: [PATCH 077/239] fix compiler error --- src/passes/RandomizeBranchHints.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 20610ba8ff3..7b013fc7ebf 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -20,7 +20,7 @@ // #include "pass.h" -#Include "support/hash.h" +#include "support/hash.h" #include "wasm-builder.h" #include "wasm.h" From ba38cf8ccc77438afd5baf60e3d95635221599c1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:15:49 -0700 Subject: [PATCH 078/239] update help --- test/lit/help/wasm-metadce.test | 3 +++ test/lit/help/wasm-opt.test | 3 +++ test/lit/help/wasm2js.test | 3 +++ 3 files changed, 9 insertions(+) diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test index 5870b5c9d45..4c727bcbd1f 100644 --- a/test/lit/help/wasm-metadce.test +++ b/test/lit/help/wasm-metadce.test @@ -208,6 +208,9 @@ ;; CHECK-NEXT: --inlining-optimizing inline functions and optimizes ;; CHECK-NEXT: where we inlined ;; CHECK-NEXT: +;; CHECK-NEXT: --instrument-branch-hints instrument branch hints so we +;; CHECK-NEXT: can see which guessed right +;; CHECK-NEXT: ;; CHECK-NEXT: --instrument-locals instrument the build with code ;; CHECK-NEXT: to intercept all loads and ;; CHECK-NEXT: stores diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 34a2ab2f25c..a0d8d199f94 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -232,6 +232,9 @@ ;; CHECK-NEXT: --inlining-optimizing inline functions and optimizes ;; CHECK-NEXT: where we inlined ;; CHECK-NEXT: +;; CHECK-NEXT: --instrument-branch-hints instrument branch hints so we +;; CHECK-NEXT: can see which guessed right +;; CHECK-NEXT: ;; CHECK-NEXT: --instrument-locals instrument the build with code ;; CHECK-NEXT: to intercept all loads and ;; CHECK-NEXT: stores diff --git 
a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index ac68667554b..881e18950e7 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -172,6 +172,9 @@ ;; CHECK-NEXT: --inlining-optimizing inline functions and optimizes ;; CHECK-NEXT: where we inlined ;; CHECK-NEXT: +;; CHECK-NEXT: --instrument-branch-hints instrument branch hints so we +;; CHECK-NEXT: can see which guessed right +;; CHECK-NEXT: ;; CHECK-NEXT: --instrument-locals instrument the build with code ;; CHECK-NEXT: to intercept all loads and ;; CHECK-NEXT: stores From 8b26628c68cbc22dad3c60bec33e7b669ef7dd18 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:16:36 -0700 Subject: [PATCH 079/239] format --- src/passes/InstrumentBranchHints.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 1a448c8fd16..54b29a85e16 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -283,8 +283,7 @@ struct InstrumentBranchHints auto* idConst = builder.makeConst(Literal(int32_t(id))); auto* guess = builder.makeConst(Literal(int32_t(*likely))); auto* get1 = builder.makeLocalGet(tempLocal, Type::i32); - auto* log = - builder.makeCall(logBranch, {idConst, guess, get1}, Type::none); + auto* log = builder.makeCall(logBranch, {idConst, guess, get1}, Type::none); auto* get2 = builder.makeLocalGet(tempLocal, Type::i32); curr->condition = builder.makeBlock({set, log, get2}); addedInstrumentation = true; From d954ace4f703b524c8bb1088da4a8e40b8a1fc1c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 14:49:04 -0700 Subject: [PATCH 080/239] todo --- scripts/fuzz_opt.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index e95ddfac14f..dcde66495a4 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1849,6 +1849,28 @@ class 
BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX def handle(self, wasm): + open(wasm, 'w').write(''' +(module + (func $loop-br_if-flip-reverse (param $x i32) + ;; As above, with a hint of 1, that should flip to 0. + (block $block + (loop $loop + (@metadata.code.branch_hint "\\01") + (br_if $block + (local.get $x) + ) + (br $loop) + ) + ) + ) +) +''') + # XXX bizarre we see no fuzz findings... hack this code to use a given + # wat file I see the bug on, and see that happens... + # so... the issue is that we add an eqz on the br_if... no local.get immediate to see! can we look through eqz..? + # loo through eqz and fallthrough and perhaps more..? + # OR: if we see a prior instrumentation, we can look at that statically and see if it needs flipping, i guess.. not great + # Generate the middle wasm, which has the first round of instrumentation, # then the final one with optimizations as well. We only run the final # one, but the middle one is useful to compare when debugging an error. @@ -1868,7 +1890,8 @@ def handle(self, wasm): run([ in_bin('wasm-opt'), middle, - ] + get_random_opts() + [ + '--remove-unused-brs', # XXX + #] + get_random_opts() + [ # Instrument again after opts, so our fuzzing can see if the opts # messed anything up. '--instrument-branch-hints', From 574217cbd51d15a512e9d151fde2d36e55e4d340 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 2 Jul 2025 17:16:30 -0700 Subject: [PATCH 081/239] todo --- scripts/fuzz_opt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index dcde66495a4..aa9a8c513ff 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1870,6 +1870,12 @@ def handle(self, wasm): # so... the issue is that we add an eqz on the br_if... no local.get immediate to see! can we look through eqz..? # loo through eqz and fallthrough and perhaps more..? # OR: if we see a prior instrumentation, we can look at that statically and see if it needs flipping, i guess.. 
not great + # + # Well... we should do this: + # instrument, read it out, see which hints were right, an id that is never wrong no matter how many times branched + # DELETE the branch hints that were wrong. now the wasm runs with 100% correct branch hints. + # DELETE the instrumentation too. + # optimize. run. instrument. all branch hints must be right! could be fewer, but no wrong ones! # Generate the middle wasm, which has the first round of instrumentation, # then the final one with optimizations as well. We only run the final From 69be0a4649973dadeab8c8efaccf2b975b255e10 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 09:50:52 -0700 Subject: [PATCH 082/239] fix --- src/wasm/wasm-binary.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 08c6e19e298..bb8dfb29fe1 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2518,12 +2518,13 @@ getOrMakeName(const std::unordered_map& nameMap, Name name, std::unordered_set& usedNames) { if (auto it = nameMap.find(i); it != nameMap.end()) { - return {it->second, true}; - } else { - auto valid = Names::getValidNameGivenExisting(name, usedNames); - usedNames.insert(valid); - return {valid, false}; + auto name = it->second; + usedNames.insert(name); + return {name, true}; } + auto valid = Names::getValidNameGivenExisting(name, usedNames); + usedNames.insert(valid); + return {valid, false}; } void WasmBinaryReader::readMemories() { From 355817e3ff823cbfbd1d9da8590628db59243fb6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 10:19:00 -0700 Subject: [PATCH 083/239] fix --- src/wasm/wasm-binary.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index bb8dfb29fe1..8a721ad6cb8 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2519,8 +2519,14 @@ getOrMakeName(const std::unordered_map& nameMap, 
std::unordered_set& usedNames) { if (auto it = nameMap.find(i); it != nameMap.end()) { auto name = it->second; - usedNames.insert(name); - return {name, true}; + auto [_, inserted] = usedNames.insert(name); + if (inserted) { + return {name, true}; + } + // Otherwise, we cannot use the name from the names section, which is + // unfortunate, and fall through to generate a new unique name. (This only + // commonly happens in our own testcases' outputs, where existing names + // happen to match the names we invent for things, and overlaps can occur.) } auto valid = Names::getValidNameGivenExisting(name, usedNames); usedNames.insert(valid); From bffe43524bc5e422678bfce7dddd27f148a4204a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:06:51 -0700 Subject: [PATCH 084/239] remove TWICE --- src/passes/InstrumentBranchHints.cpp | 163 +----- test/lit/passes/instrument-branch-hints.wast | 504 ------------------- 2 files changed, 2 insertions(+), 665 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 54b29a85e16..608413196e7 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -54,55 +54,8 @@ // if (expected != actual) throw `Bad branch hint! (${id})`; // }; // -// Another use case for this pass is to fuzz branch hint updates: given a fuzz -// case, we can instrument it and view the loggings, then optimize the original, -// instrument that, and view those loggings. Imagine, for example, that we flip -// the condition but forget to flip the hint: -// -// @metadata.branch.hint B -// if (!(temp = condition; log(123, B, temp); temp)) { ;; added a ! -// Y ;; this moved -// } else { -// X ;; this moved -// } -// -// The logging before would be 123,B,C (where C is 0 or 1 - the hint might be -// wrong or right, in a fuzz testcase), and the logging after will remain the -// same, so this did not help us yet (because the ! 
is not the entire condition, -// not just |condition|). But if we run this instrumentation again, we get this: -// -// @metadata.branch.hint B -// if (temp2 = ( -// !(temp = condition; log(123, B, temp); temp) -// ); log(123, B, temp2); temp2)) { -// Y -// } else { -// X -// } -// -// Note how the full !-ed condition is nested inside another instrumentation -// with another temp local. Also, we inferred the same ID (123) in both cases, -// by scanning the inside of the condition. Using that, the new logging will be -// 123,B,C followed by 123,B,!C. We can therefore find pairs of loggings with -// the same ID, and consider the predicted and actual values: -// -// [id,0,0], [id,0,0] - nothing changed: good -// [id,0,0], [id,0,1] - the actual result changed but not the prediction: bad -// [id,0,0], [id,1,0] - prediction changed but not actual result: bad -// [id,0,0], [id,1,1] - actual and predicted both changed: good -// etc. -// -// To make it easy to pair the results, the ID is negative in subsequent -// instrumentations. That is, we will match an ID of 42 in the first -// instrumentation with an id of -42 in the second (that avoids us matching two -// from the first, if e.g. a branch happens twice in a loop). Thus, the first -// instrumentations adds positive IDs, and the second adds negative, which makes -// it trivial to differentiate them. (See script/fuzz_opt.py's -// BranchHintPreservation for more details.) -// #include "ir/eh-utils.h" -#include "ir/local-graph.h" #include "ir/names.h" #include "ir/properties.h" #include "pass.h" @@ -128,11 +81,6 @@ struct InstrumentBranchHints // The internal name of our import. Name logBranch; - // Whether we are the second pass of instrumentation. If so, we only add - // logic to parallel existing hints (for each such hint, we emit one with a - // negative ID, so they can be paired, as mentioned above). 
- bool secondInstrumentation = false; - std::unique_ptr localGraph; void visitIf(If* curr) { processCondition(curr); } @@ -143,13 +91,6 @@ struct InstrumentBranchHints } } - // Track existing calls to our logging, and their gets, so that we can - // identify them and add the second instrumentation properly. This map stores - // gets that map to such calls, specifically their actual values (the same - // value used in the branch, which we want to instrument). We also map tees. - std::unordered_map getsOfPriorInstrumentation; - std::unordered_map teesOfPriorInstrumentation; - void visitCall(Call* curr) { if (curr->target != logBranch) { return; @@ -182,100 +123,7 @@ struct InstrumentBranchHints Builder builder(*getModule()); // Pick an ID for this branch. - int id = 0; - if (!secondInstrumentation) { - // This is the first instrumentation. We instrument everything, using a - // new positive ID for each. - id = branchId++; - } else { - // In the second instrumentation we find existing instrumentation and add - // paired ones to them. To find the existing ones, we look for this - // condition being a local.get that is used in a call to our import, that - // is, something like the pattern we emit below: - // - // (local.set $temp ..) - // (call logBranch (local.get $temp)) ;; used in logging - // (if - // (local.get $temp) ;; and used in condition - // - // We also consider the fallthrough, for the nested case: - // - // (if - // (block - // (local.set $temp ..) 
- // (call logBranch (local.get $temp)) ;; used in logging - // (local.get $temp) ;; and used in condition - // ) - // - auto* fallthrough = Properties::getFallthrough( - curr->condition, getPassOptions(), *getModule()); - auto* get = fallthrough->template dynCast(); - if (!get) { - return; - } - auto& sets = localGraph->getSets(get); - if (sets.size() != 1) { - return; - } - auto* set = *sets.begin(); - auto& gets = localGraph->getSetInfluences(set); - Call* call = nullptr; - if (gets.size() == 2) { - // The set has two gets: the get in the condition we began at, and - // another. - LocalGet* otherGet = nullptr; - for (auto* get2 : gets) { - if (get2 != get) { - otherGet = get2; - } - } - assert(otherGet); - // See if that other get is used in a logging. - auto iter = getsOfPriorInstrumentation.find(otherGet); - if (iter == getsOfPriorInstrumentation.end()) { - return; - } - // Great, this is indeed a prior instrumentation. - call = iter->second; - } else if (gets.size() == 1) { - // The set has only one get, but the set might be a tee that flows into - // a call: - // - // (call logBranch (local.tee $temp (..))) ;; used in logging - // (if - // (local.get $temp) ;; and used in condition - // - auto iter = teesOfPriorInstrumentation.find(set); - if (iter == teesOfPriorInstrumentation.end()) { - return; - } - // Great, this is indeed a prior instrumentation. - call = iter->second; - } else { - // The get has more uses; give up, as the pattern is not what we - // expect. - return; - } - - // We found a potential call from a prior instrumentation. It should be in - // the proper form, and have a const ID. - if (call->operands.size() != 3) { - return; - } - auto* c = call->operands[0]->template dynCast(); - if (!c) { - return; - } - // Emit logging to pair with it - id = c->value.geti32(); - if (id < 0) { - // The seen ID is negative, but we should only operation on positive - // ones that we can be certain came from the first instrumentation. 
- return; - } - // The second logging we add is with a negated ID. - id = -id; - } + int id = branchId++; // Instrument the condition. auto tempLocal = builder.addVar(getFunction(), Type::i32); @@ -290,11 +138,6 @@ struct InstrumentBranchHints } void doWalkFunction(Function* func) { - if (secondInstrumentation) { - localGraph = std::make_unique(func, getModule()); - localGraph->computeSetInfluences(); - } - Super::doWalkFunction(func); // Our added blocks may have caused nested pops. @@ -309,12 +152,10 @@ struct InstrumentBranchHints for (auto& func : module->functions) { if (func->module == MODULE && func->base == BASE) { logBranch = func->name; - // The logging function existed before, so this is the second - // instrumentation. - secondInstrumentation = true; break; } } + // Otherwise, add it. if (!logBranch) { auto* func = module->addFunction(Builder::makeFunction( Names::getValidFunctionName(*module, BASE), diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 2af5e73d969..bf7da40b0b8 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -3,10 +3,6 @@ ;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints -S -o - | filecheck %s -;; Also test the results of running again. When a condition is instrumented -;; twice, we should reuse the id, but emit it negated. 
-;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints --instrument-branch-hints -S -o - | filecheck %s --check-prefix=TWICE - (module ;; CHECK: (type $0 (func)) @@ -19,17 +15,6 @@ ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) ;; CHECK: (tag $i32 (type $1) (param i32)) - ;; TWICE: (type $0 (func)) - - ;; TWICE: (type $1 (func (param i32))) - - ;; TWICE: (type $2 (func (result f64))) - - ;; TWICE: (type $3 (func (param i32 i32 i32))) - - ;; TWICE: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) - - ;; TWICE: (tag $i32 (type $1) (param i32)) (tag $i32 (param i32)) ;; CHECK: (func $if (type $0) @@ -122,129 +107,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $if (type $0) - ;; TWICE-NEXT: (local $0 i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local $2 i32) - ;; TWICE-NEXT: (local $3 i32) - ;; TWICE-NEXT: (local $4 i32) - ;; TWICE-NEXT: (local $5 i32) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $3 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -1) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 99) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: 
(if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $4 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (i32.const 142) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 2) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -2) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $4) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $4) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 11337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 199) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (i32.const 242) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 21337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 299) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $5 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (i32.const 342) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 3) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -3) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $5) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $5) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 31337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; 
TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 399) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $if ;; An if with a 0 hint, a 1 hint, and no hint. (@metadata.code.branch_hint "\00") @@ -339,78 +201,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $br (type $0) - ;; TWICE-NEXT: (local $0 i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local $2 i32) - ;; TWICE-NEXT: (local $3 i32) - ;; TWICE-NEXT: (block $out - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (br_if $out - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 4) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -4) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (block $out1 - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (br_if $out1 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $3 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (i32.const 142) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 5) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -5) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - 
;; TWICE-NEXT: ) - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 11337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (block $out2 - ;; TWICE-NEXT: (br_if $out2 - ;; TWICE-NEXT: (i32.const 242) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 21337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $br ;; As above, with br_if. (block $out @@ -462,44 +252,6 @@ ;; CHECK-NEXT: (local.get $scratch) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $br_value (type $2) (result f64) - ;; TWICE-NEXT: (local $scratch f64) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local $2 i32) - ;; TWICE-NEXT: (block $out (result f64) - ;; TWICE-NEXT: (local.set $scratch - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (br_if $out - ;; TWICE-NEXT: (f64.const 3.14159) - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 6) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -6) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $scratch) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $br_value (result f64) ;; As above, but now with a value. 
(block $out (result f64) @@ -571,102 +323,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $nested (type $0) - ;; TWICE-NEXT: (local $0 i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local $2 i32) - ;; TWICE-NEXT: (local $3 i32) - ;; TWICE-NEXT: (local $4 i32) - ;; TWICE-NEXT: (local $5 i32) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $5 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if (result i32) - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $3 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 7) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -7) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $3) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (i32.const 142) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (else - ;; TWICE-NEXT: (i32.const 242) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 9) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -9) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $5) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $5) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $4 - 
;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (i32.const 342) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 8) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const -8) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $4) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $4) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $nested ;; Do not be confused by our own output, in nested code: even if we have ;; nested conditions, the first instrumentation should not think its output - ;; is from prior instrumentation. Only TWICE should ever emit negated IDs of ;; existing ones. (@metadata.code.branch_hint "\00") (if @@ -722,47 +381,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $eh-pop (type $0) - ;; TWICE-NEXT: (local $0 i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local $2 i32) - ;; TWICE-NEXT: (block $label - ;; TWICE-NEXT: (try - ;; TWICE-NEXT: (do - ;; TWICE-NEXT: (nop) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (catch $i32 - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (pop i32) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\00") - ;; TWICE-NEXT: (br_if $label - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $2 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $0 - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: (i32.const 10) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $0) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $log-branch - ;; TWICE-NEXT: 
(i32.const -10) - ;; TWICE-NEXT: (i32.const 0) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $2) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $eh-pop (block $label (try @@ -784,19 +402,14 @@ ;; This module has our import, but with a minified internal name. We should ;; still use it, and assume we are doing the second instrumentation. That is, -;; CHECK does the second instrumentation here, and TWICE would do a third, but -;; we add nothing there, so CHECK and TWICE are equal here. (module ;; CHECK: (type $0 (func)) ;; CHECK: (type $1 (func (param i32 i32 i32))) ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) - ;; TWICE: (type $0 (func)) - ;; TWICE: (type $1 (func (param i32 i32 i32))) - ;; TWICE: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) ;; CHECK: (func $if (type $0) @@ -832,39 +445,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $if (type $0) - ;; TWICE-NEXT: (local $x i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $x - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const -42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; 
TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $if (local $x i32) (@metadata.code.branch_hint "\01") @@ -917,37 +497,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $optimized (type $0) - ;; TWICE-NEXT: (local $x i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (local.set $x - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const -42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $optimized (local $x i32) ;; As above, but now the existing instrumentation looks like it was @@ -1001,36 +550,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $optimized-moar (type $0) - ;; TWICE-NEXT: (local $x i32) - ;; TWICE-NEXT: (local $1 i32) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.tee $x - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (block (result i32) - ;; TWICE-NEXT: (local.set $1 - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const -42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (local.get $1) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; 
TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $optimized-moar (local $x i32) ;; As above, but optimized further, now using a tee. We should still add the @@ -1074,29 +593,6 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; TWICE: (func $optimized-bad (type $0) - ;; TWICE-NEXT: (local $x i32) - ;; TWICE-NEXT: (local.set $x - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (call $min - ;; TWICE-NEXT: (i32.const 42) - ;; TWICE-NEXT: (i32.const 1) - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (@metadata.code.branch_hint "\01") - ;; TWICE-NEXT: (if - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: (then - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (i32.const 1337) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: (drop - ;; TWICE-NEXT: (local.get $x) - ;; TWICE-NEXT: ) - ;; TWICE-NEXT: ) (func $optimized-bad (local $x i32) ;; As above, but the set has another use later, so we give up as the pattern From 9ff748be21d0d101fa4879a5564c3f36f480e07b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:10:32 -0700 Subject: [PATCH 085/239] simplify tests --- test/lit/passes/instrument-branch-hints.wast | 172 ++----------------- 1 file changed, 13 insertions(+), 159 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index bf7da40b0b8..3de06eee406 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -108,7 +108,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $if - ;; An if with a 0 hint, a 1 hint, and no hint. + ;; An if with a 0 hint and another with a 1 hint. (@metadata.code.branch_hint "\00") (if (i32.const 42) @@ -129,6 +129,10 @@ (drop (i32.const 199)) ) ) + ) + + (func $if-2 + ;; An if with no hint, and another with 0 for more coverage. 
(if (i32.const 242) (then @@ -138,7 +142,6 @@ (drop (i32.const 299)) ) ) - ;; Another hint of 0, for more coverage. (@metadata.code.branch_hint "\00") (if (i32.const 342) @@ -202,7 +205,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $br - ;; As above, with br_if. + ;; As above, with br_if, hints of 0 and 1. (block $out (@metadata.code.branch_hint "\00") (br_if $out @@ -217,6 +220,10 @@ ) (drop (i32.const 11337)) ) + ) + + (func $br-no + ;; A br_if with no hint. (block $out2 (br_if $out2 (i32.const 242) @@ -324,9 +331,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $nested - ;; Do not be confused by our own output, in nested code: even if we have - ;; nested conditions, the first instrumentation should not think its output - ;; existing ones. + ;; We should instrument all these, even the nested ones. (@metadata.code.branch_hint "\00") (if (@metadata.code.branch_hint "\01") @@ -400,8 +405,8 @@ ) ) -;; This module has our import, but with a minified internal name. We should -;; still use it, and assume we are doing the second instrumentation. That is, +;; This module has our import, but with a minified internal name. We should use +;; that import. 
(module ;; CHECK: (type $0 (func)) @@ -465,155 +470,4 @@ ) ) ) - - ;; CHECK: (func $optimized (type $0) - ;; CHECK-NEXT: (local $x i32) - ;; CHECK-NEXT: (local $1 i32) - ;; CHECK-NEXT: (local.set $x - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $1 - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const -42) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1337) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $optimized - (local $x i32) - ;; As above, but now the existing instrumentation looks like it was - ;; optimized a little: the local.set and call were moved out of the if - ;; (something that merge-blocks would do). We should still add the second - ;; instrumentation. 
- (local.set $x - (i32.const 42) - ) - (call $min - (i32.const 42) - (i32.const 1) - (local.get $x) - ) - (@metadata.code.branch_hint "\01") - (if - (local.get $x) - (then - (drop (i32.const 1337)) - ) - ) - ) - - ;; CHECK: (func $optimized-moar (type $0) - ;; CHECK-NEXT: (local $x i32) - ;; CHECK-NEXT: (local $1 i32) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.tee $x - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $1 - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const -42) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1337) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $optimized-moar - (local $x i32) - ;; As above, but optimized further, now using a tee. We should still add the - ;; second instrumentation. 
- (call $min - (i32.const 42) - (i32.const 1) - (local.tee $x - (i32.const 42) - ) - ) - (@metadata.code.branch_hint "\01") - (if - (local.get $x) - (then - (drop (i32.const 1337)) - ) - ) - ) - - ;; CHECK: (func $optimized-bad (type $0) - ;; CHECK-NEXT: (local $x i32) - ;; CHECK-NEXT: (local.set $x - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1337) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $optimized-bad - (local $x i32) - ;; As above, but the set has another use later, so we give up as the pattern - ;; is unfamiliar. - (local.set $x - (i32.const 42) - ) - (call $min - (i32.const 42) - (i32.const 1) - (local.get $x) - ) - (@metadata.code.branch_hint "\01") - (if - (local.get $x) - (then - (drop (i32.const 1337)) - ) - ) - (drop - (local.get $x) ;; extra use - ) - ) ) From 870f4a9fe61fce9f8e106c0f506d2a5b9fd7244b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:10:54 -0700 Subject: [PATCH 086/239] fix compilation error --- src/passes/InstrumentBranchHints.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 608413196e7..d681490c874 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -81,8 +81,6 @@ struct InstrumentBranchHints // The internal name of our import. 
Name logBranch; - std::unique_ptr localGraph; - void visitIf(If* curr) { processCondition(curr); } void visitBreak(Break* curr) { @@ -91,22 +89,6 @@ struct InstrumentBranchHints } } - void visitCall(Call* curr) { - if (curr->target != logBranch) { - return; - } - // Our logging has 3 fields: id, expected, actual. - if (curr->operands.size() == 3) { - if (auto* get = curr->operands[2]->dynCast()) { - getsOfPriorInstrumentation[get] = curr; - } else if (auto* tee = curr->operands[2]->dynCast()) { - teesOfPriorInstrumentation[tee] = curr; - } - } - // Anything else is a pattern we don't recognize (perhaps this is a fuzzer- - // modified testcase), and we skip. - } - bool addedInstrumentation = false; template void processCondition(T* curr) { From b7da0e07d5ca99629d41ebc3c13478192434089a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:12:52 -0700 Subject: [PATCH 087/239] test updates --- test/lit/passes/instrument-branch-hints.wast | 86 ++++++++++---------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 3de06eee406..03068846d5b 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -20,7 +20,6 @@ ;; CHECK: (func $if (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) - ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (block (result i32) @@ -69,6 +68,33 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if + ;; An if with a 0 hint and another with a 1 hint. 
+ (@metadata.code.branch_hint "\00") + (if + (i32.const 42) + (then + (drop (i32.const 1337)) + ) + (else + (drop (i32.const 99)) + ) + ) + (@metadata.code.branch_hint "\01") + (if + (i32.const 142) + (then + (drop (i32.const 11337)) + ) + (else + (drop (i32.const 199)) + ) + ) + ) + + ;; CHECK: (func $if-2 (type $0) + ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (if ;; CHECK-NEXT: (i32.const 242) ;; CHECK-NEXT: (then @@ -85,15 +111,15 @@ ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (i32.const 342) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $log-branch ;; CHECK-NEXT: (i32.const 3) ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then ;; CHECK-NEXT: (drop @@ -107,30 +133,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $if - ;; An if with a 0 hint and another with a 1 hint. - (@metadata.code.branch_hint "\00") - (if - (i32.const 42) - (then - (drop (i32.const 1337)) - ) - (else - (drop (i32.const 99)) - ) - ) - (@metadata.code.branch_hint "\01") - (if - (i32.const 142) - (then - (drop (i32.const 11337)) - ) - (else - (drop (i32.const 199)) - ) - ) - ) - (func $if-2 ;; An if with no hint, and another with 0 for more coverage. (if @@ -195,14 +197,6 @@ ;; CHECK-NEXT: (i32.const 11337) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block $out2 - ;; CHECK-NEXT: (br_if $out2 - ;; CHECK-NEXT: (i32.const 242) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 21337) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $br ;; As above, with br_if, hints of 0 and 1. 
@@ -222,6 +216,16 @@ ) ) + ;; CHECK: (func $br-no (type $0) + ;; CHECK-NEXT: (block $out2 + ;; CHECK-NEXT: (br_if $out2 + ;; CHECK-NEXT: (i32.const 242) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 21337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $br-no ;; A br_if with no hint. (block $out2 @@ -408,16 +412,16 @@ ;; This module has our import, but with a minified internal name. We should use ;; that import. (module - ;; CHECK: (type $0 (func)) - ;; CHECK: (type $1 (func (param i32 i32 i32))) - ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $1) (param i32 i32 i32))) + ;; CHECK: (type $0 (func (param i32 i32 i32))) + ;; CHECK: (type $1 (func)) + ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) - ;; CHECK: (func $if (type $0) + ;; CHECK: (func $if (type $1) ;; CHECK-NEXT: (local $x i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") @@ -437,7 +441,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $min - ;; CHECK-NEXT: (i32.const -42) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) From ce423219981964d516c66f888a974717fceb4757 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:17:39 -0700 Subject: [PATCH 088/239] Add a todo for BrOn --- src/passes/InstrumentBranchHints.cpp | 2 ++ test/lit/passes/instrument-branch-hints.wast | 29 ++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index d681490c874..35d2dbabb15 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -89,6 +89,8 @@ struct InstrumentBranchHints } } + // TODO: BrOn, but the condition there is not an i32 + bool addedInstrumentation = false; template void 
processCondition(T* curr) { diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 03068846d5b..ae801f6f7d5 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -10,9 +10,11 @@ ;; CHECK: (type $2 (func (result f64))) - ;; CHECK: (type $3 (func (param i32 i32 i32))) + ;; CHECK: (type $3 (func (param anyref))) - ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $3) (param i32 i32 i32))) + ;; CHECK: (type $4 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $4) (param i32 i32 i32))) ;; CHECK: (tag $i32 (type $1) (param i32)) (tag $i32 (param i32)) @@ -360,6 +362,29 @@ ) ) + ;; CHECK: (func $br_on (type $3) (param $x anyref) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_on_null $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_on (param $x anyref) + ;; We do not instrument BrOn yet: the condition is not an i32 in this case, + ;; so logging is trickier. 
TODO + (block $out + (drop + (@metadata.code.branch_hint "\00") + (br_on_null $out + (local.get $x) + ) + ) + ) + ) + ;; CHECK: (func $eh-pop (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) From 4edb1bc00e61e1efb294afe04b30eba6ede7b071 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 11:18:45 -0700 Subject: [PATCH 089/239] oops --- test/lit/passes/instrument-branch-hints.wast | 1 - 1 file changed, 1 deletion(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index ae801f6f7d5..a09bfd4a2cf 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -1,5 +1,4 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. ;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints -S -o - | filecheck %s From 46f2ec85c3b2c948f5dca92fd1d03965ae844bc0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 12:33:53 -0700 Subject: [PATCH 090/239] Update test/lit/passes/instrument-branch-hints.wast Co-authored-by: Thomas Lively --- test/lit/passes/instrument-branch-hints.wast | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index a09bfd4a2cf..565bcf78716 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -436,8 +436,6 @@ ;; This module has our import, but with a minified internal name. We should use ;; that import. 
(module - - ;; CHECK: (type $0 (func (param i32 i32 i32))) ;; CHECK: (type $1 (func)) From e351a5fb56a85367941a915aacc5bc8308c08365 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 12:35:17 -0700 Subject: [PATCH 091/239] remove fuzz restriction --- scripts/test/fuzzing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index f922ac4e16b..4a610946935 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -132,8 +132,6 @@ 'string-lifting-section.wast', # TODO: fuzzer support for uninhabitable imported globals 'exact-references.wast', - # We cannot re-instrument such code (see the pass). - 'instrument-branch-hints.wast', ] From dd3a518fa3c426fb62635368c81da55756a8aa7f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 13:12:36 -0700 Subject: [PATCH 092/239] fix --- src/wasm/wasm-binary.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index f70b97293d2..a6248ed0db8 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2518,15 +2518,11 @@ getOrMakeName(const std::unordered_map& nameMap, Name name, std::unordered_set& usedNames) { if (auto it = nameMap.find(i); it != nameMap.end()) { - auto name = it->second; - auto [_, inserted] = usedNames.insert(name); - if (inserted) { - return {name, true}; - } - // Otherwise, we cannot use the name from the names section, which is - // unfortunate, and fall through to generate a new unique name. (This only - // commonly happens in our own testcases' outputs, where existing names - // happen to match the names we invent for things, and overlaps can occur.) + // We found a name in the names section. Use it, and also note it as used + // so we don't generate such a name below, later. 
+ auto mappedName = it->second; + usedNames.insert(mappedName); + return {mappedName, true}; } auto valid = Names::getValidNameGivenExisting(name, usedNames); usedNames.insert(valid); From bd211f897a61f657d89427fa25ea528ff892ea4c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 13:23:01 -0700 Subject: [PATCH 093/239] add d8 side of import, so the fuzzer does not error --- scripts/fuzz_shell.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index 3f201b3c812..8bc7078faf5 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -261,8 +261,10 @@ function oneIn(n) { return (randomBits() % n) == 0; } -// Set up the imports. +// Import helpers. var tempRet0; + +// Set up the imports. var imports = { 'fuzzing-support': { // Logging. @@ -353,6 +355,17 @@ var imports = { // how many time units to wait). }); }, + + 'log-branch': (id, expected, actual) => { + // We only care about truthiness of the expected and actual values. + expected = +!!expected; + actual = +!!actual; + // Log out the expected and actual outcomes. This is useful for fuzzing, + // see fuzz_opt.py. For testing that expectations actually match reality + // (i.e. that branch hints are correct), you can adjust the logic here to + // throw on expected != actual . + console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); + }, }, // Emscripten support. 'env': { From 3a4f27017e6992d7e49c34266b96fc7f73dc1ab5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 13:45:10 -0700 Subject: [PATCH 094/239] Revert "remove fuzz restriction" This reverts commit e351a5fb56a85367941a915aacc5bc8308c08365. 
--- scripts/test/fuzzing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index 4a610946935..f922ac4e16b 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -132,6 +132,8 @@ 'string-lifting-section.wast', # TODO: fuzzer support for uninhabitable imported globals 'exact-references.wast', + # We cannot re-instrument such code (see the pass). + 'instrument-branch-hints.wast', ] From edb08a24bb501bc5571067d42918535c006fd2b6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 13:45:16 -0700 Subject: [PATCH 095/239] Revert "add d8 side of import, so the fuzzer does not error" This reverts commit bd211f897a61f657d89427fa25ea528ff892ea4c. --- scripts/fuzz_shell.js | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index 8bc7078faf5..3f201b3c812 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -261,10 +261,8 @@ function oneIn(n) { return (randomBits() % n) == 0; } -// Import helpers. -var tempRet0; - // Set up the imports. +var tempRet0; var imports = { 'fuzzing-support': { // Logging. @@ -355,17 +353,6 @@ var imports = { // how many time units to wait). }); }, - - 'log-branch': (id, expected, actual) => { - // We only care about truthiness of the expected and actual values. - expected = +!!expected; - actual = +!!actual; - // Log out the expected and actual outcomes. This is useful for fuzzing, - // see fuzz_opt.py. For testing that expectations actually match reality - // (i.e. that branch hints are correct), you can adjust the logic here to - // throw on expected != actual . - console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); - }, }, // Emscripten support. 
'env': { From 36d60e67badeee633e1b9285ff247801ccd671b0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 13:45:44 -0700 Subject: [PATCH 096/239] note --- scripts/test/fuzzing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index f922ac4e16b..a3add75f4f6 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -132,7 +132,7 @@ 'string-lifting-section.wast', # TODO: fuzzer support for uninhabitable imported globals 'exact-references.wast', - # We cannot re-instrument such code (see the pass). + # We do not have full support for these imports in all parts of the fuzzer. 'instrument-branch-hints.wast', ] From 107bb24d3ce604e1783e1616b36cca5fb4e3ec98 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 14:31:27 -0700 Subject: [PATCH 097/239] work --- src/passes/InstrumentBranchHints.cpp | 185 +++++++++++++++++++++++++-- src/passes/RandomizeBranchHints.cpp | 2 + src/passes/RemoveUnusedBrs.cpp | 5 +- src/passes/pass.cpp | 6 + src/passes/passes.h | 2 + 5 files changed, 186 insertions(+), 14 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 35d2dbabb15..386c9c0972d 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -54,11 +54,52 @@ // if (expected != actual) throw `Bad branch hint! (${id})`; // }; // +// A pass to delete branch hints is also provided, which finds instrumentations +// and the IDs in those calls, and deletes branch hints that were provided. For +// example, +// +// --delete-branch-hints=10,20 +// +// would do this transformation: +// +// @metadata.branch.hint A +// if (temp = condition; log(10, A, temp); temp) { // 10 matches one of 10,20 +// X +// } +// @metadata.branch.hint B +// if (temp = condition; log(99, B, temp); temp) { // 99 does not match +// Y +// } +// +// => +// +// // Used to be a branch hint here, but it was deleted.
+// if (temp = condition; log(10, A, temp); temp) { +// X +// } +// @metadata.branch.hint B // this one is unmodified. +// if (temp = condition; log(99, B, temp); temp) { +// Y +// } +// +// A pass to undo the instrumentation is also provided, which does +// +// if (temp = condition; log(123, A, temp); temp) { +// X +// } +// +// => +// +// if (condition) { +// X +// } +// #include "ir/eh-utils.h" #include "ir/names.h" #include "ir/properties.h" #include "pass.h" +#include "support/string.h" #include "wasm-builder.h" #include "wasm.h" @@ -66,6 +107,20 @@ namespace wasm { namespace { +// The module and base names of our import. +const Name MODULE = "fuzzing-support"; +const Name BASE = "log-branch"; + +// Finds our import, if it exists. +Name getLogBranchImport(Module* module) { + // Find our import, if we were already run on this module. + for (auto& func : module->functions) { + if (func->module == MODULE && func->base == BASE) { + return func->name; + } + } +} + // The branch id, which increments as we go. int branchId = 1; @@ -74,10 +129,6 @@ struct InstrumentBranchHints using Super = WalkerPass>; - // The module and base names of our import. - const Name MODULE = "fuzzing-support"; - const Name BASE = "log-branch"; - // The internal name of our import. Name logBranch; @@ -132,14 +183,8 @@ struct InstrumentBranchHints } void doWalkModule(Module* module) { - // Find our import, if we were already run on this module. - for (auto& func : module->functions) { - if (func->module == MODULE && func->base == BASE) { - logBranch = func->name; - break; - } - } - // Otherwise, add it. + logBranch = getLogBranchImport(module); + // If it doesn't exist, add it. if (!logBranch) { auto* func = module->addFunction(Builder::makeFunction( Names::getValidFunctionName(*module, BASE), @@ -155,8 +200,124 @@ struct InstrumentBranchHints } }; +// Instrumentation info for a chunk of code that is the result of the +// instrumentation pass. 
+struct Instrumentation { + // The condition before the instrumentation. + Expression* originalCondition; + // The call to the logging that the instrumentation added. + Call* call; +}; + +// Check if an expression's condition is an instrumentation, and return the info +// if so. We are provided the internal name of the logging function. +std::optional getInstrumentation(Expression* condition, Name logBranch) { + // We must identify this pattern: + // + // (block + // (local.set $temp (condition)) + // (call $log (id, prediction, (local.get $temp))) + // (local.get $temp) + // ) + // + auto* block = condition->dynCast(); + if (!block) { + return {}; + } + auto& list = block->list; + if (block->list.size() != 3) { + return {}; + } + auto *call = list[1]->dynCast(); + if (!call || call->target != logBranch) { + return {}; + } + // We found the call, so the rest must be in the proper form. + auto* set = list[0]->cast(); + return { set->value, call }; +} + +struct DeleteBranchHints + : public WalkerPass> { + + using Super = WalkerPass>; + + // The internal name of our import. + Name logBranch; + + // The set of IDs to delete. + std::unordered_set idsToDelete; + + void visitIf(If* curr) { processCondition(curr); } + + void visitBreak(Break* curr) { + if (curr->condition) { + processCondition(curr); + } + } + + // TODO: BrOn, but the condition there is not an i32 + + template void processCondition(T* curr) { + if (auto info = getInstrumentation(curr->condition, logBranch)) { + auto id = info->call->operands[0]->cast()->value.geti32(); + if (idsToDelete.count(id)) { + // Remove the branch hint. 
+ getFunction()->codeAnnotations[curr].branchLikely = {}; + } + } + } + + void doWalkModule(Module* module) { + logBranch = getLogBranchImport(module); + + auto arg = getArgument( + "delete-branch-hints", + "DeleteBranchHints usage: wasm-opt --delete-branch-hints=10,20,30"); + for (auto& str : String::Split(arg, String::Split::NewLineOr(","))) { + idsToDelete.insert(std::stoi(str)); + } + + Super::doWalkModule(module); + } +}; + +struct DeInstrumentBranchHints + : public WalkerPass> { + + using Super = WalkerPass>; + + // The internal name of our import. + Name logBranch; + + void visitIf(If* curr) { processCondition(curr); } + + void visitBreak(Break* curr) { + if (curr->condition) { + processCondition(curr); + } + } + + // TODO: BrOn, but the condition there is not an i32 + + template void processCondition(T* curr) { + if (auto info = getInstrumentation(curr->condition, logBranch)) { + // Replace the instrumentated condition with the original one. + replaceCurrent(info->originalCondition); + } + } + + void doWalkModule(Module* module) { + logBranch = getLogBranchImport(module); + + Super::doWalkModule(module); + } +}; + } // anonymous namespace Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } +Pass* createDeleteBranchHintsPass() { return new DeleteBranchHints(); } +Pass* createDeInstrumentBranchHintsPass() { return new DeInstrumentBranchHints(); } } // namespace wasm diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 7b013fc7ebf..b74d2e8a093 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -50,6 +50,8 @@ struct RandomizeBranchHints } } + // TODO: BrOn + template void processCondition(T* curr) { auto& likely = getFunction()->codeAnnotations[curr].branchLikely; switch (hash % 3) { diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 0bbd0f7646b..ef62d973f6c 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ 
b/src/passes/RemoveUnusedBrs.cpp @@ -161,11 +161,11 @@ static std::optional getBranchHint(Expression* expr, Function* func) { } static void setBranchHint(Expression* expr, bool likely, Function* func) { -// func->codeAnnotations[expr].branchLikely = likely; + func->codeAnnotations[expr].branchLikely = likely; } static void clearBranchHint(Expression* expr, Function* func) { -// func->codeAnnotations[expr].branchLikely = {}; + func->codeAnnotations[expr].branchLikely = {}; } static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { @@ -176,6 +176,7 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { +return; if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 5e98ef5d086..b99c83d3f11 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -137,6 +137,12 @@ void PassRegistry::registerPasses() { "propagate-debug-locs", "propagate debug location from parents or previous siblings to child nodes", createDebugLocationPropagationPass); + registerPass("deinstrument-branch-hints", + "de-instrument branch hint instrumentation", + createDeInstrumentBranchHintsPass); + registerPass("delete-branch-hints", + "delete branch hints using a list of instrumented IDs", + createDeleteBranchHintsPass); registerPass("denan", "instrument the wasm to convert NaNs into 0 at runtime", createDeNaNPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e0c03bad8d7..92dcd3e4eb4 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -37,6 +37,8 @@ Pass* createDAEPass(); Pass* createDAEOptimizingPass(); Pass* createDataFlowOptsPass(); Pass* createDeadCodeEliminationPass(); +Pass* createDeInstrumentBranchHintsPass(); +Pass* createDeleteBranchHintsPass(); Pass* createDeNaNPass(); Pass* createDeAlignPass(); Pass* createDebugLocationPropagationPass(); From 
d82fbf6a4fe90f17dafe99cf6d2d056a4e4ba1ef Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 14:34:10 -0700 Subject: [PATCH 098/239] fix --- src/passes/InstrumentBranchHints.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 386c9c0972d..a0eeadb3759 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -119,6 +119,8 @@ Name getLogBranchImport(Module* module) { return func->name; } } + + Fatal() << "No branch hint logging import found. Was this code instrumented?"; } // The branch id, which increments as we go. @@ -234,7 +236,7 @@ std::optional getInstrumentation(Expression* condition, Name lo } // We found the call, so the rest must be in the proper form. auto* set = list[0]->cast(); - return { set->value, call }; + return Instrumentation{ set->value, call }; } struct DeleteBranchHints @@ -260,7 +262,7 @@ struct DeleteBranchHints template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition, logBranch)) { - auto id = info->call->operands[0]->cast()->value.geti32(); + auto id = info->call->operands[0]->template cast()->value.geti32(); if (idsToDelete.count(id)) { // Remove the branch hint. 
getFunction()->codeAnnotations[curr].branchLikely = {}; From 9af9bf607046e4d7fb7f83a68a3623c432c8efcf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 14:40:11 -0700 Subject: [PATCH 099/239] test --- .../lit/passes/deinstrument-branch-hints.wast | 56 ++++++++++++ test/lit/passes/delete-branch-hints.wast | 85 +++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 test/lit/passes/deinstrument-branch-hints.wast create mode 100644 test/lit/passes/delete-branch-hints.wast diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast new file mode 100644 index 00000000000..cd721320620 --- /dev/null +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -0,0 +1,56 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt -all --deinstrument-branch-hints -S -o - | filecheck %s + +(module + (func $if + (local $temp i32) + ;; The instrumentation should be removed, and if the if's condition should + ;; be 42. + (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log-branch + (i32.const 1) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + (func $br (type $temp) + ;; The same, with a br. 
+ (local $temp i32) + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log-branch + (i32.const 4) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + ) + ) + ) +) diff --git a/test/lit/passes/delete-branch-hints.wast b/test/lit/passes/delete-branch-hints.wast new file mode 100644 index 00000000000..54c7d7650c0 --- /dev/null +++ b/test/lit/passes/delete-branch-hints.wast @@ -0,0 +1,85 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt -all --delete-branch-hints=10,30 -S -o - | filecheck %s + +(module + (func $if-10 + (local $temp i32) + ;; The branch hint should be removed, since the ID "10" is in the list of + ;; 10, 30. + (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log-branch + (i32.const 10) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + (func $if-20 + (local $temp i32) + ;; The branch hint should *not* be removed: 20 is not in the list. + (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log-branch + (i32.const 20) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + (func $br-3- (type $temp) + ;; The hint should be removed. 
+ (local $temp i32) + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log-branch + (i32.const 30) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + ) + ) + ) +) From 42c04c5128a4aaffca97e0810ec66cf6f1205378 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 14:51:54 -0700 Subject: [PATCH 100/239] workaround --- src/passes/InstrumentBranchHints.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index a0eeadb3759..22c0c5b68f9 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -103,6 +103,10 @@ #include "wasm-builder.h" #include "wasm.h" +// Work around a gcc-14 issue +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" + namespace wasm { namespace { @@ -119,8 +123,7 @@ Name getLogBranchImport(Module* module) { return func->name; } } - - Fatal() << "No branch hint logging import found. Was this code instrumented?"; + return nullptr; } // The branch id, which increments as we go. @@ -272,6 +275,9 @@ struct DeleteBranchHints void doWalkModule(Module* module) { logBranch = getLogBranchImport(module); + if (!logBranch) { + Fatal() << "No branch hint logging import found. Was this code instrumented?"; + } auto arg = getArgument( "delete-branch-hints", @@ -305,17 +311,22 @@ struct DeInstrumentBranchHints template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition, logBranch)) { // Replace the instrumentated condition with the original one. - replaceCurrent(info->originalCondition); + curr->condition = info->originalCondition; } } void doWalkModule(Module* module) { logBranch = getLogBranchImport(module); + if (!logBranch) { + Fatal() << "No branch hint logging import found. 
Was this code instrumented?"; + } Super::doWalkModule(module); } }; +#pragma GCC diagnostic pop + } // anonymous namespace Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } From 0d6f9c0df89f22142b44e34fb9e9596925c6a067 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 14:58:09 -0700 Subject: [PATCH 101/239] finish --- src/passes/InstrumentBranchHints.cpp | 8 +- .../lit/passes/deinstrument-branch-hints.wast | 39 ++++++++- test/lit/passes/delete-branch-hints.wast | 86 ++++++++++++++++++- 3 files changed, 119 insertions(+), 14 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 22c0c5b68f9..e84462398f9 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -103,10 +103,6 @@ #include "wasm-builder.h" #include "wasm.h" -// Work around a gcc-14 issue -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wnonnull" - namespace wasm { namespace { @@ -123,7 +119,7 @@ Name getLogBranchImport(Module* module) { return func->name; } } - return nullptr; + return Name(); } // The branch id, which increments as we go. 
@@ -325,8 +321,6 @@ struct DeInstrumentBranchHints } }; -#pragma GCC diagnostic pop - } // anonymous namespace Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast index cd721320620..df7763549ba 100644 --- a/test/lit/passes/deinstrument-branch-hints.wast +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -3,6 +3,30 @@ ;; RUN: foreach %s %t wasm-opt -all --deinstrument-branch-hints -S -o - | filecheck %s (module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log (type $1) (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $log (param i32 i32 i32))) + + ;; CHECK: (func $if (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $if (local $temp i32) ;; The instrumentation should be removed, and if the if's condition should @@ -13,7 +37,7 @@ (local.set $temp (i32.const 42) ) - (call $log-branch + (call $log (i32.const 1) (i32.const 0) (local.get $temp) @@ -33,7 +57,16 @@ ) ) - (func $br (type $temp) + ;; CHECK: (func $br (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br ;; The same, with a br. 
(local $temp i32) (block $out @@ -43,7 +76,7 @@ (local.set $temp (i32.const 42) ) - (call $log-branch + (call $log (i32.const 4) (i32.const 0) (local.get $temp) diff --git a/test/lit/passes/delete-branch-hints.wast b/test/lit/passes/delete-branch-hints.wast index 54c7d7650c0..375b10d16c2 100644 --- a/test/lit/passes/delete-branch-hints.wast +++ b/test/lit/passes/delete-branch-hints.wast @@ -3,6 +3,39 @@ ;; RUN: foreach %s %t wasm-opt -all --delete-branch-hints=10,30 -S -o - | filecheck %s (module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log (type $1) (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $log (param i32 i32 i32))) + + ;; CHECK: (func $if-10 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $if-10 (local $temp i32) ;; The branch hint should be removed, since the ID "10" is in the list of @@ -13,7 +46,7 @@ (local.set $temp (i32.const 42) ) - (call $log-branch + (call $log (i32.const 10) (i32.const 0) (local.get $temp) @@ -33,6 +66,33 @@ ) ) + ;; CHECK: (func $if-20 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; 
CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $if-20 (local $temp i32) ;; The branch hint should *not* be removed: 20 is not in the list. @@ -42,7 +102,7 @@ (local.set $temp (i32.const 42) ) - (call $log-branch + (call $log (i32.const 20) (i32.const 0) (local.get $temp) @@ -62,7 +122,25 @@ ) ) - (func $br-3- (type $temp) + ;; CHECK: (func $br-30 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br-30 ;; The hint should be removed. 
(local $temp i32) (block $out @@ -72,7 +150,7 @@ (local.set $temp (i32.const 42) ) - (call $log-branch + (call $log (i32.const 30) (i32.const 0) (local.get $temp) From 7270c14aad6ab6d5f1bd9056c0895e6d2d2fe833 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 16:14:06 -0700 Subject: [PATCH 102/239] work --- scripts/fuzz_opt.py | 131 ++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 83 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8e3398faae0..98ca83d28fd 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1849,6 +1849,7 @@ class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX def handle(self, wasm): + # Ensure a bugg open(wasm, 'w').write(''' (module (func $loop-br_if-flip-reverse (param $x i32) @@ -1865,108 +1866,72 @@ def handle(self, wasm): ) ) ''') - # XXX bizarre we see no fuzz findings... hack this code to use a given - # wat file I see the bug on, and see that happens... - # so... the issue is that we add an eqz on the br_if... no local.get immediate to see! can we look through eqz..? - # loo through eqz and fallthrough and perhaps more..? - # OR: if we see a prior instrumentation, we can look at that statically and see if it needs flipping, i guess.. not great - # - # Well... we should do this: - # instrument, read it out, see which hints were right, an id that is never wrong no matter how many times branched - # DELETE the branch hints that were wrong. now the wasm runs with 100% correct branch hints. - # DELETE the instrumentation too. - # optimize. run. instrument. all branch hints must be right! could be fewer, but no wrong ones! -READY! - # Generate the middle wasm, which has the first round of instrumentation, - # then the final one with optimizations as well. We only run the final - # one, but the middle one is useful to compare when debugging an error. - middle = wasm + '.mid.wasm' + + # Generate an instrumented wasm. 
+ instrumented = wasm + '.inst.wasm' run([ in_bin('wasm-opt'), wasm, + '-o', instrumented, # Add random branch hints (so we have something to work with). '--randomize-branch-hints', # Instrument them for our fuzzing, then optimize. '--instrument-branch-hints', - '-o', middle, - '-g', - ] + FEATURE_OPTS) - - final = wasm + '.final.wasm' - run([ - in_bin('wasm-opt'), - middle, - '--remove-unused-brs', # XXX - #] + get_random_opts() + [ - # Instrument again after opts, so our fuzzing can see if the opts - # messed anything up. - '--instrument-branch-hints', - '-o', final, '-g', ] + FEATURE_OPTS) - # Run. - out = run_d8_wasm(final) + # Log out the branch hints at runtime. + out = run_d8_wasm(instrumented) # Process the output. We look at the lines like this: # # log-branch: hint 123 of 1 and actual 0 # - # Each line reports a branch id, the hint for its condition, and the - # actual result (if the condition was true). - # - # It is fine for hints to not match expectations, in a fuzz testcase - - # that should happen half the time. What is not fine is if the hint and - # the actual result get out of sync, for which we track pairs from the - # double instrumentation, matched by id: + # Any ID (a particular branch) that we predict wrong is a problem, and + # we will remove that branch hint from the binary. After doing so, we + # will end up with a binary where all branch hints are correct, and we + # then verify that that property is preserved after optimizations. # - # log-branch: hint 123 of 1 and actual 0 - # log-branch: hint -123 of 1 and actual 1 - # - # The second phase of instrumentation adds negative ids, so here we - # would match 123 with -123. - pairs = [] + # (In theory, optimizations could make branch hints wrong in return for + # some benefit that makes things overall faster, but we don't have such + # optimizations for now.) 
+ bad_ids = set() for line in out.splitlines(): if line.startswith('log-branch: hint'): - # Add this as the beginning of a possible pair, if there is - # nothing before us, or a complete pair. - if (not pairs) or len(pairs[-1]) == 2: - pairs.append([line]) - continue + # Parse the ID, the hint, and whether we actually branched. + _, _, id_, _, hint, _, _, actual = first.split(' ') + if hint != actual: + # This hint was misleading. + bad_ids.add(id_) + + # Remove the bad ids (using the instrumentation to identify them by ID), + # and also the instrumentation itself. Then add new instrumentation, + # which we will use to see if any remaining hints are wrong. + final = wasm + '.de_inst.wasm' + args = [ + in_bin('wasm-opt'), + instrumented, + '-o', final, + ] + if bad_ids: + args += [ + '--delete-branch-hints=' + ','.join(bad_ids), + ] + args += [ + '--deinstrument-branch-hints', + '--instrument-branch-hints', + '-g', + ] + FEATURE_OPTS) + run(args) - # This may complete a pair. - last_pair = pairs[-1] - assert len(last_pair) == 1 - last_id = int(last_pair[0].split(' ')[2]) - line_id = int(line.split(' ')[2]) - if last_id >= 0 and last_id == -line_id: - last_pair.append(line) - else: - # They do not match. It is ok if a pair is not found, as the - # optimizer may remove a branch hint or a logging. Start a - # new pair. - pairs.append([line]) - - # Check the pairs. Consider: - # - # log-branch: hint 123 of 1 and actual 0 - # log-branch: hint 123 of 1 and actual 1 - # - # A pair like that is suspect: the actual result shifted - perhaps an - # optimization flipped the condition together with the arms - but the - # hint did not flip with it. That is, we want the pair's hint and actual - # to remain in sync (even if the hint is wrong). 
- for pair in pairs: - if len(pair) != 2: - continue - print(pair) # XXX - first, second = pair - _, _, first_id, _, first_hint, _, _, first_actual = first.split(' ') - _, _, second_id, _, second_hint, _, _, second_actual = second.split(' ') - assert second_id == '-' + first_id - first_alignment = (first_hint != first_actual) - second_alignment = (second_hint != second_actual) - assert first_alignment == second_alignment, 'branch hints must change properly' + # Log out the branch hints at runtime. + out = run_d8_wasm(final) + + # See if any branch hint was wrong. + for line in out.splitlines(): + if line.startswith('log-branch: hint'): + _, _, id_, _, hint, _, _, actual = first.split(' ') + assert hint == actual, 'Branch hint misled us' def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. From ccde12fa29a4d5c3eb6b65929a5613df590d9ebc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 16:44:50 -0700 Subject: [PATCH 103/239] sinpl --- scripts/fuzz_opt.py | 2 +- src/passes/InstrumentBranchHints.cpp | 170 +++++++++++++++------------ 2 files changed, 98 insertions(+), 74 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 98ca83d28fd..1d0c98b7de9 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1921,7 +1921,7 @@ def handle(self, wasm): '--deinstrument-branch-hints', '--instrument-branch-hints', '-g', - ] + FEATURE_OPTS) + ] + FEATURE_OPTS run(args) # Log out the branch hints at runtime. 
diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index e84462398f9..0d73dff3e05 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -97,6 +97,8 @@ #include "ir/eh-utils.h" #include "ir/names.h" +#include "ir/local-graph.h" +#include "ir/parents.h" #include "ir/properties.h" #include "pass.h" #include "support/string.h" @@ -201,64 +203,113 @@ struct InstrumentBranchHints } }; -// Instrumentation info for a chunk of code that is the result of the -// instrumentation pass. -struct Instrumentation { - // The condition before the instrumentation. - Expression* originalCondition; - // The call to the logging that the instrumentation added. - Call* call; -}; - -// Check if an expression's condition is an instrumentation, and return the info -// if so. We are provided the internal name of the logging function. -std::optional getInstrumentation(Expression* condition, Name logBranch) { - // We must identify this pattern: - // - // (block - // (local.set $temp (condition)) - // (call $log (id, prediction, (local.get $temp))) - // (local.get $temp) - // ) - // - auto* block = condition->dynCast(); - if (!block) { - return {}; - } - auto& list = block->list; - if (block->list.size() != 3) { - return {}; - } - auto *call = list[1]->dynCast(); - if (!call || call->target != logBranch) { - return {}; - } - // We found the call, so the rest must be in the proper form. - auto* set = list[0]->cast(); - return Instrumentation{ set->value, call }; -} +template +struct InstrumentationProcessor + : public WalkerPass> { -struct DeleteBranchHints - : public WalkerPass> { - - using Super = WalkerPass>; + using Super = WalkerPass>; // The internal name of our import. Name logBranch; - // The set of IDs to delete. - std::unordered_set idsToDelete; + // A LocalGraph, so we can identify the pattern. 
+ std::unique_ptr localGraph; - void visitIf(If* curr) { processCondition(curr); } + // A map of expressions to their parents, so we can identify the pattern. + std::unique_ptr parents; + + Sub* getSub() { return (Sub*)this; } + + void visitIf(If* curr) { getSub()->processCondition(curr); } void visitBreak(Break* curr) { if (curr->condition) { - processCondition(curr); + getSub()->processCondition(curr); } } // TODO: BrOn, but the condition there is not an i32 + void doWalkModule(Module* module) { + logBranch = getLogBranchImport(module); + if (!logBranch) { + Fatal() << "No branch hint logging import found. Was this code instrumented?"; + } + + Super::doWalkModule(module); + } + + // Helpers + + // Instrumentation info for a chunk of code that is the result of the + // instrumentation pass. + struct Instrumentation { + // The condition before the instrumentation. + Expression* originalCondition; + // The call to the logging that the instrumentation added. + Call* call; + }; + + // Check if an expression's condition is an instrumentation, and return the info + // if so. We are provided the internal name of the logging function, and a + // LocalGraph so we can follow gets to their sets. 
+ std::optional getInstrumentation(Expression* condition) { + // We must identify this pattern: + // + // (br_if + // (block + // (local.set $temp (condition)) + // (call $log (id, prediction, (local.get $temp))) + // (local.get $temp) + // ) + // + // The block may vanish during roundtrip though, so we just follow back from + // the last local.get, which appears in the condition: + // + // (local.set $temp (condition)) + // (call $log (id, prediction, (local.get $temp))) + // (br_if + // (local.get $temp) + // + auto* get = condition->template dynCast(); + if (!get) { + return {}; + } + auto& sets = getSub()->localGraph->getSets(get); + if (sets.size() != 1) { + return {}; + } + auto* set = *sets.begin(); + auto& gets = parent.localGraph->getSetInfluences(set); + if (gets.size() != 2) { + return {}; + } + // The set has two gets: the get in the condition we began at, and + // another. + LocalGet* otherGet = nullptr; + for (auto* get2 : gets) { + if (get2 != get) { + otherGet = get2; + } + } + assert(otherGet); + // See if that other get is used in a logging. The parent should be a + // logging call. + auto* call = getSub()->parents->getParent(otherGet)->dynCast(); + if (!call || call->target != logBranch) { + return {}; + } + // Great, this is indeed a prior instrumentation. + return Instrumentation{ set->value, call }; + } +}; + +struct DeleteBranchHints : public InstrumentationProcessor { + using Super = InstrumentationProcessor; + + // The set of IDs to delete. + std::unordered_set idsToDelete; + template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition, logBranch)) { auto id = info->call->operands[0]->template cast()->value.geti32(); @@ -270,11 +321,6 @@ struct DeleteBranchHints } void doWalkModule(Module* module) { - logBranch = getLogBranchImport(module); - if (!logBranch) { - Fatal() << "No branch hint logging import found. 
Was this code instrumented?"; - } - auto arg = getArgument( "delete-branch-hints", "DeleteBranchHints usage: wasm-opt --delete-branch-hints=10,20,30"); @@ -287,22 +333,9 @@ struct DeleteBranchHints }; struct DeInstrumentBranchHints - : public WalkerPass> { - - using Super = WalkerPass>; - - // The internal name of our import. - Name logBranch; + : public InstrumentationProcessor { - void visitIf(If* curr) { processCondition(curr); } - - void visitBreak(Break* curr) { - if (curr->condition) { - processCondition(curr); - } - } - - // TODO: BrOn, but the condition there is not an i32 + using Super = InstrumentationProcessor; template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition, logBranch)) { @@ -310,15 +343,6 @@ struct DeInstrumentBranchHints curr->condition = info->originalCondition; } } - - void doWalkModule(Module* module) { - logBranch = getLogBranchImport(module); - if (!logBranch) { - Fatal() << "No branch hint logging import found. Was this code instrumented?"; - } - - Super::doWalkModule(module); - } }; } // anonymous namespace From 16a21b393852b58bc893a76ed3181bed77269727 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 16:46:05 -0700 Subject: [PATCH 104/239] fix --- src/passes/InstrumentBranchHints.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 0d73dff3e05..da8005e5e48 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -230,6 +230,15 @@ struct InstrumentationProcessor // TODO: BrOn, but the condition there is not an i32 + void doWalkFunction(Function* func) { + localGraph = std::make_unique(func, getModule()); + localGraph->computeSetInfluences(); + + parents = std::make_unique(func->body); + + Super::doWalkFunction(func); + } + void doWalkModule(Module* module) { logBranch = getLogBranchImport(module); if (!logBranch) { @@ -280,7 +289,7 @@ 
struct InstrumentationProcessor return {}; } auto* set = *sets.begin(); - auto& gets = parent.localGraph->getSetInfluences(set); + auto& gets = parents.localGraph->getSetInfluences(set); if (gets.size() != 2) { return {}; } From e182b11781002dfb4285881b6ffbb36b57db14c8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 16:47:47 -0700 Subject: [PATCH 105/239] fix --- src/passes/InstrumentBranchHints.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index da8005e5e48..bb49e148a93 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -231,7 +231,7 @@ struct InstrumentationProcessor // TODO: BrOn, but the condition there is not an i32 void doWalkFunction(Function* func) { - localGraph = std::make_unique(func, getModule()); + localGraph = std::make_unique(func, this->getModule()); localGraph->computeSetInfluences(); parents = std::make_unique(func->body); @@ -289,7 +289,7 @@ struct InstrumentationProcessor return {}; } auto* set = *sets.begin(); - auto& gets = parents.localGraph->getSetInfluences(set); + auto& gets = getSub()->localGraph->getSetInfluences(set); if (gets.size() != 2) { return {}; } @@ -304,7 +304,7 @@ struct InstrumentationProcessor assert(otherGet); // See if that other get is used in a logging. The parent should be a // logging call. 
- auto* call = getSub()->parents->getParent(otherGet)->dynCast(); + auto* call = getSub()->parents->getParent(otherGet)->template dynCast(); if (!call || call->target != logBranch) { return {}; } @@ -320,7 +320,7 @@ struct DeleteBranchHints : public InstrumentationProcessor { std::unordered_set idsToDelete; template void processCondition(T* curr) { - if (auto info = getInstrumentation(curr->condition, logBranch)) { + if (auto info = getInstrumentation(curr->condition)) { auto id = info->call->operands[0]->template cast()->value.geti32(); if (idsToDelete.count(id)) { // Remove the branch hint. @@ -347,7 +347,7 @@ struct DeInstrumentBranchHints using Super = InstrumentationProcessor; template void processCondition(T* curr) { - if (auto info = getInstrumentation(curr->condition, logBranch)) { + if (auto info = getInstrumentation(curr->condition)) { // Replace the instrumentated condition with the original one. curr->condition = info->originalCondition; } From 16cdc02853d4b58deb72e7bd37255ec35f4dd7d2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 16:50:44 -0700 Subject: [PATCH 106/239] fix --- src/passes/InstrumentBranchHints.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index bb49e148a93..b307ea956a1 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -280,7 +280,9 @@ struct InstrumentationProcessor // (br_if // (local.get $temp) // - auto* get = condition->template dynCast(); + auto* fallthrough = Properties::getFallthrough( + condition, this->getPassOptions(), *this->getModule()); + auto* get = fallthrough->template dynCast(); if (!get) { return {}; } From 067c012b75e26a8e520c2363a12ccf18cab7969a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Jul 2025 17:02:09 -0700 Subject: [PATCH 107/239] FAILTEst --- .../lit/passes/deinstrument-branch-hints.wast | 38 +++++++++++++++++++ 1 file changed, 38 
insertions(+) diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast index df7763549ba..d626a6467b1 100644 --- a/test/lit/passes/deinstrument-branch-hints.wast +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -86,4 +86,42 @@ ) ) ) + + ;; CHECK: (func $br-before (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br-before + ;; As above, but the instrumentation is before us, leaving only a local.get + ;; in the br's condition. We should still identify the pattern and remove + ;; the logging (but we leave the local.set for other things to clean up). 
+ (local $temp i32) + (block $out + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 4) + (i32.const 0) + (local.get $temp) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $temp) + ) + ) + ) ) From 9a7748975bdb26edd84b40ce461bd09a16b0cbf9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 09:54:04 -0700 Subject: [PATCH 108/239] fix --- src/passes/InstrumentBranchHints.cpp | 32 +++++++++++++++---- .../lit/passes/deinstrument-branch-hints.wast | 6 +--- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index b307ea956a1..f5388374f46 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -96,8 +96,10 @@ // #include "ir/eh-utils.h" -#include "ir/names.h" +#include "ir/find_all.h" #include "ir/local-graph.h" +#include "ir/manipulation.h" +#include "ir/names.h" #include "ir/parents.h" #include "ir/properties.h" #include "pass.h" @@ -253,8 +255,9 @@ struct InstrumentationProcessor // Instrumentation info for a chunk of code that is the result of the // instrumentation pass. struct Instrumentation { - // The condition before the instrumentation. - Expression* originalCondition; + // The condition before the instrumentation (a pointer to it, so we can + // replace it). + Expression** originalCondition; // The call to the logging that the instrumentation added. Call* call; }; @@ -311,7 +314,7 @@ struct InstrumentationProcessor return {}; } // Great, this is indeed a prior instrumentation. - return Instrumentation{ set->value, call }; + return Instrumentation{ &set->value, call }; } }; @@ -350,8 +353,25 @@ struct DeInstrumentBranchHints template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition)) { - // Replace the instrumentated condition with the original one. 
- curr->condition = info->originalCondition; + // Replace the instrumentated condition with the original one (swap so + // that the IR remains valid; the other use of the local will not matter, + // as we remove the logging calls). + std::swap(curr->condition, *info->originalCondition); + } + } + + void visitFunction(Function* func) { + if (func->imported()) { + return; + } + // At the very end, remove all logging calls (we use them during the main + // walk to identify instrumentation). + for (auto* call : FindAll(func->body).list) { + if (call->target == logBranch) { + // We would not instrument unreachable code. + assert(call->type == Type::none); + ExpressionManipulator::nop(call); + } } } }; diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast index d626a6467b1..45ead99fb77 100644 --- a/test/lit/passes/deinstrument-branch-hints.wast +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -91,13 +91,9 @@ ;; CHECK-NEXT: (local $temp i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (local.set $temp - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $log - ;; CHECK-NEXT: (i32.const 4) - ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (local.get $temp) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (i32.const 42) From f569d7296f9e4d8fcb12112139bc44dc6d73b614 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 09:59:25 -0700 Subject: [PATCH 109/239] testing --- scripts/fuzz_opt.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 1d0c98b7de9..8b4ce9d7081 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1849,24 +1849,6 @@ class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX def handle(self, wasm): - # Ensure a bugg - open(wasm, 'w').write(''' -(module - (func 
$loop-br_if-flip-reverse (param $x i32) - ;; As above, with a hint of 1, that should flip to 0. - (block $block - (loop $loop - (@metadata.code.branch_hint "\\01") - (br_if $block - (local.get $x) - ) - (br $loop) - ) - ) - ) -) -''') - # Generate an instrumented wasm. instrumented = wasm + '.inst.wasm' run([ @@ -1899,29 +1881,33 @@ def handle(self, wasm): for line in out.splitlines(): if line.startswith('log-branch: hint'): # Parse the ID, the hint, and whether we actually branched. - _, _, id_, _, hint, _, _, actual = first.split(' ') + _, _, id_, _, hint, _, _, actual = line.split(' ') if hint != actual: # This hint was misleading. bad_ids.add(id_) - # Remove the bad ids (using the instrumentation to identify them by ID), - # and also the instrumentation itself. Then add new instrumentation, - # which we will use to see if any remaining hints are wrong. + # Generate the final wasm for testing. final = wasm + '.de_inst.wasm' args = [ in_bin('wasm-opt'), instrumented, '-o', final, ] + # Remove the bad ids (using the instrumentation to identify them by ID). if bad_ids: args += [ '--delete-branch-hints=' + ','.join(bad_ids), ] args += [ + # Remove all prior instrumentation (so it does not confuse us), and + # add new instrumentation of hints we left around, which were all + # valid. '--deinstrument-branch-hints', '--instrument-branch-hints', '-g', ] + FEATURE_OPTS + # Add optimizations to see if things break. + args += get_random_opts() run(args) # Log out the branch hints at runtime. @@ -1930,7 +1916,7 @@ def handle(self, wasm): # See if any branch hint was wrong. 
for line in out.splitlines(): if line.startswith('log-branch: hint'): - _, _, id_, _, hint, _, _, actual = first.split(' ') + _, _, id_, _, hint, _, _, actual = line.split(' ') assert hint == actual, 'Branch hint misled us' def can_run_on_wasm(self, wasm): From 3343a83c123014e08dee25eb5f39c43b3c73b2b9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 10:02:10 -0700 Subject: [PATCH 110/239] fix --- src/passes/RemoveUnusedBrs.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index ef62d973f6c..cb6eddf5931 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -176,7 +176,6 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { -return; if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } From 0b77b6dcd6a30c86d4a6403d4ab7d4624911d448 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 10:22:51 -0700 Subject: [PATCH 111/239] fix --- scripts/fuzz_opt.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8b4ce9d7081..af76e9c4b87 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1886,12 +1886,13 @@ def handle(self, wasm): # This hint was misleading. bad_ids.add(id_) - # Generate the final wasm for testing. - final = wasm + '.de_inst.wasm' + # Generate proper hints for testing: A wasm file with 100% valid branch + # hints, and instrumentation to verify that. + de_instrumented = wasm + '.de_inst.wasm' args = [ in_bin('wasm-opt'), instrumented, - '-o', final, + '-o', de_instrumented, ] # Remove the bad ids (using the instrumentation to identify them by ID). if bad_ids: @@ -1906,8 +1907,18 @@ def handle(self, wasm): '--instrument-branch-hints', '-g', ] + FEATURE_OPTS - # Add optimizations to see if things break. 
- args += get_random_opts() + run(args) + + # Add optimizations to see if things break. We must do this in a + # separate invocation from deinstrumentation etc., due to flags like + # --converge (which would deinstrument multiple times, and after opts). + final = wasm + '.final.wasm' + args = [ + in_bin('wasm-opt'), + de_instrumented, + '-o', final, + '-g', + ] + get_random_opts() + FEATURE_OPTS run(args) # Log out the branch hints at runtime. From f37441b0c2f98afe9a02b1f57d07cdabd40ab609 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 10:23:10 -0700 Subject: [PATCH 112/239] bugify --- src/passes/RemoveUnusedBrs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index cb6eddf5931..a74be5d23f9 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -176,6 +176,7 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { +return; // CAUSE BUG if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } From 92fae6c8015ca3d27561514d3661fb5abc33c5e9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 10:27:42 -0700 Subject: [PATCH 113/239] bettre --- scripts/fuzz_opt.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index af76e9c4b87..3d9fe5bd3e2 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1877,15 +1877,23 @@ def handle(self, wasm): # (In theory, optimizations could make branch hints wrong in return for # some benefit that makes things overall faster, but we don't have such # optimizations for now.) + all_ids = set() bad_ids = set() for line in out.splitlines(): if line.startswith('log-branch: hint'): # Parse the ID, the hint, and whether we actually branched. 
_, _, id_, _, hint, _, _, actual = line.split(' ') + all_ids.add(id_) if hint != actual: # This hint was misleading. bad_ids.add(id_) + # If no good ids remain, there is nothing to test. + if bad_ids == all_ids: + note_ignored_vm_run('no good ids') + 1/0 + return + # Generate proper hints for testing: A wasm file with 100% valid branch # hints, and instrumentation to verify that. de_instrumented = wasm + '.de_inst.wasm' From 8cc956d539cdcd2a69b28847de0cde0f2e33b253 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 10:29:01 -0700 Subject: [PATCH 114/239] bettre --- scripts/fuzz_opt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 3d9fe5bd3e2..cb6f2219e30 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1891,7 +1891,6 @@ def handle(self, wasm): # If no good ids remain, there is nothing to test. if bad_ids == all_ids: note_ignored_vm_run('no good ids') - 1/0 return # Generate proper hints for testing: A wasm file with 100% valid branch From d3e81948d9b13a21cc3ce62469259cfe2c929f42 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 11:07:50 -0700 Subject: [PATCH 115/239] comment --- src/passes/InstrumentBranchHints.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index f5388374f46..8749d5821c0 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -205,6 +205,8 @@ struct InstrumentBranchHints } }; +// Helper class that provides basic utilities for identifying and processing +// instrumentation from InstrumentBranchHints. 
template struct InstrumentationProcessor : public WalkerPass> { From f67680f6520ebe13dcd3b006cd4640411d7f2b1e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 11:20:38 -0700 Subject: [PATCH 116/239] fix --- src/passes/InstrumentBranchHints.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 8749d5821c0..3b67bd84831 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -370,9 +370,12 @@ struct DeInstrumentBranchHints // walk to identify instrumentation). for (auto* call : FindAll(func->body).list) { if (call->target == logBranch) { - // We would not instrument unreachable code. - assert(call->type == Type::none); - ExpressionManipulator::nop(call); + if (call->type == Type::none) { + ExpressionManipulator::nop(call); + } else { + assert(call->type == Type::unreachable); + ExpressionManipulator::unreachable(call); + } } } } From 6ca3ef55d92a8900a5637e2d1ddaf7b9746fd902 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:39:23 -0700 Subject: [PATCH 117/239] format --- src/passes/InstrumentBranchHints.cpp | 21 ++++++++++++--------- src/passes/RemoveUnusedBrs.cpp | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 3b67bd84831..a4d85ef9183 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -208,8 +208,7 @@ struct InstrumentBranchHints // Helper class that provides basic utilities for identifying and processing // instrumentation from InstrumentBranchHints. 
template -struct InstrumentationProcessor - : public WalkerPass> { +struct InstrumentationProcessor : public WalkerPass> { using Super = WalkerPass>; @@ -246,7 +245,8 @@ struct InstrumentationProcessor void doWalkModule(Module* module) { logBranch = getLogBranchImport(module); if (!logBranch) { - Fatal() << "No branch hint logging import found. Was this code instrumented?"; + Fatal() + << "No branch hint logging import found. Was this code instrumented?"; } Super::doWalkModule(module); @@ -264,9 +264,9 @@ struct InstrumentationProcessor Call* call; }; - // Check if an expression's condition is an instrumentation, and return the info - // if so. We are provided the internal name of the logging function, and a - // LocalGraph so we can follow gets to their sets. + // Check if an expression's condition is an instrumentation, and return the + // info if so. We are provided the internal name of the logging function, and + // a LocalGraph so we can follow gets to their sets. std::optional getInstrumentation(Expression* condition) { // We must identify this pattern: // @@ -311,12 +311,13 @@ struct InstrumentationProcessor assert(otherGet); // See if that other get is used in a logging. The parent should be a // logging call. - auto* call = getSub()->parents->getParent(otherGet)->template dynCast(); + auto* call = + getSub()->parents->getParent(otherGet)->template dynCast(); if (!call || call->target != logBranch) { return {}; } // Great, this is indeed a prior instrumentation. 
- return Instrumentation{ &set->value, call }; + return Instrumentation{&set->value, call}; } }; @@ -385,6 +386,8 @@ struct DeInstrumentBranchHints Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } Pass* createDeleteBranchHintsPass() { return new DeleteBranchHints(); } -Pass* createDeInstrumentBranchHintsPass() { return new DeInstrumentBranchHints(); } +Pass* createDeInstrumentBranchHintsPass() { + return new DeInstrumentBranchHints(); +} } // namespace wasm diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index a74be5d23f9..2f381f2bbcc 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -176,7 +176,7 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { -return; // CAUSE BUG + return; // CAUSE BUG if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } From 98713c8901914ee28f6e5e3c6229f7513366ea2c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:39:47 -0700 Subject: [PATCH 118/239] undo --- src/passes/RemoveUnusedBrs.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 2f381f2bbcc..cb6eddf5931 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -176,7 +176,6 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { - return; // CAUSE BUG if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } From b91c3d8b1d318be7ba1f9ec49071debc8679e8d7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:41:50 -0700 Subject: [PATCH 119/239] UNDO.but maybe will need to redo --- src/wasm/wasm-binary.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/wasm/wasm-binary.cpp 
b/src/wasm/wasm-binary.cpp index a6248ed0db8..39b5e7fc77f 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2518,15 +2518,12 @@ getOrMakeName(const std::unordered_map& nameMap, Name name, std::unordered_set& usedNames) { if (auto it = nameMap.find(i); it != nameMap.end()) { - // We found a name in the names section. Use it, and also note it as used - // so we don't generate such a name below, later. - auto mappedName = it->second; - usedNames.insert(mappedName); - return {mappedName, true}; - } - auto valid = Names::getValidNameGivenExisting(name, usedNames); - usedNames.insert(valid); - return {valid, false}; + return {it->second, true}; + } else { + auto valid = Names::getValidNameGivenExisting(name, usedNames); + usedNames.insert(valid); + return {valid, false}; + } } void WasmBinaryReader::readMemories() { From 27eed9e8121aadb2715daec00d40fe29bb11505a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:46:10 -0700 Subject: [PATCH 120/239] add --- scripts/fuzz_opt.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8f832187f5c..c9c672e5a1e 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1874,15 +1874,21 @@ def test_one(random_input, given_wasm): print() if given_wasm: - # if given a wasm file we want to use it as is, but we also want to - # apply properties like not having any NaNs, which the original fuzz - # wasm had applied. that is, we need to preserve properties like not - # having nans through reduction. - try: - run([in_bin('wasm-opt'), given_wasm, '-o', abspath('a.wasm')] + GEN_ARGS + FEATURE_OPTS) - except Exception as e: - print("Internal error in fuzzer! Could not run given wasm") - raise e + # We are given a wasm file to operate on. 
By default we modify it in the + # usual ways, like running DeNAN on it, which is important in many cases + # (imagine the reducer generates a NAN, then we need to restore the + # property of not having any). However, in some cases we do need to + # trust the wasm is correct, as any change may alter the properties we + # want there, so we have an env var to control that, + # BINARYEN_TRUST_GIVEN_WASM. + if os.environ.get('BINARYEN_TRUST_GIVEN_WASM'): + shutil.copyfile(given_wasm, abspath('a.wasm')) + else: + try: + run([in_bin('wasm-opt'), given_wasm, '-o', abspath('a.wasm')] + GEN_ARGS + FEATURE_OPTS) + except Exception as e: + print("Internal error in fuzzer! Could not run given wasm") + raise e else: # emit the target features section so that reduction can work later, # without needing to specify the features @@ -2351,7 +2357,9 @@ def get_random_opts(): (If it does not, then one possible issue is that the fuzzer fails to write a valid binary. If so, you can print the output of the fuzzer's first command (using -ttf / --translate-to-fuzz) in text form and run the reduction from that, -passing --text to the reducer.) +passing --text to the reducer. Another possible fix is to avoid re-processing +the wasm for fuzzing in each iteration, by adding +BINARYEN_TRUST_GIVEN_WASM=1 in the env.) You can also read "%(reduce_sh)s" which has been filled out for you and includes docs and suggestions. 
From 708f4f3ff817fd2833333033b110a1d95e64bce9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:46:53 -0700 Subject: [PATCH 121/239] add --- scripts/fuzz_opt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index c9c672e5a1e..5c06ef33995 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1878,9 +1878,9 @@ def test_one(random_input, given_wasm): # usual ways, like running DeNAN on it, which is important in many cases # (imagine the reducer generates a NAN, then we need to restore the # property of not having any). However, in some cases we do need to - # trust the wasm is correct, as any change may alter the properties we - # want there, so we have an env var to control that, - # BINARYEN_TRUST_GIVEN_WASM. + # trust the wasm is correct, or it is simpler to debug things without + # constant changes in each reduction cycle, so we have an env var to + # control that, BINARYEN_TRUST_GIVEN_WASM. if os.environ.get('BINARYEN_TRUST_GIVEN_WASM'): shutil.copyfile(given_wasm, abspath('a.wasm')) else: From 484de961dfa1eaa4ea096e4ca74a9b16d3fd6841 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 12:53:18 -0700 Subject: [PATCH 122/239] hint --- scripts/fuzz_opt.py | 4 ++-- scripts/fuzz_shell.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index cb6f2219e30..739ae6aead5 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1882,7 +1882,7 @@ def handle(self, wasm): for line in out.splitlines(): if line.startswith('log-branch: hint'): # Parse the ID, the hint, and whether we actually branched. - _, _, id_, _, hint, _, _, actual = line.split(' ') + _, _, id_, _, hint, _, _, actual, _ = line.split(' ') all_ids.add(id_) if hint != actual: # This hint was misleading. @@ -1934,7 +1934,7 @@ def handle(self, wasm): # See if any branch hint was wrong. 
for line in out.splitlines(): if line.startswith('log-branch: hint'): - _, _, id_, _, hint, _, _, actual = line.split(' ') + _, _, id_, _, hint, _, _, actual, _ = line.split(' ') assert hint == actual, 'Branch hint misled us' def can_run_on_wasm(self, wasm): diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index 8bc7078faf5..f33b453a98f 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -364,7 +364,7 @@ var imports = { // see fuzz_opt.py. For testing that expectations actually match reality // (i.e. that branch hints are correct), you can adjust the logic here to // throw on expected != actual . - console.log(`log-branch: hint ${id} of ${expected} and actual ${actual}`); + console.log(`log-branch: hint ${id} of ${expected} and actual ${actual} (${expected === actual ? 'right' : 'WRONG'})`); }, }, // Emscripten support. From b15f27a9084ea5601334c525f94709db9be655a6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:30:54 -0700 Subject: [PATCH 123/239] work --- scripts/fuzz_opt.py | 18 +++++++++++------- src/tools/execution-results.h | 10 +++++++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 739ae6aead5..bcb57f3a850 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1863,11 +1863,13 @@ def handle(self, wasm): ] + FEATURE_OPTS) # Log out the branch hints at runtime. - out = run_d8_wasm(instrumented) + out = run_bynterp(instrumented, ['--fuzz-exec-before', '-all']) # Process the output. We look at the lines like this: # - # log-branch: hint 123 of 1 and actual 0 + # [LoggingExternalInterface log-branch 1 0 0] + # + # where the three integers are: ID, predicted, actual. # # Any ID (a particular branch) that we predict wrong is a problem, and # we will remove that branch hint from the binary. After doing so, we @@ -1879,10 +1881,12 @@ def handle(self, wasm): # optimizations for now.) 
all_ids = set() bad_ids = set() + LEI_LOG_BRANCH = '[LoggingExternalInterface log-branch' for line in out.splitlines(): - if line.startswith('log-branch: hint'): + if line.startswith(LEI_LOG_BRANCH): # Parse the ID, the hint, and whether we actually branched. - _, _, id_, _, hint, _, _, actual, _ = line.split(' ') + # (1:-1 strips away the [ ] at the edges) + _, _, id_, hint, actual = line[1:-1].split(' ') all_ids.add(id_) if hint != actual: # This hint was misleading. @@ -1929,12 +1933,12 @@ def handle(self, wasm): run(args) # Log out the branch hints at runtime. - out = run_d8_wasm(final) + out = run_bynterp(final, ['--fuzz-exec-before', '-all']) # See if any branch hint was wrong. for line in out.splitlines(): - if line.startswith('log-branch: hint'): - _, _, id_, _, hint, _, _, actual, _ = line.split(' ') + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') assert hint == actual, 'Branch hint misled us' def can_run_on_wasm(self, wasm): diff --git a/src/tools/execution-results.h b/src/tools/execution-results.h index 0dea839c839..03e9ccb47aa 100644 --- a/src/tools/execution-results.h +++ b/src/tools/execution-results.h @@ -79,7 +79,15 @@ struct LoggingExternalInterface : public ShellExternalInterface { if (import->module == "fuzzing-support") { if (import->base.startsWith("log")) { // This is a logging function like log-i32 or log-f64 - std::cout << "[LoggingExternalInterface logging"; + std::cout << "[LoggingExternalInterface "; + if (import->base == "log-branch") { + // Report this as a special logging, so we can differentiate it from + // the others in the fuzzer. + std::cout << "log-branch"; + } else { + // All others are just reported as loggings. 
+ std::cout << "logging"; + } loggings.push_back(Literal()); // buffer with a None between calls for (auto argument : arguments) { if (argument.type == Type::i64) { From d1ec1a145765ae38e54b83835d95badbbbb97e5b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:32:06 -0700 Subject: [PATCH 124/239] BUG --- src/passes/RemoveUnusedBrs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index cb6eddf5931..2f381f2bbcc 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -176,6 +176,7 @@ static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { } static void flipBranchHint(Expression* expr, Function* func) { + return; // CAUSE BUG if (auto likely = getBranchHint(expr, func)) { setBranchHint(expr, !*likely, func); } From c3ef41f3139c55bca754d08d53f0a906c6460754 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:44:29 -0700 Subject: [PATCH 125/239] docs --- scripts/fuzz_opt.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index bcb57f3a850..48b0ed69d55 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1844,7 +1844,14 @@ def get_relevant_lines(wat): compare(get_relevant_lines(original), get_relevant_lines(processed), 'Preserve') -# Test that we preserve branch hints properly. +# Test that we preserve branch hints properly. The invariant that we test here +# is that, given correct branch hints (that is, the input wasm's branch hints +# are always correct, a branch is taken iff the hint is that it is taken), then +# the optimizer does not end up with incorrect branch hints. 
It is fine if the +# optimizer removes some hints (it may remove entire chunks of code in DCE, for +# example, and it may find ways to simplify code so fewer things execute), but +# it should not emit a branch hint that is wrong - if it is not certain, it +# should remove the branch hint. class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX @@ -1857,12 +1864,12 @@ def handle(self, wasm): '-o', instrumented, # Add random branch hints (so we have something to work with). '--randomize-branch-hints', - # Instrument them for our fuzzing, then optimize. + # Instrument them with logging. '--instrument-branch-hints', '-g', ] + FEATURE_OPTS) - # Log out the branch hints at runtime. + # Collect the logging. out = run_bynterp(instrumented, ['--fuzz-exec-before', '-all']) # Process the output. We look at the lines like this: @@ -1870,29 +1877,20 @@ def handle(self, wasm): # [LoggingExternalInterface log-branch 1 0 0] # # where the three integers are: ID, predicted, actual. - # - # Any ID (a particular branch) that we predict wrong is a problem, and - # we will remove that branch hint from the binary. After doing so, we - # will end up with a binary where all branch hints are correct, and we - # then verify that that property is preserved after optimizations. - # - # (In theory, optimizations could make branch hints wrong in return for - # some benefit that makes things overall faster, but we don't have such - # optimizations for now.) all_ids = set() bad_ids = set() LEI_LOG_BRANCH = '[LoggingExternalInterface log-branch' for line in out.splitlines(): if line.startswith(LEI_LOG_BRANCH): - # Parse the ID, the hint, and whether we actually branched. - # (1:-1 strips away the [ ] at the edges) + # (1:-1 strips away the '[', ']' at the edges) _, _, id_, hint, actual = line[1:-1].split(' ') all_ids.add(id_) if hint != actual: # This hint was misleading. bad_ids.add(id_) - # If no good ids remain, there is nothing to test. 
+ # If no good ids remain, there is nothing to test (no hints will remain + # later down, after we remove bad ones). if bad_ids == all_ids: note_ignored_vm_run('no good ids') return From 2aacfa3d8bcef5226edf5b8752897e416a06d8b8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:53:04 -0700 Subject: [PATCH 126/239] more --- scripts/fuzz_opt.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 48b0ed69d55..fd7a21cf2a2 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1918,6 +1918,13 @@ def handle(self, wasm): ] + FEATURE_OPTS run(args) + # After that filtering, no invalid branch hint should remain. + out = run_bynterp(de_instrumented, ['--fuzz-exec-before', '-all']) + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') + assert hint == actual, 'Branch hint misled us' + # Add optimizations to see if things break. We must do this in a # separate invocation from deinstrumentation etc., due to flags like # --converge (which would deinstrument multiple times, and after opts). @@ -1930,14 +1937,10 @@ def handle(self, wasm): ] + get_random_opts() + FEATURE_OPTS run(args) - # Log out the branch hints at runtime. - out = run_bynterp(final, ['--fuzz-exec-before', '-all']) - - # See if any branch hint was wrong. - for line in out.splitlines(): - if line.startswith(LEI_LOG_BRANCH): - _, _, id_, hint, actual = line[1:-1].split(' ') - assert hint == actual, 'Branch hint misled us' + # The output should be identical to before, including the fact that all + # branch hints are valid. + out2 = run_bynterp(final, ['--fuzz-exec-before', '-all']) + compare(out, out2, 'BranchHintPreservation') def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. 
From 189216a2d4f2e451684b0e4a8e014736cc759aa7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:53:44 -0700 Subject: [PATCH 127/239] more --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index fd7a21cf2a2..bfcda0d00cc 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1940,7 +1940,7 @@ def handle(self, wasm): # The output should be identical to before, including the fact that all # branch hints are valid. out2 = run_bynterp(final, ['--fuzz-exec-before', '-all']) - compare(out, out2, 'BranchHintPreservation') + compare_between_vms(out, out2, 'BranchHintPreservation') def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. From b15c678d61d53e613d63bb525b072066b9beeb91 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 13:57:31 -0700 Subject: [PATCH 128/239] more --- scripts/fuzz_opt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index bfcda0d00cc..7b74cce96d3 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1940,7 +1940,10 @@ def handle(self, wasm): # The output should be identical to before, including the fact that all # branch hints are valid. out2 = run_bynterp(final, ['--fuzz-exec-before', '-all']) - compare_between_vms(out, out2, 'BranchHintPreservation') + # Filter outputs to relevant lines. + def filter(text): + return '\n'.join([line for line in text.splitlines() if line.startswith(LEI_LOG_BRANCH)]) + compare(filter(out), filter(out2), 'BranchHintPreservation') def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. 
From dd6e04c4eb5089aad64cd25029663984da41facd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Jul 2025 14:39:11 -0700 Subject: [PATCH 129/239] try --- scripts/fuzz_opt.py | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 7b74cce96d3..28f19520498 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1909,41 +1909,50 @@ def handle(self, wasm): '--delete-branch-hints=' + ','.join(bad_ids), ] args += [ - # Remove all prior instrumentation (so it does not confuse us), and - # add new instrumentation of hints we left around, which were all - # valid. + # Remove all prior instrumentation, so it does not confuse us later + # when we log our final hints, and also so it does not inhibit + # optimizations. '--deinstrument-branch-hints', - '--instrument-branch-hints', '-g', ] + FEATURE_OPTS run(args) # After that filtering, no invalid branch hint should remain. - out = run_bynterp(de_instrumented, ['--fuzz-exec-before', '-all']) - for line in out.splitlines(): - if line.startswith(LEI_LOG_BRANCH): - _, _, id_, hint, actual = line[1:-1].split(' ') - assert hint == actual, 'Branch hint misled us' + def check_bad_hints(wasm, stage): + out = run_bynterp(wasm, ['--fuzz-exec-before', '-all']) + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') + assert hint == actual, stage - # Add optimizations to see if things break. We must do this in a - # separate invocation from deinstrumentation etc., due to flags like - # --converge (which would deinstrument multiple times, and after opts). - final = wasm + '.final.wasm' + check_bad_hints(de_instrumented, 'Bad hint after deletions') + + # Add optimizations to see if things break. 
+ opted = wasm + '.opted.wasm' args = [ in_bin('wasm-opt'), de_instrumented, - '-o', final, + '-o', opted, '-g', ] + get_random_opts() + FEATURE_OPTS run(args) - # The output should be identical to before, including the fact that all - # branch hints are valid. - out2 = run_bynterp(final, ['--fuzz-exec-before', '-all']) - # Filter outputs to relevant lines. - def filter(text): - return '\n'.join([line for line in text.splitlines() if line.startswith(LEI_LOG_BRANCH)]) - compare(filter(out), filter(out2), 'BranchHintPreservation') + # Add instrumentation, to see if any branch hints are wrong after + # optimizations. We must do this in a separate invocation from the + # optimizations due to flags like --converge (which would instrument + # multiple times). + final = wasm + '.final.wasm' + args = [ + in_bin('wasm-opt'), + opted, + '-o', final, + '--instrument-branch-hints', + '-g', + ] + FEATURE_OPTS + run(args) + + # No bad hints should pop up after optimizations. + check_bad_hints(final, 'Bad hint after optimizations') def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. From 4e99909c234015ec4d483d9c4e390dfece185a96 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:46:26 -0700 Subject: [PATCH 130/239] work --- scripts/fuzz_opt.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 153139cb8ec..7749ce5c301 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1846,7 +1846,7 @@ def get_relevant_lines(wat): # Test that we preserve branch hints properly. The invariant that we test here # is that, given correct branch hints (that is, the input wasm's branch hints -# are always correct, a branch is taken iff the hint is that it is taken), then +# are always correct: a branch is taken iff the hint is that it is taken), then # the optimizer does not end up with incorrect branch hints. 
It is fine if the # optimizer removes some hints (it may remove entire chunks of code in DCE, for # example, and it may find ways to simplify code so fewer things execute), but @@ -1917,16 +1917,6 @@ def handle(self, wasm): ] + FEATURE_OPTS run(args) - # After that filtering, no invalid branch hint should remain. - def check_bad_hints(wasm, stage): - out = run_bynterp(wasm, ['--fuzz-exec-before', '-all']) - for line in out.splitlines(): - if line.startswith(LEI_LOG_BRANCH): - _, _, id_, hint, actual = line[1:-1].split(' ') - assert hint == actual, stage - - check_bad_hints(de_instrumented, 'Bad hint after deletions') - # Add optimizations to see if things break. opted = wasm + '.opted.wasm' args = [ @@ -1952,7 +1942,12 @@ def check_bad_hints(wasm, stage): run(args) # No bad hints should pop up after optimizations. - check_bad_hints(final, 'Bad hint after optimizations') + # After that filtering, no invalid branch hint should remain. + out = run_bynterp(final, ['--fuzz-exec-before', '-all']) + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') + assert hint == actual, 'Bad hint after optimizations' def can_run_on_wasm(self, wasm): # Avoid things d8 cannot fully run. 
From 32845b8f20218ed8be7840f92985aabb68887b7d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:50:14 -0700 Subject: [PATCH 131/239] start --- scripts/fuzz_opt.py | 115 +++++++- src/passes/InstrumentBranchHints.cpp | 255 +++++++++++++++++- src/passes/RandomizeBranchHints.cpp | 2 + src/passes/pass.cpp | 6 + src/passes/passes.h | 2 + src/tools/execution-results.h | 10 +- .../lit/passes/deinstrument-branch-hints.wast | 123 +++++++++ test/lit/passes/delete-branch-hints.wast | 163 +++++++++++ 8 files changed, 662 insertions(+), 14 deletions(-) create mode 100644 test/lit/passes/deinstrument-branch-hints.wast create mode 100644 test/lit/passes/delete-branch-hints.wast diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 5c06ef33995..7749ce5c301 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1844,8 +1844,118 @@ def get_relevant_lines(wat): compare(get_relevant_lines(original), get_relevant_lines(processed), 'Preserve') +# Test that we preserve branch hints properly. The invariant that we test here +# is that, given correct branch hints (that is, the input wasm's branch hints +# are always correct: a branch is taken iff the hint is that it is taken), then +# the optimizer does not end up with incorrect branch hints. It is fine if the +# optimizer removes some hints (it may remove entire chunks of code in DCE, for +# example, and it may find ways to simplify code so fewer things execute), but +# it should not emit a branch hint that is wrong - if it is not certain, it +# should remove the branch hint. +class BranchHintPreservation(TestCaseHandler): + frequency = 1 # XXX + + def handle(self, wasm): + # Generate an instrumented wasm. + instrumented = wasm + '.inst.wasm' + run([ + in_bin('wasm-opt'), + wasm, + '-o', instrumented, + # Add random branch hints (so we have something to work with). + '--randomize-branch-hints', + # Instrument them with logging. 
+ '--instrument-branch-hints', + '-g', + ] + FEATURE_OPTS) + + # Collect the logging. + out = run_bynterp(instrumented, ['--fuzz-exec-before', '-all']) + + # Process the output. We look at the lines like this: + # + # [LoggingExternalInterface log-branch 1 0 0] + # + # where the three integers are: ID, predicted, actual. + all_ids = set() + bad_ids = set() + LEI_LOG_BRANCH = '[LoggingExternalInterface log-branch' + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + # (1:-1 strips away the '[', ']' at the edges) + _, _, id_, hint, actual = line[1:-1].split(' ') + all_ids.add(id_) + if hint != actual: + # This hint was misleading. + bad_ids.add(id_) + + # If no good ids remain, there is nothing to test (no hints will remain + # later down, after we remove bad ones). + if bad_ids == all_ids: + note_ignored_vm_run('no good ids') + return + + # Generate proper hints for testing: A wasm file with 100% valid branch + # hints, and instrumentation to verify that. + de_instrumented = wasm + '.de_inst.wasm' + args = [ + in_bin('wasm-opt'), + instrumented, + '-o', de_instrumented, + ] + # Remove the bad ids (using the instrumentation to identify them by ID). + if bad_ids: + args += [ + '--delete-branch-hints=' + ','.join(bad_ids), + ] + args += [ + # Remove all prior instrumentation, so it does not confuse us later + # when we log our final hints, and also so it does not inhibit + # optimizations. + '--deinstrument-branch-hints', + '-g', + ] + FEATURE_OPTS + run(args) + + # Add optimizations to see if things break. + opted = wasm + '.opted.wasm' + args = [ + in_bin('wasm-opt'), + de_instrumented, + '-o', opted, + '-g', + ] + get_random_opts() + FEATURE_OPTS + run(args) + + # Add instrumentation, to see if any branch hints are wrong after + # optimizations. We must do this in a separate invocation from the + # optimizations due to flags like --converge (which would instrument + # multiple times). 
+ final = wasm + '.final.wasm' + args = [ + in_bin('wasm-opt'), + opted, + '-o', final, + '--instrument-branch-hints', + '-g', + ] + FEATURE_OPTS + run(args) + + # No bad hints should pop up after optimizations. + # After that filtering, no invalid branch hint should remain. + out = run_bynterp(final, ['--fuzz-exec-before', '-all']) + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') + assert hint == actual, 'Bad hint after optimizations' + + def can_run_on_wasm(self, wasm): + # Avoid things d8 cannot fully run. + return all_disallowed(['shared-everything', 'strings', 'custom-descriptors']) + + # The global list of all test case handlers -testcase_handlers = [ +''' FuzzExec(), CompareVMs(), CheckDeterminism(), @@ -1859,6 +1969,9 @@ def get_relevant_lines(wat): ClusterFuzz(), Two(), PreserveImportsExports(), +''' +testcase_handlers = [ + BranchHintPreservation(), ] diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 35d2dbabb15..a4d85ef9183 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -54,11 +54,56 @@ // if (expected != actual) throw `Bad branch hint! (${id})`; // }; // +// A pass to delete branch hints is also provided, which finds instrumentations +// and the IDs in those calls, and deletes branch hints that were provided. For +// example, +// +// --delete-branch-hints=10,20 +// +// would do this transformation: +// +// @metadata.branch.hint A +// if (temp = condition; log(10, A, temp); temp) { // 10 matches one of 10,20 +// X +// } +// @metadata.branch.hint B +// if (temp = condition; log(99, B, temp); temp) { // 99 does not match +// Y +// } +// +// => +// +// // Used to be a branch hint here, but it was deleted. +// if (temp = condition; log(10, A, temp); temp) { +// X +// } +// @metadata.branch.hint B // this one is unmodified.
+// if (temp = condition; log(99, B, temp); temp) { +// Y +// } +// +// A pass to undo the instrumentation is also provided, which does +// +// if (temp = condition; log(123, A, temp); temp) { +// X +// } +// +// => +// +// if (condition) { +// X +// } +// #include "ir/eh-utils.h" +#include "ir/find_all.h" +#include "ir/local-graph.h" +#include "ir/manipulation.h" #include "ir/names.h" +#include "ir/parents.h" #include "ir/properties.h" #include "pass.h" +#include "support/string.h" #include "wasm-builder.h" #include "wasm.h" @@ -66,6 +111,21 @@ namespace wasm { namespace { +// The module and base names of our import. +const Name MODULE = "fuzzing-support"; +const Name BASE = "log-branch"; + +// Finds our import, if it exists. +Name getLogBranchImport(Module* module) { + // Find our import, if we were already run on this module. + for (auto& func : module->functions) { + if (func->module == MODULE && func->base == BASE) { + return func->name; + } + } + return Name(); +} + // The branch id, which increments as we go. int branchId = 1; @@ -74,10 +134,6 @@ struct InstrumentBranchHints using Super = WalkerPass>; - // The module and base names of our import. - const Name MODULE = "fuzzing-support"; - const Name BASE = "log-branch"; - // The internal name of our import. Name logBranch; @@ -132,14 +188,8 @@ struct InstrumentBranchHints } void doWalkModule(Module* module) { - // Find our import, if we were already run on this module. - for (auto& func : module->functions) { - if (func->module == MODULE && func->base == BASE) { - logBranch = func->name; - break; - } - } - // Otherwise, add it. + logBranch = getLogBranchImport(module); + // If it doesn't exist, add it. if (!logBranch) { auto* func = module->addFunction(Builder::makeFunction( Names::getValidFunctionName(*module, BASE), @@ -155,8 +205,189 @@ struct InstrumentBranchHints } }; +// Helper class that provides basic utilities for identifying and processing +// instrumentation from InstrumentBranchHints. 
+template +struct InstrumentationProcessor : public WalkerPass> { + + using Super = WalkerPass>; + + // The internal name of our import. + Name logBranch; + + // A LocalGraph, so we can identify the pattern. + std::unique_ptr localGraph; + + // A map of expressions to their parents, so we can identify the pattern. + std::unique_ptr parents; + + Sub* getSub() { return (Sub*)this; } + + void visitIf(If* curr) { getSub()->processCondition(curr); } + + void visitBreak(Break* curr) { + if (curr->condition) { + getSub()->processCondition(curr); + } + } + + // TODO: BrOn, but the condition there is not an i32 + + void doWalkFunction(Function* func) { + localGraph = std::make_unique(func, this->getModule()); + localGraph->computeSetInfluences(); + + parents = std::make_unique(func->body); + + Super::doWalkFunction(func); + } + + void doWalkModule(Module* module) { + logBranch = getLogBranchImport(module); + if (!logBranch) { + Fatal() + << "No branch hint logging import found. Was this code instrumented?"; + } + + Super::doWalkModule(module); + } + + // Helpers + + // Instrumentation info for a chunk of code that is the result of the + // instrumentation pass. + struct Instrumentation { + // The condition before the instrumentation (a pointer to it, so we can + // replace it). + Expression** originalCondition; + // The call to the logging that the instrumentation added. + Call* call; + }; + + // Check if an expression's condition is an instrumentation, and return the + // info if so. We are provided the internal name of the logging function, and + // a LocalGraph so we can follow gets to their sets. 
+ std::optional getInstrumentation(Expression* condition) { + // We must identify this pattern: + // + // (br_if + // (block + // (local.set $temp (condition)) + // (call $log (id, prediction, (local.get $temp))) + // (local.get $temp) + // ) + // + // The block may vanish during roundtrip though, so we just follow back from + // the last local.get, which appears in the condition: + // + // (local.set $temp (condition)) + // (call $log (id, prediction, (local.get $temp))) + // (br_if + // (local.get $temp) + // + auto* fallthrough = Properties::getFallthrough( + condition, this->getPassOptions(), *this->getModule()); + auto* get = fallthrough->template dynCast(); + if (!get) { + return {}; + } + auto& sets = getSub()->localGraph->getSets(get); + if (sets.size() != 1) { + return {}; + } + auto* set = *sets.begin(); + auto& gets = getSub()->localGraph->getSetInfluences(set); + if (gets.size() != 2) { + return {}; + } + // The set has two gets: the get in the condition we began at, and + // another. + LocalGet* otherGet = nullptr; + for (auto* get2 : gets) { + if (get2 != get) { + otherGet = get2; + } + } + assert(otherGet); + // See if that other get is used in a logging. The parent should be a + // logging call. + auto* call = + getSub()->parents->getParent(otherGet)->template dynCast(); + if (!call || call->target != logBranch) { + return {}; + } + // Great, this is indeed a prior instrumentation. + return Instrumentation{&set->value, call}; + } +}; + +struct DeleteBranchHints : public InstrumentationProcessor { + using Super = InstrumentationProcessor; + + // The set of IDs to delete. + std::unordered_set idsToDelete; + + template void processCondition(T* curr) { + if (auto info = getInstrumentation(curr->condition)) { + auto id = info->call->operands[0]->template cast()->value.geti32(); + if (idsToDelete.count(id)) { + // Remove the branch hint. 
+ getFunction()->codeAnnotations[curr].branchLikely = {}; + } + } + } + + void doWalkModule(Module* module) { + auto arg = getArgument( + "delete-branch-hints", + "DeleteBranchHints usage: wasm-opt --delete-branch-hints=10,20,30"); + for (auto& str : String::Split(arg, String::Split::NewLineOr(","))) { + idsToDelete.insert(std::stoi(str)); + } + + Super::doWalkModule(module); + } +}; + +struct DeInstrumentBranchHints + : public InstrumentationProcessor { + + using Super = InstrumentationProcessor; + + template void processCondition(T* curr) { + if (auto info = getInstrumentation(curr->condition)) { + // Replace the instrumentated condition with the original one (swap so + // that the IR remains valid; the other use of the local will not matter, + // as we remove the logging calls). + std::swap(curr->condition, *info->originalCondition); + } + } + + void visitFunction(Function* func) { + if (func->imported()) { + return; + } + // At the very end, remove all logging calls (we use them during the main + // walk to identify instrumentation). 
+ for (auto* call : FindAll(func->body).list) { + if (call->target == logBranch) { + if (call->type == Type::none) { + ExpressionManipulator::nop(call); + } else { + assert(call->type == Type::unreachable); + ExpressionManipulator::unreachable(call); + } + } + } + } +}; + } // anonymous namespace Pass* createInstrumentBranchHintsPass() { return new InstrumentBranchHints(); } +Pass* createDeleteBranchHintsPass() { return new DeleteBranchHints(); } +Pass* createDeInstrumentBranchHintsPass() { + return new DeInstrumentBranchHints(); +} } // namespace wasm diff --git a/src/passes/RandomizeBranchHints.cpp b/src/passes/RandomizeBranchHints.cpp index 7b013fc7ebf..b74d2e8a093 100644 --- a/src/passes/RandomizeBranchHints.cpp +++ b/src/passes/RandomizeBranchHints.cpp @@ -50,6 +50,8 @@ struct RandomizeBranchHints } } + // TODO: BrOn + template void processCondition(T* curr) { auto& likely = getFunction()->codeAnnotations[curr].branchLikely; switch (hash % 3) { diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 5e98ef5d086..b99c83d3f11 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -137,6 +137,12 @@ void PassRegistry::registerPasses() { "propagate-debug-locs", "propagate debug location from parents or previous siblings to child nodes", createDebugLocationPropagationPass); + registerPass("deinstrument-branch-hints", + "de-instrument branch hint instrumentation", + createDeInstrumentBranchHintsPass); + registerPass("delete-branch-hints", + "delete branch hints using a list of instrumented IDs", + createDeleteBranchHintsPass); registerPass("denan", "instrument the wasm to convert NaNs into 0 at runtime", createDeNaNPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e0c03bad8d7..92dcd3e4eb4 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -37,6 +37,8 @@ Pass* createDAEPass(); Pass* createDAEOptimizingPass(); Pass* createDataFlowOptsPass(); Pass* createDeadCodeEliminationPass(); +Pass* createDeInstrumentBranchHintsPass(); 
+Pass* createDeleteBranchHintsPass(); Pass* createDeNaNPass(); Pass* createDeAlignPass(); Pass* createDebugLocationPropagationPass(); diff --git a/src/tools/execution-results.h b/src/tools/execution-results.h index 0dea839c839..03e9ccb47aa 100644 --- a/src/tools/execution-results.h +++ b/src/tools/execution-results.h @@ -79,7 +79,15 @@ struct LoggingExternalInterface : public ShellExternalInterface { if (import->module == "fuzzing-support") { if (import->base.startsWith("log")) { // This is a logging function like log-i32 or log-f64 - std::cout << "[LoggingExternalInterface logging"; + std::cout << "[LoggingExternalInterface "; + if (import->base == "log-branch") { + // Report this as a special logging, so we can differentiate it from + // the others in the fuzzer. + std::cout << "log-branch"; + } else { + // All others are just reported as loggings. + std::cout << "logging"; + } loggings.push_back(Literal()); // buffer with a None between calls for (auto argument : arguments) { if (argument.type == Type::i64) { diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast new file mode 100644 index 00000000000..45ead99fb77 --- /dev/null +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -0,0 +1,123 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. 
+ +;; RUN: foreach %s %t wasm-opt -all --deinstrument-branch-hints -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log (type $1) (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $log (param i32 i32 i32))) + + ;; CHECK: (func $if (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if + (local $temp i32) + ;; The instrumentation should be removed, and if the if's condition should + ;; be 42. + (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 1) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + ;; CHECK: (func $br (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br + ;; The same, with a br. 
+ (local $temp i32) + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 4) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + ) + ) + ) + + ;; CHECK: (func $br-before (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br-before + ;; As above, but the instrumentation is before us, leaving only a local.get + ;; in the br's condition. We should still identify the pattern and remove + ;; the logging (but we leave the local.set for other things to clean up). + (local $temp i32) + (block $out + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 4) + (i32.const 0) + (local.get $temp) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $temp) + ) + ) + ) +) diff --git a/test/lit/passes/delete-branch-hints.wast b/test/lit/passes/delete-branch-hints.wast new file mode 100644 index 00000000000..375b10d16c2 --- /dev/null +++ b/test/lit/passes/delete-branch-hints.wast @@ -0,0 +1,163 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. 
+ +;; RUN: foreach %s %t wasm-opt -all --delete-branch-hints=10,30 -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log (type $1) (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $log (param i32 i32 i32))) + + ;; CHECK: (func $if-10 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-10 + (local $temp i32) + ;; The branch hint should be removed, since the ID "10" is in the list of + ;; 10, 30. 
+ (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 10) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + ;; CHECK: (func $if-20 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 99) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-20 + (local $temp i32) + ;; The branch hint should *not* be removed: 20 is not in the list. + (@metadata.code.branch_hint "\00") + (if + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 20) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + (then + (drop + (i32.const 1337) + ) + ) + (else + (drop + (i32.const 99) + ) + ) + ) + ) + + ;; CHECK: (func $br-30 (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $log + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br-30 + ;; The hint should be removed. 
+ (local $temp i32) + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (block (result i32) + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 30) + (i32.const 0) + (local.get $temp) + ) + (local.get $temp) + ) + ) + ) + ) +) From 4277a0514c3d9a7f77d4c8b49ac1520e244f94d3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:50:48 -0700 Subject: [PATCH 132/239] finish --- scripts/fuzz_opt.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 7749ce5c301..74ab3aeb2c2 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1853,7 +1853,7 @@ def get_relevant_lines(wat): # it should not emit a branch hint that is wrong - if it is not certain, it # should remove the branch hint. class BranchHintPreservation(TestCaseHandler): - frequency = 1 # XXX + frequency = 0.1 def handle(self, wasm): # Generate an instrumented wasm. @@ -1955,7 +1955,7 @@ def can_run_on_wasm(self, wasm): # The global list of all test case handlers -''' +testcase_handlers = [ FuzzExec(), CompareVMs(), CheckDeterminism(), @@ -1969,8 +1969,6 @@ def can_run_on_wasm(self, wasm): ClusterFuzz(), Two(), PreserveImportsExports(), -''' -testcase_handlers = [ BranchHintPreservation(), ] From 05764b8339bdd2d3ae392b2d2787b4b1fb46ea2d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:52:30 -0700 Subject: [PATCH 133/239] fix --- scripts/fuzz_opt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 74ab3aeb2c2..6569640a0b8 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1942,7 +1942,6 @@ def handle(self, wasm): run(args) # No bad hints should pop up after optimizations. - # After that filtering, no invalid branch hint should remain. 
out = run_bynterp(final, ['--fuzz-exec-before', '-all']) for line in out.splitlines(): if line.startswith(LEI_LOG_BRANCH): From 620daeb3cd81165dde8b30b7c6e98401af6ca037 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:53:02 -0700 Subject: [PATCH 134/239] fix --- scripts/fuzz_opt.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 6569640a0b8..3a6c66c4577 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1948,10 +1948,6 @@ def handle(self, wasm): _, _, id_, hint, actual = line[1:-1].split(' ') assert hint == actual, 'Bad hint after optimizations' - def can_run_on_wasm(self, wasm): - # Avoid things d8 cannot fully run. - return all_disallowed(['shared-everything', 'strings', 'custom-descriptors']) - # The global list of all test case handlers testcase_handlers = [ From aa6ad0e45420d8e4f78698c174cc24bf78a5fec0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 08:57:16 -0700 Subject: [PATCH 135/239] clean --- src/passes/InstrumentBranchHints.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index a4d85ef9183..6fc46946355 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -117,7 +117,6 @@ const Name BASE = "log-branch"; // Finds our import, if it exists. Name getLogBranchImport(Module* module) { - // Find our import, if we were already run on this module. for (auto& func : module->functions) { if (func->module == MODULE && func->base == BASE) { return func->name; @@ -265,8 +264,7 @@ struct InstrumentationProcessor : public WalkerPass> { }; // Check if an expression's condition is an instrumentation, and return the - // info if so. We are provided the internal name of the logging function, and - // a LocalGraph so we can follow gets to their sets. + // info if so. 
std::optional getInstrumentation(Expression* condition) { // We must identify this pattern: // @@ -356,9 +354,9 @@ struct DeInstrumentBranchHints template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition)) { - // Replace the instrumentated condition with the original one (swap so - // that the IR remains valid; the other use of the local will not matter, - // as we remove the logging calls). + // Replace the instrumented condition with the original one (swap so that + // the IR remains valid; the other use of the local will not matter, as we + // remove the logging calls). std::swap(curr->condition, *info->originalCondition); } } From c001d31e0f656b78a39ef87af918564b09976e02 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:00:11 -0700 Subject: [PATCH 136/239] clean --- test/lit/passes/deinstrument-branch-hints.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast index 45ead99fb77..c01c4479dbf 100644 --- a/test/lit/passes/deinstrument-branch-hints.wast +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -29,7 +29,7 @@ ;; CHECK-NEXT: ) (func $if (local $temp i32) - ;; The instrumentation should be removed, and if the if's condition should + ;; The instrumentation should be removed, and the if's condition should ;; be 42. 
(@metadata.code.branch_hint "\00") (if From 1db1fa5c15b020c9c4a4bd5cbc7bfdd429b5ccfa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:08:02 -0700 Subject: [PATCH 137/239] mark them as test passes --- src/passes/pass.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index b99c83d3f11..6d2592e91da 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -137,12 +137,6 @@ void PassRegistry::registerPasses() { "propagate-debug-locs", "propagate debug location from parents or previous siblings to child nodes", createDebugLocationPropagationPass); - registerPass("deinstrument-branch-hints", - "de-instrument branch hint instrumentation", - createDeInstrumentBranchHintsPass); - registerPass("delete-branch-hints", - "delete branch hints using a list of instrumented IDs", - createDeleteBranchHintsPass); registerPass("denan", "instrument the wasm to convert NaNs into 0 at runtime", createDeNaNPass); @@ -418,9 +412,6 @@ void PassRegistry::registerPasses() { registerPass("propagate-globals-globally", "propagate global values to other globals (useful for tests)", createPropagateGlobalsGloballyPass); - registerTestPass("randomize-branch-hints", - "randomize branch hints (for fuzzing)", - createRandomizeBranchHintsPass); registerPass("remove-non-js-ops", "removes operations incompatible with js", createRemoveNonJSOpsPass); @@ -457,9 +448,6 @@ void PassRegistry::registerPasses() { registerPass("reorder-globals", "sorts globals by access frequency", createReorderGlobalsPass); - registerTestPass("reorder-globals-always", - "sorts globals by access frequency (even if there are few)", - createReorderGlobalsAlwaysPass); registerPass("reorder-locals", "sorts locals by access frequency", createReorderLocalsPass); @@ -605,9 +593,21 @@ void PassRegistry::registerPasses() { registerTestPass("catch-pop-fixup", "fixup nested pops within catches", createCatchPopFixupPass); + 
registerTestPass("deinstrument-branch-hints", + "de-instrument branch hint instrumentation", + createDeInstrumentBranchHintsPass); + registerTestPass("delete-branch-hints", + "delete branch hints using a list of instrumented IDs", + createDeleteBranchHintsPass); registerTestPass("experimental-type-generalizing", "generalize types (not yet sound)", createTypeGeneralizingPass); + registerTestPass("randomize-branch-hints", + "randomize branch hints (for fuzzing)", + createRandomizeBranchHintsPass); + registerTestPass("reorder-globals-always", + "sorts globals by access frequency (even if there are few)", + createReorderGlobalsAlwaysPass); } void PassRunner::addIfNoDWARFIssues(std::string passName) { From 877d9e6adcedfd7c28b273361c4526a190e01fdb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:08:13 -0700 Subject: [PATCH 138/239] format --- src/passes/pass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 6d2592e91da..30e7416c2cf 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -594,11 +594,11 @@ void PassRegistry::registerPasses() { "fixup nested pops within catches", createCatchPopFixupPass); registerTestPass("deinstrument-branch-hints", - "de-instrument branch hint instrumentation", - createDeInstrumentBranchHintsPass); + "de-instrument branch hint instrumentation", + createDeInstrumentBranchHintsPass); registerTestPass("delete-branch-hints", - "delete branch hints using a list of instrumented IDs", - createDeleteBranchHintsPass); + "delete branch hints using a list of instrumented IDs", + createDeleteBranchHintsPass); registerTestPass("experimental-type-generalizing", "generalize types (not yet sound)", createTypeGeneralizingPass); From 9d4fa56ad94c9ced688f839a36033b927510c9c5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:14:48 -0700 Subject: [PATCH 139/239] new --- src/ir/branch-hints.h | 60 ++++++++++++++++++++++++++++++++++ 
src/passes/RemoveUnusedBrs.cpp | 45 ++++--------------------- 2 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 src/ir/branch-hints.h diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h new file mode 100644 index 00000000000..50e14abd6a0 --- /dev/null +++ b/src/ir/branch-hints.h @@ -0,0 +1,60 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_branch_hint_h +#define wasm_ir_branch_hint_h + +#include "wasm.h" + +// +// Branch hint utilities to get them, set, flip, etc. +// + +namespace wasm::BranchHints { + +inline std::optional get(Expression* expr, Function* func) { + auto iter = func->codeAnnotations.find(expr); + if (iter == func->codeAnnotations.end()) { + // No annotations at all. 
+ return {}; + } + return iter->second.branchLikely; +} + +inline void set(Expression* expr, bool likely, Function* func) { + func->codeAnnotations[expr].branchLikely = likely; +} + +inline void clear(Expression* expr, Function* func) { + func->codeAnnotations[expr].branchLikely = {}; +} + +inline void copyTo(Expression* from, Expression* to, Function* func) { + auto fromLikely = get(from, func); + if (fromLikely) { + set(to, *fromLikely, func); + } +} + +inline void flip(Expression* expr, Function* func) { + if (auto likely = get(expr, func)) { + set(expr, !*likely, func); + } +} + +} // namespace wasm::BranchHints + +#endif // wasm_ir_branch_hint_h diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 2f381f2bbcc..5ad554280fd 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -18,6 +18,7 @@ // Removes branches for which we go to where they go anyhow // +#include "ir/branch-hints.h" #include "ir/branch-utils.h" #include "ir/cost.h" #include "ir/drop.h" @@ -150,38 +151,6 @@ static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions, return tooCostlyToRunUnconditionally(passOptions, max); } -// Branch hint utilities. -static std::optional getBranchHint(Expression* expr, Function* func) { - auto iter = func->codeAnnotations.find(expr); - if (iter == func->codeAnnotations.end()) { - // No annotations at all. 
- return {}; - } - return iter->second.branchLikely; -} - -static void setBranchHint(Expression* expr, bool likely, Function* func) { - func->codeAnnotations[expr].branchLikely = likely; -} - -static void clearBranchHint(Expression* expr, Function* func) { - func->codeAnnotations[expr].branchLikely = {}; -} - -static void copyBranchHintTo(Expression* from, Expression* to, Function* func) { - auto fromLikely = getBranchHint(from, func); - if (fromLikely) { - setBranchHint(to, *fromLikely, func); - } -} - -static void flipBranchHint(Expression* expr, Function* func) { - return; // CAUSE BUG - if (auto likely = getBranchHint(expr, func)) { - setBranchHint(expr, !*likely, func); - } -} - struct RemoveUnusedBrs : public WalkerPass> { bool isFunctionParallel() override { return true; } @@ -461,7 +430,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeSelect(br->condition, curr->condition, zero); } br->finalize(); - copyBranchHintTo(curr, br, getFunction()); + BranchHints::copyTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } @@ -497,10 +466,10 @@ struct RemoveUnusedBrs : public WalkerPass> { // close to certainty). If one of them lacks a hint, we know nothing. If // both are unlikely, we can say that A && B is also unlikely (in fact // it is less likely). 
- auto currHint = getBranchHint(curr, getFunction()); - auto childHint = getBranchHint(child, getFunction()); + auto currHint = BranchHints::get(curr, getFunction()); + auto childHint = BranchHints::get(child, getFunction()); if (!currHint || currHint != childHint) { - clearBranchHint(curr, getFunction()); + BranchHints::clear(curr, getFunction()); } curr->ifTrue = child->ifTrue; } @@ -732,7 +701,7 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; - flipBranchHint(brIf, getFunction()); + BranchHints::flip(brIf, getFunction()); return true; } else { // there are elements in the middle, @@ -753,7 +722,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeIf(brIf->condition, builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); - copyBranchHintTo(brIf, list[i], getFunction()); + BranchHints::copyTo(brIf, list[i], getFunction()); // later: fuzz this: instrument "i am guessing at loc X" and "it // was true/it was false", then fuzzz that we don't decreaes times // we are right. From 50049e04115784fe5254828eae1de56d726ca520 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:22:39 -0700 Subject: [PATCH 140/239] bettr --- src/ir/branch-hints.h | 19 +++++++++++++++---- src/passes/RemoveUnusedBrs.cpp | 8 -------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 50e14abd6a0..d23115ef8a6 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -25,6 +25,7 @@ namespace wasm::BranchHints { +// Get the branch hint for an expression. 
inline std::optional get(Expression* expr, Function* func) { auto iter = func->codeAnnotations.find(expr); if (iter == func->codeAnnotations.end()) { @@ -34,21 +35,31 @@ inline std::optional get(Expression* expr, Function* func) { return iter->second.branchLikely; } -inline void set(Expression* expr, bool likely, Function* func) { +// Set the branch hint for an expression, trampling anything existing before. +inline void set(Expression* expr, std::optional likely, Function* func) { + if (!likely) { + // We are writing an empty hint. Do not create an empty annotation if one + // did not exist. + if (!func->codeAnnotations.count(expr)) { + return; + } + } func->codeAnnotations[expr].branchLikely = likely; } +// Clear the branch hint for an expression. inline void clear(Expression* expr, Function* func) { func->codeAnnotations[expr].branchLikely = {}; } +// Copy the branch hint for an expression to another, trampling anything +// existing before. inline void copyTo(Expression* from, Expression* to, Function* func) { auto fromLikely = get(from, func); - if (fromLikely) { - set(to, *fromLikely, func); - } + set(to, *fromLikely, func); } +// Flip the branch hint for an expression (if it exists). inline void flip(Expression* expr, Function* func) { if (auto likely = get(expr, func)) { set(expr, !*likely, func); diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 5ad554280fd..10c843c0c7a 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -723,14 +723,6 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); BranchHints::copyTo(brIf, list[i], getFunction()); - // later: fuzz this: instrument "i am guessing at loc X" and "it - // was true/it was false", then fuzzz that we don't decreaes times - // we are right. 
- // "guess #17: true", then on one arm "guess #7 was true" and - // "false" on the other; match them upp (ok if guess vanishes - - // hints can be eliminated) - // maybe: turn hints into asserts? not for fuzzing, but testing. - // if hint is 1, assert the value is not 0, etc. block->finalize(); return true; } From 7565b4f196d8bdc6d8128e4f6c7bb78dbcce5dfc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:31:52 -0700 Subject: [PATCH 141/239] work --- src/ir/branch-hints.h | 2 +- src/passes/RemoveUnusedBrs.cpp | 2 + .../remove-unused-brs_branch-hints.wast | 80 ++++++++++++++++++- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index d23115ef8a6..7c09d0a3ed4 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -56,7 +56,7 @@ inline void clear(Expression* expr, Function* func) { // existing before. inline void copyTo(Expression* from, Expression* to, Function* func) { auto fromLikely = get(from, func); - set(to, *fromLikely, func); + set(to, fromLikely, func); } // Flip the branch hint for an expression (if it exists). 
diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 10c843c0c7a..1c77f27c5fd 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1224,6 +1224,8 @@ struct RemoveUnusedBrs : public WalkerPass> { // we are an if-else where the ifTrue is a break without a // condition, so we can do this ifTrueBreak->condition = iff->condition; + BranchHints::copyTo(iff, ifTrueBreak, getFunction()); + abort(); ifTrueBreak->finalize(); list[i] = Builder(*getModule()).dropIfConcretelyTyped(ifTrueBreak); ExpressionManipulator::spliceIntoBlock(curr, i + 1, iff->ifFalse); diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 2b2a5d10d1b..90e1ef43204 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -3,11 +3,14 @@ ;; RUN: | filecheck %s (module - ;; CHECK: (import "a" "b" (func $i32 (type $2) (result i32))) + ;; CHECK: (import "a" "b" (func $i32 (type $3) (result i32))) (import "a" "b" (func $i32 (result i32))) - ;; CHECK: (import "a" "b" (func $none (type $3))) + ;; CHECK: (import "a" "b" (func $none (type $2))) (import "a" "b" (func $none)) + ;; CHECK: (tag $e (type $2)) + (tag $e) + ;; CHECK: (func $if-br (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) @@ -404,4 +407,77 @@ ) ) ) + + ;; CHECK: (func $throw-if-br_if-0 (type $1) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-0 (param $x i32) + (block $catch + (try_table (catch_all $catch) + ;; This if can turn into a br_if. The branch hint should be copied. 
+ (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) + + ;; CHECK: (func $throw-if-br_if-1 (type $1) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-1 (param $x i32) + ;; As above, but the hint is 1. + (block $catch + (try_table (catch_all $catch) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) + + ;; CHECK: (func $throw-if-br_if-no (type $1) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-no (param $x i32) + ;; As above, but there is no branch hint, so we should emit none. 
+ (block $catch + (try_table (catch_all $catch) + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) ) From 5a42c80bc8fd7853b8d07b1d1073a0fb8322eead Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:40:05 -0700 Subject: [PATCH 142/239] work --- src/passes/RemoveUnusedBrs.cpp | 1 - .../remove-unused-brs_branch-hints.wast | 53 ++++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 1c77f27c5fd..d2d20d26d1c 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1225,7 +1225,6 @@ struct RemoveUnusedBrs : public WalkerPass> { // condition, so we can do this ifTrueBreak->condition = iff->condition; BranchHints::copyTo(iff, ifTrueBreak, getFunction()); - abort(); ifTrueBreak->finalize(); list[i] = Builder(*getModule()).dropIfConcretelyTyped(ifTrueBreak); ExpressionManipulator::spliceIntoBlock(curr, i + 1, iff->ifFalse); diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 90e1ef43204..3421862adca 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -11,7 +11,7 @@ ;; CHECK: (tag $e (type $2)) (tag $e) - ;; CHECK: (func $if-br (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-br (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") @@ -36,7 +36,7 @@ ) ) - ;; CHECK: (func $if-br_0 (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-br_0 (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -59,7 +59,7 @@ ) ) - ;; CHECK: (func $if-br_if (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-br_if (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (nop) 
;; CHECK-NEXT: (@metadata.code.branch_hint "\01") @@ -89,7 +89,7 @@ ) ) - ;; CHECK: (func $if-if-1* (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-if-1* (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (select @@ -168,7 +168,7 @@ ) ) - ;; CHECK: (func $if-if-0* (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-if-0* (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (if ;; CHECK-NEXT: (select ;; CHECK-NEXT: (local.get $x) @@ -247,7 +247,7 @@ ) ) - ;; CHECK: (func $if-if-?* (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $if-if-?* (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (if ;; CHECK-NEXT: (select ;; CHECK-NEXT: (local.get $x) @@ -319,7 +319,7 @@ ) ) - ;; CHECK: (func $loop-br_if-flip (type $1) (param $x i32) + ;; CHECK: (func $loop-br_if-flip (type $0) (param $x i32) ;; CHECK-NEXT: (loop $loop ;; CHECK-NEXT: (block $block ;; CHECK-NEXT: (block @@ -347,7 +347,7 @@ ) ) - ;; CHECK: (func $loop-br_if-flip-reverse (type $1) (param $x i32) + ;; CHECK: (func $loop-br_if-flip-reverse (type $0) (param $x i32) ;; CHECK-NEXT: (loop $loop ;; CHECK-NEXT: (block $block ;; CHECK-NEXT: (block @@ -374,7 +374,7 @@ ) ) - ;; CHECK: (func $loop-br_if-if (type $1) (param $x i32) + ;; CHECK: (func $loop-br_if-if (type $0) (param $x i32) ;; CHECK-NEXT: (loop $loop ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if @@ -408,7 +408,7 @@ ) ) - ;; CHECK: (func $throw-if-br_if-0 (type $1) (param $x i32) + ;; CHECK: (func $throw-if-br_if-0 (type $0) (param $x i32) ;; CHECK-NEXT: (block $catch ;; CHECK-NEXT: (try_table (catch_all $catch) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -433,7 +433,7 @@ ) ) - ;; CHECK: (func $throw-if-br_if-1 (type $1) (param $x i32) + ;; CHECK: (func $throw-if-br_if-1 (type $0) (param $x i32) ;; CHECK-NEXT: (block $catch ;; CHECK-NEXT: (try_table (catch_all $catch) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") 
@@ -458,7 +458,7 @@ ) ) - ;; CHECK: (func $throw-if-br_if-no (type $1) (param $x i32) + ;; CHECK: (func $throw-if-br_if-no (type $0) (param $x i32) ;; CHECK-NEXT: (block $catch ;; CHECK-NEXT: (try_table (catch_all $catch) ;; CHECK-NEXT: (br_if $catch @@ -480,4 +480,33 @@ ) ) ) + + ;; CHECK: (func $unexitable-loops-result (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unexitable-loops-result (param $x i32) + ;; This if with a br arm can turn into a br_if. The hint should be copied. + (loop $loop + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br $loop) + ) + (else + ;; This call, and the one below, are needed for the pattern that is + ;; recognized here. + (call $none) + ) + ) + (call $none) + ) + ) ) From c5796cf00ef4a0c5d27d2c72b335c845dcb186bd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 09:46:11 -0700 Subject: [PATCH 143/239] work --- src/ir/branch-hints.h | 9 ++++- src/passes/RemoveUnusedBrs.cpp | 1 + .../remove-unused-brs_branch-hints.wast | 33 +++++++++++++++++-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 7c09d0a3ed4..3e851b9393f 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -53,7 +53,7 @@ inline void clear(Expression* expr, Function* func) { } // Copy the branch hint for an expression to another, trampling anything -// existing before. +// existing before for the latter. inline void copyTo(Expression* from, Expression* to, Function* func) { auto fromLikely = get(from, func); set(to, fromLikely, func); @@ -66,6 +66,13 @@ inline void flip(Expression* expr, Function* func) { } } +// Copy the branch hint for an expression to another, flipping it while we do +// so. 
+inline void copyFlippedTo(Expression* from, Expression* to, Function* func) { + copyTo(from, to, func); + flip(to, func); +} + } // namespace wasm::BranchHints #endif // wasm_ir_branch_hint_h diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index d2d20d26d1c..efebdfa81eb 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1239,6 +1239,7 @@ struct RemoveUnusedBrs : public WalkerPass> { *getModule())) { ifFalseBreak->condition = Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + BranchHints::copyFlippedTo(iff, ifFalseBreak, getFunction()); ifFalseBreak->finalize(); list[i] = Builder(*getModule()).dropIfConcretelyTyped(ifFalseBreak); ExpressionManipulator::spliceIntoBlock(curr, i + 1, iff->ifTrue); diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 3421862adca..0382d9d3298 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -481,7 +481,7 @@ ) ) - ;; CHECK: (func $unexitable-loops-result (type $0) (param $x i32) + ;; CHECK: (func $loop-if-br (type $0) (param $x i32) ;; CHECK-NEXT: (loop $loop ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $loop @@ -491,7 +491,7 @@ ;; CHECK-NEXT: (call $none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $unexitable-loops-result (param $x i32) + (func $loop-if-br (param $x i32) ;; This if with a br arm can turn into a br_if. The hint should be copied. 
(loop $loop (@metadata.code.branch_hint "\01") @@ -509,4 +509,33 @@ (call $none) ) ) + + ;; CHECK: (func $loop-if-br-reverse (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-if-br-reverse (param $x i32) + ;; As above, with arms flipped. Now the condition flips. + (loop $loop + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (call $none) + ) + (else + (br $loop) + ) + ) + (call $none) + ) + ) ) From 89cbb2626036d6df697b5fdb071fabf2f0584d9e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 10:37:03 -0700 Subject: [PATCH 144/239] work --- src/ir/branch-hints.h | 25 ++++++++++++++++++++++++- src/passes/RemoveUnusedBrs.cpp | 12 ++---------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 3e851b9393f..b6d40e4f337 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -49,7 +49,12 @@ inline void set(Expression* expr, std::optional likely, Function* func) { // Clear the branch hint for an expression. inline void clear(Expression* expr, Function* func) { - func->codeAnnotations[expr].branchLikely = {}; + // Do not create an empty annotation if one did not exist. + auto iter = func->codeAnnotations.find(expr); + if (iter == func->codeAnnotations.end()) { + return; + } + iter->second.branchLikely = {}; } // Copy the branch hint for an expression to another, trampling anything @@ -73,6 +78,24 @@ inline void copyFlippedTo(Expression* from, Expression* to, Function* func) { flip(to, func); } +// Given two expressions to read from, apply the AND hint to a target. That is, +// the target will be true when both inputs are true. |to| may be equal to +// |from1| or |from2|. 
The hint of |to| is trampled. +inline void applyAndTo(Expression* from1, Expression* from2, Expression* to, Function* func) { + // If from1 and from2 are both likely, then from1 && from2 is slightly less + // likely, but we assume our hints are nearly certain, so we apply it. And, + // converse, if from1 and from2 and both unlikely, then from1 && from2 is even + // less likely, so we can once more apply a hint. + auto from1Hint = BranchHints::get(from1, func); + auto from2Hint = BranchHints::get(from2, func); + if (from1Hint == from2Hint) { + set(to, from1Hint, func); + } else { + // The hints do not even match. + BranchHints::clear(to, func); + } +} + } // namespace wasm::BranchHints #endif // wasm_ir_branch_hint_h diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index efebdfa81eb..675b8c83c85 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -461,16 +461,7 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); curr->condition = builder.makeSelect( child->condition, curr->condition, builder.makeConst(int32_t(0))); - // If two conditions A and B are likely, then A && B is also likely - // (though, in theory, slightly less likely, but our branch hints are - // close to certainty). If one of them lacks a hint, we know nothing. If - // both are unlikely, we can say that A && B is also unlikely (in fact - // it is less likely). 
- auto currHint = BranchHints::get(curr, getFunction()); - auto childHint = BranchHints::get(child, getFunction()); - if (!currHint || currHint != childHint) { - BranchHints::clear(curr, getFunction()); - } + BranchHints::applyAndTo(curr, child, curr, getFunction()); curr->ifTrue = child->ifTrue; } } @@ -1272,6 +1263,7 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); br1->condition = builder.makeBinary(OrInt32, br1->condition, br2->condition); + // waka ExpressionManipulator::nop(br2); } } From 2fc5c0c704cf0dafde93e66013d74e4084413210 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 10:58:09 -0700 Subject: [PATCH 145/239] work --- src/ir/branch-hints.h | 11 ++ src/passes/RemoveUnusedBrs.cpp | 3 +- ...remove-unused-brs_branch-hints-shrink.wast | 180 ++++++++++++++++++ 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 test/lit/passes/remove-unused-brs_branch-hints-shrink.wast diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index b6d40e4f337..62e0c3fa733 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -96,6 +96,17 @@ inline void applyAndTo(Expression* from1, Expression* from2, Expression* to, Fun } } +// As |applyAndTo|, but now the condition on |to| the OR of |from1| and |from2|. +inline void applyOrTo(Expression* from1, Expression* from2, Expression* to, Function* func) { + // If from1 and from2 are both likely, then from1 || from2 is even more + // likely. If from1 and from2 are both unlikely, then from1 || from2 is + // slightly more likely, but we assume our hints are nearly certain, so we + // apply it. That is, the math works out the same for |applyAndTo|, so we just + // call that, but we leave the methods separate for clarity and future + // refactoring. 
+ applyAndTo(from1, from2, to, func); +} + } // namespace wasm::BranchHints #endif // wasm_ir_branch_hint_h diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 675b8c83c85..0585678c1b2 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1263,8 +1263,9 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); br1->condition = builder.makeBinary(OrInt32, br1->condition, br2->condition); - // waka + BranchHints::applyOrTo(br1, br2, br1, getFunction()); ExpressionManipulator::nop(br2); + BranchHints::clear(br2, getFunction()); } } } else { diff --git a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast new file mode 100644 index 00000000000..7f252a19148 --- /dev/null +++ b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast @@ -0,0 +1,180 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --remove-unused-brs -all --shrink-level=1 -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (import "a" "b" (func $none (type $1))) + (import "a" "b" (func $none)) + + ;; CHECK: (func $join-br_ifs (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs (param $x i32) (param $y i32) + ;; The br_ifs will be joined into a single one. The hint should propagate, + ;; as it matches. 
+ (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ;; Extra code so that the entire testcase does not get optimized out as + ;; trivial. + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-0 (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-0 (param $x i32) (param $y i32) + ;; The hints once more match, but now are 0. We still propagate. + (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-no (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-no (param $x i32) (param $y i32) + ;; One is missing a hint, so we clear the hint. 
+ (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-no-flip (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-no-flip (param $x i32) (param $y i32) + ;; The other one is missing the hint, so we clear the hint. + (block $out + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-mismatch (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-mismatch (param $x i32) (param $y i32) + ;; The hints do not match, so we clear the hint. 
+ (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) +) From 7f1cadda34791c68e61ae17429feae8f5c6a1f1b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:03:41 -0700 Subject: [PATCH 146/239] work --- src/passes/RemoveUnusedBrs.cpp | 7 +++-- .../remove-unused-brs_branch-hints.wast | 26 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 0585678c1b2..74da0e100c8 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1405,9 +1405,12 @@ struct RemoveUnusedBrs : public WalkerPass> { // no other breaks to that name, so we can do this if (!drop) { assert(!br->value); - replaceCurrent(builder.makeIf( - builder.makeUnary(EqZInt32, br->condition), curr)); + auto* iff = builder.makeIf( + builder.makeUnary(EqZInt32, br->condition), curr); + replaceCurrent(iff); + BranchHints::copyFlippedTo(br, iff, getFunction()); ExpressionManipulator::nop(br); + BranchHints::clear(br, getFunction()); curr->finalize(curr->type); } else { // To use an if, the value must have no side effects, as in the diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 0382d9d3298..ae87cc494a3 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -538,4 +538,30 @@ (call $none) ) ) + + ;; CHECK: (func $restructure-if (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: ) + (func $restructure-if (param $x i32) + (block $block + ;; We will emit an if with flipped condition, which should get a flipped + ;; hint. + (@metadata.code.branch_hint "\01") + (br_if $block + (local.get $x) + ) + (call $none) + ) + ) ) From 1646e8f3631ac06cb9d871012cd21e6aabeff485 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:10:01 -0700 Subject: [PATCH 147/239] work --- src/passes/RemoveUnusedBrs.cpp | 5 ++-- .../remove-unused-brs_branch-hints.wast | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 74da0e100c8..a22b34b7591 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1421,8 +1421,9 @@ struct RemoveUnusedBrs : public WalkerPass> { if (EffectAnalyzer::canReorder( passOptions, *getModule(), br->condition, br->value)) { ExpressionManipulator::nop(list[0]); - replaceCurrent( - builder.makeIf(br->condition, br->value, curr)); + auto* iff = builder.makeIf(br->condition, br->value, curr); + BranchHints::copyTo(br, iff, getFunction()); + replaceCurrent(iff); } } else { // The value has side effects, so it must always execute. 
We diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index ae87cc494a3..747226cb7b5 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -564,4 +564,34 @@ (call $none) ) ) + + ;; CHECK: (func $restructure-if-value (type $4) (param $x i32) (result i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (block $value (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $restructure-if-value (param $x i32) (result i32) + ;; We will emit an if with the same condition, which should get the same + ;; hint. + (block $value (result i32) + (drop + (@metadata.code.branch_hint "\01") + (br_if $value + (i32.const 0) + (local.get $x) + ) + ) + (unreachable) + ) + ) ) From 45fd1301d8c6c4436589f3dc2ce5b561e400596c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:12:11 -0700 Subject: [PATCH 148/239] work --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 2715869fe29..7b3d43f4cbc 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1924,7 +1924,7 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', - ] + get_random_opts() + FEATURE_OPTS + ] + ['--remove-unused-brs'] + FEATURE_OPTS run(args) # Add instrumentation, to see if any branch hints are wrong after From 001f7f41a6db8edbedab2fce0aacf541ae218adc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:41:57 -0700 Subject: [PATCH 149/239] work --- src/passes/RemoveUnusedBrs.cpp | 3 + .../remove-unused-brs_branch-hints.wast | 69 +++++++++++++++++++ 2 files changed, 72 insertions(+) 
diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index a22b34b7591..654fc6d5389 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1584,6 +1584,9 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); if (flipCondition) { builder.flip(iff); + BranchHints::copyFlippedTo(iff, br, getFunction()); + } else { + BranchHints::copyTo(iff, br, getFunction()); } br->condition = iff->condition; br->finalize(); diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 747226cb7b5..6f42e0e8269 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -594,4 +594,73 @@ (unreachable) ) ) + + ;; CHECK: (func $set-if-br-arm (type $0) (param $x i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $set-if-br-arm (param $x i32) + (local $temp i32) + ;; The if will turn into a br_if, with the same hint. 
+ (block $out + (local.set $temp + (@metadata.code.branch_hint "\00") + (if (result i32) + (local.get $x) + (then + (br $out) + ) + (else + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $set-if-br-arm-flip (type $0) (param $x i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $set-if-br-arm-flip (param $x i32) + (local $temp i32) + ;; As above, but with arms reversed. + ;; The if will turn into a flipped br_if, with a flipped hint. + (block $out + (local.set $temp + (@metadata.code.branch_hint "\00") + (if (result i32) + (local.get $x) + (then + (i32.const 0) + ) + (else + (br $out) + ) + ) + ) + ) + ) ) From 81220a76c7bd32548d304b8792fd072cb160fd58 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:46:01 -0700 Subject: [PATCH 150/239] simpl --- src/passes/RemoveUnusedBrs.cpp | 14 +++++++++----- src/wasm-builder.h | 5 ----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 654fc6d5389..0b3f4c5d4fb 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1542,6 +1542,12 @@ struct RemoveUnusedBrs : public WalkerPass> { optimizeSetIf(getCurrentPointer()); } + // Flip an if's condition with an eqz, and flip its arms. + void flip(If* iff) { + std::swap(iff->ifTrue, iff->ifFalse); + iff->condition = Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + } + void optimizeSetIf(Expression** currp) { if (optimizeSetIfWithBrArm(currp)) { return; @@ -1583,12 +1589,10 @@ struct RemoveUnusedBrs : public WalkerPass> { // Wonderful, do it! 
Builder builder(*getModule()); if (flipCondition) { - builder.flip(iff); - BranchHints::copyFlippedTo(iff, br, getFunction()); - } else { - BranchHints::copyTo(iff, br, getFunction()); + flip(iff); } br->condition = iff->condition; + BranchHints::copyTo(iff, br, getFunction()); br->finalize(); set->value = two; auto* block = builder.makeSequence(br, set); @@ -1656,7 +1660,7 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); LocalGet* get = iff->ifTrue->dynCast(); if (get && get->index == set->index) { - builder.flip(iff); + flip(iff); } else { get = iff->ifFalse->dynCast(); if (get && get->index != set->index) { diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 00612a4020b..dc877bd0223 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -1481,11 +1481,6 @@ class Builder { return makeDrop(curr); } - void flip(If* iff) { - std::swap(iff->ifTrue, iff->ifFalse); - iff->condition = makeUnary(EqZInt32, iff->condition); - } - // Returns a replacement with the precise same type, and with minimal contents // as best we can. As a replacement, this may reuse the input node. 
template Expression* replaceWithIdenticalType(T* curr) { From c06f1dacb87dfc0f3c8bc8d82e6134cb39bdb486 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 11:56:08 -0700 Subject: [PATCH 151/239] work --- src/passes/RemoveUnusedBrs.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 0b3f4c5d4fb..a59d053e7ea 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1546,6 +1546,7 @@ struct RemoveUnusedBrs : public WalkerPass> { void flip(If* iff) { std::swap(iff->ifTrue, iff->ifFalse); iff->condition = Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + BranchHints::flip(iff, getFunction()); } void optimizeSetIf(Expression** currp) { @@ -1921,6 +1922,7 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->type = Type::unreachable; block->list.push_back(curr); block->finalize(); + BranchHints::clear(curr, getFunction()); // The type changed, so refinalize. refinalize = true; } else { From 9779b780bff98c28abf17c4b160cfb7caaba39a9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 12:00:07 -0700 Subject: [PATCH 152/239] work --- src/ir/branch-hints.h | 10 ++++++++-- src/passes/RemoveUnusedBrs.cpp | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 62e0c3fa733..3d93cae9be9 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -81,7 +81,10 @@ inline void copyFlippedTo(Expression* from, Expression* to, Function* func) { // Given two expressions to read from, apply the AND hint to a target. That is, // the target will be true when both inputs are true. |to| may be equal to // |from1| or |from2|. The hint of |to| is trampled. 
-inline void applyAndTo(Expression* from1, Expression* from2, Expression* to, Function* func) { +inline void applyAndTo(Expression* from1, + Expression* from2, + Expression* to, + Function* func) { // If from1 and from2 are both likely, then from1 && from2 is slightly less // likely, but we assume our hints are nearly certain, so we apply it. And, // converse, if from1 and from2 and both unlikely, then from1 && from2 is even @@ -97,7 +100,10 @@ inline void applyAndTo(Expression* from1, Expression* from2, Expression* to, Fun } // As |applyAndTo|, but now the condition on |to| the OR of |from1| and |from2|. -inline void applyOrTo(Expression* from1, Expression* from2, Expression* to, Function* func) { +inline void applyOrTo(Expression* from1, + Expression* from2, + Expression* to, + Function* func) { // If from1 and from2 are both likely, then from1 || from2 is even more // likely. If from1 and from2 are both unlikely, then from1 || from2 is // slightly more likely, but we assume our hints are nearly certain, so we diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index a59d053e7ea..8812c75a6bc 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1545,7 +1545,8 @@ struct RemoveUnusedBrs : public WalkerPass> { // Flip an if's condition with an eqz, and flip its arms. 
void flip(If* iff) { std::swap(iff->ifTrue, iff->ifFalse); - iff->condition = Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + iff->condition = + Builder(*getModule()).makeUnary(EqZInt32, iff->condition); BranchHints::flip(iff, getFunction()); } From 1ac54bb7562d63ab1590b2fa8ed632ff425c7c2b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 12:47:25 -0700 Subject: [PATCH 153/239] fix --- src/ir/branch-hints.h | 118 ++++ src/passes/RemoveUnusedBrs.cpp | 35 +- src/wasm-builder.h | 5 - ...remove-unused-brs_branch-hints-shrink.wast | 180 +++++ .../remove-unused-brs_branch-hints.wast | 666 ++++++++++++++++++ 5 files changed, 993 insertions(+), 11 deletions(-) create mode 100644 src/ir/branch-hints.h create mode 100644 test/lit/passes/remove-unused-brs_branch-hints-shrink.wast create mode 100644 test/lit/passes/remove-unused-brs_branch-hints.wast diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h new file mode 100644 index 00000000000..3d93cae9be9 --- /dev/null +++ b/src/ir/branch-hints.h @@ -0,0 +1,118 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_branch_hint_h +#define wasm_ir_branch_hint_h + +#include "wasm.h" + +// +// Branch hint utilities to get them, set, flip, etc. +// + +namespace wasm::BranchHints { + +// Get the branch hint for an expression. 
+inline std::optional get(Expression* expr, Function* func) { + auto iter = func->codeAnnotations.find(expr); + if (iter == func->codeAnnotations.end()) { + // No annotations at all. + return {}; + } + return iter->second.branchLikely; +} + +// Set the branch hint for an expression, trampling anything existing before. +inline void set(Expression* expr, std::optional likely, Function* func) { + if (!likely) { + // We are writing an empty hint. Do not create an empty annotation if one + // did not exist. + if (!func->codeAnnotations.count(expr)) { + return; + } + } + func->codeAnnotations[expr].branchLikely = likely; +} + +// Clear the branch hint for an expression. +inline void clear(Expression* expr, Function* func) { + // Do not create an empty annotation if one did not exist. + auto iter = func->codeAnnotations.find(expr); + if (iter == func->codeAnnotations.end()) { + return; + } + iter->second.branchLikely = {}; +} + +// Copy the branch hint for an expression to another, trampling anything +// existing before for the latter. +inline void copyTo(Expression* from, Expression* to, Function* func) { + auto fromLikely = get(from, func); + set(to, fromLikely, func); +} + +// Flip the branch hint for an expression (if it exists). +inline void flip(Expression* expr, Function* func) { + if (auto likely = get(expr, func)) { + set(expr, !*likely, func); + } +} + +// Copy the branch hint for an expression to another, flipping it while we do +// so. +inline void copyFlippedTo(Expression* from, Expression* to, Function* func) { + copyTo(from, to, func); + flip(to, func); +} + +// Given two expressions to read from, apply the AND hint to a target. That is, +// the target will be true when both inputs are true. |to| may be equal to +// |from1| or |from2|. The hint of |to| is trampled. 
+inline void applyAndTo(Expression* from1, + Expression* from2, + Expression* to, + Function* func) { + // If from1 and from2 are both likely, then from1 && from2 is slightly less + // likely, but we assume our hints are nearly certain, so we apply it. And, + // converse, if from1 and from2 and both unlikely, then from1 && from2 is even + // less likely, so we can once more apply a hint. + auto from1Hint = BranchHints::get(from1, func); + auto from2Hint = BranchHints::get(from2, func); + if (from1Hint == from2Hint) { + set(to, from1Hint, func); + } else { + // The hints do not even match. + BranchHints::clear(to, func); + } +} + +// As |applyAndTo|, but now the condition on |to| the OR of |from1| and |from2|. +inline void applyOrTo(Expression* from1, + Expression* from2, + Expression* to, + Function* func) { + // If from1 and from2 are both likely, then from1 || from2 is even more + // likely. If from1 and from2 are both unlikely, then from1 || from2 is + // slightly more likely, but we assume our hints are nearly certain, so we + // apply it. That is, the math works out the same for |applyAndTo|, so we just + // call that, but we leave the methods separate for clarity and future + // refactoring. 
+ applyAndTo(from1, from2, to, func); +} + +} // namespace wasm::BranchHints + +#endif // wasm_ir_branch_hint_h diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 64c68f2354e..8812c75a6bc 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -18,6 +18,7 @@ // Removes branches for which we go to where they go anyhow // +#include "ir/branch-hints.h" #include "ir/branch-utils.h" #include "ir/cost.h" #include "ir/drop.h" @@ -429,6 +430,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeSelect(br->condition, curr->condition, zero); } br->finalize(); + BranchHints::copyTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } @@ -459,6 +461,7 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); curr->condition = builder.makeSelect( child->condition, curr->condition, builder.makeConst(int32_t(0))); + BranchHints::applyAndTo(curr, child, curr, getFunction()); curr->ifTrue = child->ifTrue; } } @@ -689,6 +692,7 @@ struct RemoveUnusedBrs : public WalkerPass> { brIf->condition = builder.makeUnary(EqZInt32, brIf->condition); last->name = brIf->name; brIf->name = loop->name; + BranchHints::flip(brIf, getFunction()); return true; } else { // there are elements in the middle, @@ -709,6 +713,7 @@ struct RemoveUnusedBrs : public WalkerPass> { builder.makeIf(brIf->condition, builder.makeBreak(brIf->name), stealSlice(builder, block, i + 1, list.size())); + BranchHints::copyTo(brIf, list[i], getFunction()); block->finalize(); return true; } @@ -1210,6 +1215,7 @@ struct RemoveUnusedBrs : public WalkerPass> { // we are an if-else where the ifTrue is a break without a // condition, so we can do this ifTrueBreak->condition = iff->condition; + BranchHints::copyTo(iff, ifTrueBreak, getFunction()); ifTrueBreak->finalize(); list[i] = Builder(*getModule()).dropIfConcretelyTyped(ifTrueBreak); ExpressionManipulator::spliceIntoBlock(curr, i 
+ 1, iff->ifFalse); @@ -1224,6 +1230,7 @@ struct RemoveUnusedBrs : public WalkerPass> { *getModule())) { ifFalseBreak->condition = Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + BranchHints::copyFlippedTo(iff, ifFalseBreak, getFunction()); ifFalseBreak->finalize(); list[i] = Builder(*getModule()).dropIfConcretelyTyped(ifFalseBreak); ExpressionManipulator::spliceIntoBlock(curr, i + 1, iff->ifTrue); @@ -1256,7 +1263,9 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); br1->condition = builder.makeBinary(OrInt32, br1->condition, br2->condition); + BranchHints::applyOrTo(br1, br2, br1, getFunction()); ExpressionManipulator::nop(br2); + BranchHints::clear(br2, getFunction()); } } } else { @@ -1396,9 +1405,12 @@ struct RemoveUnusedBrs : public WalkerPass> { // no other breaks to that name, so we can do this if (!drop) { assert(!br->value); - replaceCurrent(builder.makeIf( - builder.makeUnary(EqZInt32, br->condition), curr)); + auto* iff = builder.makeIf( + builder.makeUnary(EqZInt32, br->condition), curr); + replaceCurrent(iff); + BranchHints::copyFlippedTo(br, iff, getFunction()); ExpressionManipulator::nop(br); + BranchHints::clear(br, getFunction()); curr->finalize(curr->type); } else { // To use an if, the value must have no side effects, as in the @@ -1409,8 +1421,9 @@ struct RemoveUnusedBrs : public WalkerPass> { if (EffectAnalyzer::canReorder( passOptions, *getModule(), br->condition, br->value)) { ExpressionManipulator::nop(list[0]); - replaceCurrent( - builder.makeIf(br->condition, br->value, curr)); + auto* iff = builder.makeIf(br->condition, br->value, curr); + BranchHints::copyTo(br, iff, getFunction()); + replaceCurrent(iff); } } else { // The value has side effects, so it must always execute. We @@ -1529,6 +1542,14 @@ struct RemoveUnusedBrs : public WalkerPass> { optimizeSetIf(getCurrentPointer()); } + // Flip an if's condition with an eqz, and flip its arms. 
+ void flip(If* iff) { + std::swap(iff->ifTrue, iff->ifFalse); + iff->condition = + Builder(*getModule()).makeUnary(EqZInt32, iff->condition); + BranchHints::flip(iff, getFunction()); + } + void optimizeSetIf(Expression** currp) { if (optimizeSetIfWithBrArm(currp)) { return; @@ -1570,9 +1591,10 @@ struct RemoveUnusedBrs : public WalkerPass> { // Wonderful, do it! Builder builder(*getModule()); if (flipCondition) { - builder.flip(iff); + flip(iff); } br->condition = iff->condition; + BranchHints::copyTo(iff, br, getFunction()); br->finalize(); set->value = two; auto* block = builder.makeSequence(br, set); @@ -1640,7 +1662,7 @@ struct RemoveUnusedBrs : public WalkerPass> { Builder builder(*getModule()); LocalGet* get = iff->ifTrue->dynCast(); if (get && get->index == set->index) { - builder.flip(iff); + flip(iff); } else { get = iff->ifFalse->dynCast(); if (get && get->index != set->index) { @@ -1901,6 +1923,7 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->type = Type::unreachable; block->list.push_back(curr); block->finalize(); + BranchHints::clear(curr, getFunction()); // The type changed, so refinalize. refinalize = true; } else { diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 00612a4020b..dc877bd0223 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -1481,11 +1481,6 @@ class Builder { return makeDrop(curr); } - void flip(If* iff) { - std::swap(iff->ifTrue, iff->ifFalse); - iff->condition = makeUnary(EqZInt32, iff->condition); - } - // Returns a replacement with the precise same type, and with minimal contents // as best we can. As a replacement, this may reuse the input node. 
template Expression* replaceWithIdenticalType(T* curr) { diff --git a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast new file mode 100644 index 00000000000..7f252a19148 --- /dev/null +++ b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast @@ -0,0 +1,180 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --remove-unused-brs -all --shrink-level=1 -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (import "a" "b" (func $none (type $1))) + (import "a" "b" (func $none)) + + ;; CHECK: (func $join-br_ifs (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs (param $x i32) (param $y i32) + ;; The br_ifs will be joined into a single one. The hint should propagate, + ;; as it matches. + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ;; Extra code so that the entire testcase does not get optimized out as + ;; trivial. 
+ (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-0 (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-0 (param $x i32) (param $y i32) + ;; The hints once more match, but now are 0. We still propagate. + (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-no (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-no (param $x i32) (param $y i32) + ;; One is missing a hint, so we clear the hint. 
+ (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-no-flip (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-no-flip (param $x i32) (param $y i32) + ;; The other one is missing the hint, so we clear the hint. + (block $out + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-mismatch (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-mismatch (param $x i32) (param $y i32) + ;; The hints do not match, so we clear the hint. 
+ (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) +) diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast new file mode 100644 index 00000000000..6f42e0e8269 --- /dev/null +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -0,0 +1,666 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --remove-unused-brs -all -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (import "a" "b" (func $i32 (type $3) (result i32))) + (import "a" "b" (func $i32 (result i32))) + ;; CHECK: (import "a" "b" (func $none (type $2))) + (import "a" "b" (func $none)) + + ;; CHECK: (tag $e (type $2)) + (tag $e) + + ;; CHECK: (func $if-br (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br (param $x i32) (param $y i32) + (block $out + ;; This nop prevents the entire testcase from being trivial. + (nop) + ;; The if-br will turn into a br_if. The branch hint should then go on the + ;; br_if, and remain 01. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br $out) + ) + ) + ) + ) + + ;; CHECK: (func $if-br_0 (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_0 (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but a hint of 0. 
+ (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (br $out) + ) + ) + ) + ) + + ;; CHECK: (func $if-br_if (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but the br has a condition. We can merge conditions (using a + ;; select), and then move the hint to the br_if. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br_if $out + (local.get $y) + ) + ) + ) + ) + ) + + ;; CHECK: (func $if-if-1* (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-if-1* (param $x i32) (param $y i32) + ;; Both ifs have a hint of 1, so after we merge the ifs the combined + ;; condition remains likely. 
+ (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; The outer if still has a hint of 1, but the inner is 0. We emit no hint. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; The outer if still has a hint of 1, but the inner has none. We emit no + ;; hint. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ) + + ;; CHECK: (func $if-if-0* (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-if-0* (param $x i32) (param $y i32) + ;; As above, but now the outer if has hints of 0. + + ;; The hints do not match, so we emit no hint. + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; The hints match, so the combined condition is unlikely. 
+ (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ;; Inner lacks a hint, so we emit nothing. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ) + + ;; CHECK: (func $if-if-?* (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-if-?* (param $x i32) (param $y i32) + ;; As above, but now the outer if has no hint. We emit no hints here. 
+ + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + (if + (local.get $x) + (then + (if + (local.get $y) + (then + (call $none) + ) + ) + ) + ) + ) + + ;; CHECK: (func $loop-br_if-flip (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-flip (param $x i32) + (block $block + (loop $loop + ;; This br_if's condition will flip when it is turned from a break out + ;; of the loop to a continue inside it. The hint should flip too. + (@metadata.code.branch_hint "\00") + (br_if $block + (local.get $x) + ) + (br $loop) + ) + ) + ) + + ;; CHECK: (func $loop-br_if-flip-reverse (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-flip-reverse (param $x i32) + ;; As above, with a hint of 1, that should flip to 0. 
+ (block $block + (loop $loop + (@metadata.code.branch_hint "\01") + (br_if $block + (local.get $x) + ) + (br $loop) + ) + ) + ) + + ;; CHECK: (func $loop-br_if-if (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br $loop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-br_if-if (param $x i32) + (loop $loop + (block $block + ;; This br_if will turn into an if with the same condition. The hint can + ;; be copied over. + (@metadata.code.branch_hint "\00") + (br_if $block + (local.get $x) + ) + ;; Extra code so simpler optimizations do not kick in. + (drop (i32.const 42)) + (br $loop) + ) + ) + ) + + ;; CHECK: (func $throw-if-br_if-0 (type $0) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-0 (param $x i32) + (block $catch + (try_table (catch_all $catch) + ;; This if can turn into a br_if. The branch hint should be copied. + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) + + ;; CHECK: (func $throw-if-br_if-1 (type $0) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-1 (param $x i32) + ;; As above, but the hint is 1. 
+ (block $catch + (try_table (catch_all $catch) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) + + ;; CHECK: (func $throw-if-br_if-no (type $0) (param $x i32) + ;; CHECK-NEXT: (block $catch + ;; CHECK-NEXT: (try_table (catch_all $catch) + ;; CHECK-NEXT: (br_if $catch + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $throw-if-br_if-no (param $x i32) + ;; As above, but there is no branch hint, so we should emit none. + (block $catch + (try_table (catch_all $catch) + (if + (local.get $x) + (then + (throw $e) + ) + ) + ) + ) + ) + + ;; CHECK: (func $loop-if-br (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-if-br (param $x i32) + ;; This if with a br arm can turn into a br_if. The hint should be copied. + (loop $loop + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (br $loop) + ) + (else + ;; This call, and the one below, are needed for the pattern that is + ;; recognized here. + (call $none) + ) + ) + (call $none) + ) + ) + + ;; CHECK: (func $loop-if-br-reverse (type $0) (param $x i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $loop + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $loop-if-br-reverse (param $x i32) + ;; As above, with arms flipped. Now the condition flips. 
+ (loop $loop + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (call $none) + ) + (else + (br $loop) + ) + ) + (call $none) + ) + ) + + ;; CHECK: (func $restructure-if (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (block $block + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $restructure-if (param $x i32) + (block $block + ;; We will emit an if with flipped condition, which should get a flipped + ;; hint. + (@metadata.code.branch_hint "\01") + (br_if $block + (local.get $x) + ) + (call $none) + ) + ) + + ;; CHECK: (func $restructure-if-value (type $4) (param $x i32) (result i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (block $value (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $restructure-if-value (param $x i32) (result i32) + ;; We will emit an if with the same condition, which should get the same + ;; hint. 
+ (block $value (result i32) + (drop + (@metadata.code.branch_hint "\01") + (br_if $value + (i32.const 0) + (local.get $x) + ) + ) + (unreachable) + ) + ) + + ;; CHECK: (func $set-if-br-arm (type $0) (param $x i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $set-if-br-arm (param $x i32) + (local $temp i32) + ;; The if will turn into a br_if, with the same hint. + (block $out + (local.set $temp + (@metadata.code.branch_hint "\00") + (if (result i32) + (local.get $x) + (then + (br $out) + ) + (else + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $set-if-br-arm-flip (type $0) (param $x i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $set-if-br-arm-flip (param $x i32) + (local $temp i32) + ;; As above, but with arms reversed. + ;; The if will turn into a flipped br_if, with a flipped hint. 
+ (block $out + (local.set $temp + (@metadata.code.branch_hint "\00") + (if (result i32) + (local.get $x) + (then + (i32.const 0) + ) + (else + (br $out) + ) + ) + ) + ) + ) +) From 8aada230a99717c475d7febc88d33fcaa62d869c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 12:58:48 -0700 Subject: [PATCH 154/239] moar --- src/passes/RemoveUnusedBrs.cpp | 3 +- .../remove-unused-brs_branch-hints.wast | 32 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 8812c75a6bc..5b505cf1d60 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -397,6 +397,7 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->condition, br->value, getPassOptions(), *getModule())) { if (!br->condition) { br->condition = curr->condition; + BranchHints::copyTo(curr, br, getFunction()); } else { // In this case we can replace // if (condition1) br_if (condition2) @@ -428,9 +429,9 @@ struct RemoveUnusedBrs : public WalkerPass> { // That keeps the order of the two conditions as it was originally. br->condition = builder.makeSelect(br->condition, curr->condition, zero); + BranchHints::applyAndTo(curr, br, br, getFunction()); } br->finalize(); - BranchHints::copyTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 6f42e0e8269..9f6d43606bf 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -76,7 +76,37 @@ (block $out (nop) ;; As above, but the br has a condition. We can merge conditions (using a - ;; select), and then move the hint to the br_if. + ;; select), and then move the hint to the br_if, as the br_if has the + ;; same hint as the if. 
+ (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ) + ) + ) + ) + + ;; CHECK: (func $if-br_if-no (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if-no (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but the br lacks a hint, so we emit no hint. (@metadata.code.branch_hint "\01") (if (local.get $x) From 0215cbcfb4e5b2251f6833389e3667d8a0066074 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 12:58:48 -0700 Subject: [PATCH 155/239] moar --- src/passes/RemoveUnusedBrs.cpp | 3 +- .../remove-unused-brs_branch-hints.wast | 32 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 8812c75a6bc..5b505cf1d60 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -397,6 +397,7 @@ struct RemoveUnusedBrs : public WalkerPass> { curr->condition, br->value, getPassOptions(), *getModule())) { if (!br->condition) { br->condition = curr->condition; + BranchHints::copyTo(curr, br, getFunction()); } else { // In this case we can replace // if (condition1) br_if (condition2) @@ -428,9 +429,9 @@ struct RemoveUnusedBrs : public WalkerPass> { // That keeps the order of the two conditions as it was originally. 
br->condition = builder.makeSelect(br->condition, curr->condition, zero); + BranchHints::applyAndTo(curr, br, br, getFunction()); } br->finalize(); - BranchHints::copyTo(curr, br, getFunction()); replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br)); anotherCycle = true; } diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 6f42e0e8269..9f6d43606bf 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -76,7 +76,37 @@ (block $out (nop) ;; As above, but the br has a condition. We can merge conditions (using a - ;; select), and then move the hint to the br_if. + ;; select), and then move the hint to the br_if, as the br_if has the + ;; same hint as the if. + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ) + ) + ) + ) + + ;; CHECK: (func $if-br_if-no (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if-no (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but the br lacks a hint, so we emit no hint. 
(@metadata.code.branch_hint "\01") (if (local.get $x) From 0cbbf5ad49edc0415bdcfd379deee35fc9bd0425 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 14:52:47 -0700 Subject: [PATCH 156/239] properly apply OR --- src/ir/branch-hints.h | 36 ++++++---- ...remove-unused-brs_branch-hints-shrink.wast | 71 ++++++++++++++++++- .../remove-unused-brs_branch-hints.wast | 28 ++++++++ 3 files changed, 120 insertions(+), 15 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 3d93cae9be9..51d885d30b6 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -37,12 +37,10 @@ inline std::optional get(Expression* expr, Function* func) { // Set the branch hint for an expression, trampling anything existing before. inline void set(Expression* expr, std::optional likely, Function* func) { - if (!likely) { - // We are writing an empty hint. Do not create an empty annotation if one - // did not exist. - if (!func->codeAnnotations.count(expr)) { - return; - } + // When we are writing an empty hint, do not create an empty annotation if one + // did not exist. + if (!likely && !func->codeAnnotations.count(expr)) { + return; } func->codeAnnotations[expr].branchLikely = likely; } @@ -88,7 +86,8 @@ inline void applyAndTo(Expression* from1, // If from1 and from2 are both likely, then from1 && from2 is slightly less // likely, but we assume our hints are nearly certain, so we apply it. And, // converse, if from1 and from2 and both unlikely, then from1 && from2 is even - // less likely, so we can once more apply a hint. + // less likely, so we can once more apply a hint. If the hints differ, than + // one is unlikely or unknown, and we can't say anything about from1 && from2. 
auto from1Hint = BranchHints::get(from1, func); auto from2Hint = BranchHints::get(from2, func); if (from1Hint == from2Hint) { @@ -104,13 +103,22 @@ inline void applyOrTo(Expression* from1, Expression* from2, Expression* to, Function* func) { - // If from1 and from2 are both likely, then from1 || from2 is even more - // likely. If from1 and from2 are both unlikely, then from1 || from2 is - // slightly more likely, but we assume our hints are nearly certain, so we - // apply it. That is, the math works out the same for |applyAndTo|, so we just - // call that, but we leave the methods separate for clarity and future - // refactoring. - applyAndTo(from1, from2, to, func); + // If one is likely then so is the from1 || from2. If both are unlikely then + // from1 || from2 is slightly more likely, but we assume our hints are nearly + // certain, so we apply it. + auto from1Hint = BranchHints::get(from1, func); + auto from2Hint = BranchHints::get(from2, func); + if ((from1Hint && *from1Hint) || (from2Hint && *from2Hint)) { + set(to, true, func); + } else if (from1Hint && from2Hint) { + // We ruled out that either one is present and true, so if both are present, + // both must be false. + assert(!*from1Hint && !*from2Hint); + set(to, false, func); + } else { + // We don't know. 
+ BranchHints::clear(to, func); + } } } // namespace wasm::BranchHints diff --git a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast index 7f252a19148..21c08e8cd4c 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast @@ -144,8 +144,77 @@ ) ) + ;; CHECK: (func $join-br_ifs-yes-one (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-yes-one (param $x i32) (param $y i32) + ;; One has a 1 hint, so we can use that. + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-yes-other (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-yes-other (param $x i32) (param $y i32) + ;; As above, but the other has the 1, which we can still use. 
+ (block $out + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + ;; CHECK: (func $join-br_ifs-mismatch (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (i32.or ;; CHECK-NEXT: (local.get $x) @@ -160,7 +229,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $join-br_ifs-mismatch (param $x i32) (param $y i32) - ;; The hints do not match, so we clear the hint. + ;; The hints do not match, but we can still use the 1 hint. (block $out (@metadata.code.branch_hint "\00") (br_if $out diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 9f6d43606bf..321fbab8146 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -119,6 +119,34 @@ ) ) + ;; CHECK: (func $if-br_if-no-2 (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if-no-2 (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but now the if lacks a hint, so we emit no hint. 
+ (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ) + ) + ) + ) + ;; CHECK: (func $if-if-1* (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if From 76632f11048669d788566dfaf27dad0234e42900 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 14:52:47 -0700 Subject: [PATCH 157/239] properly apply OR --- src/ir/branch-hints.h | 36 ++++++---- ...remove-unused-brs_branch-hints-shrink.wast | 71 ++++++++++++++++++- .../remove-unused-brs_branch-hints.wast | 28 ++++++++ 3 files changed, 120 insertions(+), 15 deletions(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 3d93cae9be9..51d885d30b6 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -37,12 +37,10 @@ inline std::optional get(Expression* expr, Function* func) { // Set the branch hint for an expression, trampling anything existing before. inline void set(Expression* expr, std::optional likely, Function* func) { - if (!likely) { - // We are writing an empty hint. Do not create an empty annotation if one - // did not exist. - if (!func->codeAnnotations.count(expr)) { - return; - } + // When we are writing an empty hint, do not create an empty annotation if one + // did not exist. + if (!likely && !func->codeAnnotations.count(expr)) { + return; } func->codeAnnotations[expr].branchLikely = likely; } @@ -88,7 +86,8 @@ inline void applyAndTo(Expression* from1, // If from1 and from2 are both likely, then from1 && from2 is slightly less // likely, but we assume our hints are nearly certain, so we apply it. And, // converse, if from1 and from2 and both unlikely, then from1 && from2 is even - // less likely, so we can once more apply a hint. + // less likely, so we can once more apply a hint. If the hints differ, than + // one is unlikely or unknown, and we can't say anything about from1 && from2. 
auto from1Hint = BranchHints::get(from1, func); auto from2Hint = BranchHints::get(from2, func); if (from1Hint == from2Hint) { @@ -104,13 +103,22 @@ inline void applyOrTo(Expression* from1, Expression* from2, Expression* to, Function* func) { - // If from1 and from2 are both likely, then from1 || from2 is even more - // likely. If from1 and from2 are both unlikely, then from1 || from2 is - // slightly more likely, but we assume our hints are nearly certain, so we - // apply it. That is, the math works out the same for |applyAndTo|, so we just - // call that, but we leave the methods separate for clarity and future - // refactoring. - applyAndTo(from1, from2, to, func); + // If one is likely then so is the from1 || from2. If both are unlikely then + // from1 || from2 is slightly more likely, but we assume our hints are nearly + // certain, so we apply it. + auto from1Hint = BranchHints::get(from1, func); + auto from2Hint = BranchHints::get(from2, func); + if ((from1Hint && *from1Hint) || (from2Hint && *from2Hint)) { + set(to, true, func); + } else if (from1Hint && from2Hint) { + // We ruled out that either one is present and true, so if both are present, + // both must be false. + assert(!*from1Hint && !*from2Hint); + set(to, false, func); + } else { + // We don't know. 
+ BranchHints::clear(to, func); + } } } // namespace wasm::BranchHints diff --git a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast index 7f252a19148..21c08e8cd4c 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints-shrink.wast @@ -144,8 +144,77 @@ ) ) + ;; CHECK: (func $join-br_ifs-yes-one (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-yes-one (param $x i32) (param $y i32) + ;; One has a 1 hint, so we can use that. + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + + ;; CHECK: (func $join-br_ifs-yes-other (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (call $none) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $join-br_ifs-yes-other (param $x i32) (param $y i32) + ;; As above, but the other has the 1, which we can still use. 
+ (block $out + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + + (call $none) + (br_if $out + (local.get $y) + ) + ) + ) + ;; CHECK: (func $join-br_ifs-mismatch (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (i32.or ;; CHECK-NEXT: (local.get $x) @@ -160,7 +229,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $join-br_ifs-mismatch (param $x i32) (param $y i32) - ;; The hints do not match, so we clear the hint. + ;; The hints do not match, but we can still use the 1 hint. (block $out (@metadata.code.branch_hint "\00") (br_if $out diff --git a/test/lit/passes/remove-unused-brs_branch-hints.wast b/test/lit/passes/remove-unused-brs_branch-hints.wast index 9f6d43606bf..321fbab8146 100644 --- a/test/lit/passes/remove-unused-brs_branch-hints.wast +++ b/test/lit/passes/remove-unused-brs_branch-hints.wast @@ -119,6 +119,34 @@ ) ) + ;; CHECK: (func $if-br_if-no-2 (type $1) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $if-br_if-no-2 (param $x i32) (param $y i32) + (block $out + (nop) + ;; As above, but now the if lacks a hint, so we emit no hint. 
+ (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $y) + ) + ) + ) + ) + ) + ;; CHECK: (func $if-if-1* (type $1) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if From 126eb8896cb72275ca41eafc16f5457c91e0677c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 14:55:20 -0700 Subject: [PATCH 158/239] format --- src/ir/branch-hints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 51d885d30b6..5157538f847 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -105,7 +105,7 @@ inline void applyOrTo(Expression* from1, Function* func) { // If one is likely then so is the from1 || from2. If both are unlikely then // from1 || from2 is slightly more likely, but we assume our hints are nearly - // certain, so we apply it. + // certain, so we apply it. auto from1Hint = BranchHints::get(from1, func); auto from2Hint = BranchHints::get(from2, func); if ((from1Hint && *from1Hint) || (from2Hint && *from2Hint)) { From ed5a58b79165fb46084db1223efff58339a1a007 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 14:55:26 -0700 Subject: [PATCH 159/239] format --- src/ir/branch-hints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 51d885d30b6..5157538f847 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -105,7 +105,7 @@ inline void applyOrTo(Expression* from1, Function* func) { // If one is likely then so is the from1 || from2. If both are unlikely then // from1 || from2 is slightly more likely, but we assume our hints are nearly - // certain, so we apply it. + // certain, so we apply it. 
auto from1Hint = BranchHints::get(from1, func); auto from2Hint = BranchHints::get(from2, func); if ((from1Hint && *from1Hint) || (from2Hint && *from2Hint)) { From a633e50b4698829ebfa8d99992e257730caf6634 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 15:15:36 -0700 Subject: [PATCH 160/239] Update src/ir/branch-hints.h Co-authored-by: Thomas Lively --- src/ir/branch-hints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/branch-hints.h b/src/ir/branch-hints.h index 5157538f847..a15198f54a8 100644 --- a/src/ir/branch-hints.h +++ b/src/ir/branch-hints.h @@ -86,7 +86,7 @@ inline void applyAndTo(Expression* from1, // If from1 and from2 are both likely, then from1 && from2 is slightly less // likely, but we assume our hints are nearly certain, so we apply it. And, // converse, if from1 and from2 and both unlikely, then from1 && from2 is even - // less likely, so we can once more apply a hint. If the hints differ, than + // less likely, so we can once more apply a hint. If the hints differ, then // one is unlikely or unknown, and we can't say anything about from1 && from2. 
auto from1Hint = BranchHints::get(from1, func); auto from2Hint = BranchHints::get(from2, func); From 158f5b08bc7ed792decafd8ffa2ba5fe13b7eb06 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 15:30:14 -0700 Subject: [PATCH 161/239] work --- src/passes/InstrumentBranchHints.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 6fc46946355..337b95124a5 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -95,10 +95,10 @@ // } // +#include "ir/drop.h" #include "ir/eh-utils.h" #include "ir/find_all.h" #include "ir/local-graph.h" -#include "ir/manipulation.h" #include "ir/names.h" #include "ir/parents.h" #include "ir/properties.h" @@ -367,14 +367,17 @@ struct DeInstrumentBranchHints } // At the very end, remove all logging calls (we use them during the main // walk to identify instrumentation). - for (auto* call : FindAll(func->body).list) { + for (auto** callp : FindAllPointers(func->body).list) { + auto* call = (*callp)->cast(); if (call->target == logBranch) { + Builder builder(*getModule()); + Expression* last; if (call->type == Type::none) { - ExpressionManipulator::nop(call); + last = builder.makeNop(); } else { - assert(call->type == Type::unreachable); - ExpressionManipulator::unreachable(call); + last = builder.makeUnreachable(); } + *callp = getDroppedChildrenAndAppend(call, *getModule(), getPassOptions(), last, DropMode::IgnoreParentEffects); } } } From 5092f67c0efbb2525e5a0b206774891d1a232fe0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 15:33:05 -0700 Subject: [PATCH 162/239] fix --- src/passes/InstrumentBranchHints.cpp | 7 ++- .../lit/passes/deinstrument-branch-hints.wast | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 337b95124a5..5da92841b7d 100644 --- 
a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -377,7 +377,12 @@ struct DeInstrumentBranchHints } else { last = builder.makeUnreachable(); } - *callp = getDroppedChildrenAndAppend(call, *getModule(), getPassOptions(), last, DropMode::IgnoreParentEffects); + *callp = getDroppedChildrenAndAppend(call, + *getModule(), + getPassOptions(), + last, + // We know the call is removable. + DropMode::IgnoreParentEffects); } } } diff --git a/test/lit/passes/deinstrument-branch-hints.wast b/test/lit/passes/deinstrument-branch-hints.wast index c01c4479dbf..6d619c4b28b 100644 --- a/test/lit/passes/deinstrument-branch-hints.wast +++ b/test/lit/passes/deinstrument-branch-hints.wast @@ -120,4 +120,48 @@ ) ) ) + + ;; CHECK: (func $br-before-effects (type $0) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (local $other i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $other + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br-before-effects + ;; As above, but there are effects in the call's children that we must + ;; keep. 
+ (local $temp i32) + (local $other i32) + (block $out + (local.set $temp + (i32.const 42) + ) + (call $log + (i32.const 4) + (local.tee $other ;; this tee must be kept around + (i32.const 0) + ) + (local.get $temp) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $temp) + ) + ) + ) ) From 5b968f45dcd59b7ad7c6e1171342faf79eb3ccac Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 16:19:49 -0700 Subject: [PATCH 163/239] more --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 7b3d43f4cbc..55bdacc962a 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1924,7 +1924,7 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', - ] + ['--remove-unused-brs'] + FEATURE_OPTS + ] + ['--remove-unused-brs', '--optimize-instructions'] + FEATURE_OPTS run(args) # Add instrumentation, to see if any branch hints are wrong after From c0d4dc5cf4b41c492601e07edb0bc3a9f1b62b45 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 16:26:41 -0700 Subject: [PATCH 164/239] fix --- src/passes/OptimizeInstructions.cpp | 2 ++ .../optimize-instructions-branch-hints.wast | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 test/lit/passes/optimize-instructions-branch-hints.wast diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index e4d5a94339c..b521f9ae45f 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1171,6 +1172,7 @@ struct OptimizeInstructions // flip if-else arms to get rid of an eqz curr->condition = unary->value; std::swap(curr->ifTrue, curr->ifFalse); + BranchHints::flip(curr, getFunction()); } } if (curr->condition->type != Type::unreachable && diff --git a/test/lit/passes/optimize-instructions-branch-hints.wast 
b/test/lit/passes/optimize-instructions-branch-hints.wast new file mode 100644 index 00000000000..270bf179216 --- /dev/null +++ b/test/lit/passes/optimize-instructions-branch-hints.wast @@ -0,0 +1,32 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --optimize-instructions -all -S -o - | filecheck %s + +(module + ;; CHECK: (func $conditionals (type $0) (param $x i32) (result i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $conditionals (param $x i32) (result i32) + ;; When we flip the if, the hint should flip too. + (@metadata.code.branch_hint "\00") + (if (result i32) + (i32.eqz + (local.get $x) + ) + (then + (i32.const 42) + ) + (else + (i32.const 1337) + ) + ) + ) +) From a1e2fe2e1512e061b07cf68aa4a0a104f2e7659e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 16:44:25 -0700 Subject: [PATCH 165/239] fix --- scripts/fuzz_opt.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 55bdacc962a..cdf1b7122b4 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1946,6 +1946,10 @@ def handle(self, wasm): for line in out.splitlines(): if line.startswith(LEI_LOG_BRANCH): _, _, id_, hint, actual = line[1:-1].split(' ') + # We do not care about the integer value of the condition, only + # if it was 0 or non-zero (and the hint itself must be 0/1). 
+ assert hint in (0, 1) + actual = (actual != 0) assert hint == actual, 'Bad hint after optimizations' From e450d9b517dbffaec06dc2a3f43e51c327b214eb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 16:46:11 -0700 Subject: [PATCH 166/239] fix --- scripts/fuzz_opt.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index cdf1b7122b4..12d8e37003b 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1946,10 +1946,12 @@ def handle(self, wasm): for line in out.splitlines(): if line.startswith(LEI_LOG_BRANCH): _, _, id_, hint, actual = line[1:-1].split(' ') - # We do not care about the integer value of the condition, only - # if it was 0 or non-zero (and the hint itself must be 0/1). + hint = int(hint) + actual = int(actual) assert hint in (0, 1) - actual = (actual != 0) + # We do not care about the integer value of the condition, only + # if it was 0 or non-zero. + actual = (actual != '0') assert hint == actual, 'Bad hint after optimizations' From a321e25161394d55978d526845a482f89c6eb5c9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 9 Jul 2025 16:50:36 -0700 Subject: [PATCH 167/239] fix --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 12d8e37003b..c81fada9b39 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1951,7 +1951,7 @@ def handle(self, wasm): assert hint in (0, 1) # We do not care about the integer value of the condition, only # if it was 0 or non-zero. 
- actual = (actual != '0') + actual = (actual != 0) assert hint == actual, 'Bad hint after optimizations' From dfaca1ebd14c2940d43e58d1780bb4da0a76a783 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 08:53:24 -0700 Subject: [PATCH 168/239] fix --- src/wasm-binary.h | 5 +++++ src/wasm/wasm-binary.cpp | 30 +++++++++++++----------------- test/lit/name-overlap.wast | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 17 deletions(-) create mode 100644 test/lit/name-overlap.wast diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 21b53f34329..8cadd5c0072 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1658,6 +1658,11 @@ class WasmBinaryReader { std::unordered_map dataNames; std::unordered_map elemNames; + // The names that are already used (either from the names section, or that we + // generate as internal names for un-named things). + std::unordered_set usedFunctionNames, usedTableNames, usedMemoryNames, + usedGlobalNames, usedTagNames; + Function* currFunction = nullptr; // before we see a function (like global init expressions), there is no end of // function to check diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 39b5e7fc77f..54dad9079c7 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2529,17 +2529,15 @@ getOrMakeName(const std::unordered_map& nameMap, void WasmBinaryReader::readMemories() { auto num = getU32LEB(); auto numImports = wasm.memories.size(); - std::unordered_set usedNames; for (auto& [index, name] : memoryNames) { if (index >= num + numImports) { std::cerr << "warning: memory index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedNames.insert(name); } for (size_t i = 0; i < num; i++) { auto [name, isExplicit] = - getOrMakeName(memoryNames, numImports + i, makeName("", i), usedNames); + getOrMakeName(memoryNames, numImports + i, makeName("", i), usedMemoryNames); auto memory = Builder::makeMemory(name); memory->hasExplicitName = 
isExplicit; getResizableLimits(memory->initial, @@ -2871,8 +2869,6 @@ void WasmBinaryReader::getResizableLimits(Address& initial, void WasmBinaryReader::readImports() { size_t num = getU32LEB(); Builder builder(wasm); - std::unordered_set usedFunctionNames, usedTableNames, usedMemoryNames, - usedGlobalNames, usedTagNames; for (size_t i = 0; i < num; i++) { auto module = getInlineString(); auto base = getInlineString(); @@ -3007,13 +3003,14 @@ void WasmBinaryReader::setLocalNames(Function& func, Index i) { void WasmBinaryReader::readFunctionSignatures() { size_t num = getU32LEB(); auto numImports = wasm.functions.size(); - std::unordered_set usedNames; +std::cout << "adding existing\n"; for (auto& [index, name] : functionNames) { if (index >= num + numImports) { std::cerr << "warning: function index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedNames.insert(name); + usedFunctionNames.insert(name); +std::cout << " adding: " << name << "\n"; } // Also check that the function indices in the local names subsection are // in-bounds, even though we don't use them here. 
@@ -3026,7 +3023,7 @@ void WasmBinaryReader::readFunctionSignatures() { } for (size_t i = 0; i < num; i++) { auto [name, isExplicit] = - getOrMakeName(functionNames, numImports + i, makeName("", i), usedNames); + getOrMakeName(functionNames, numImports + i, makeName("", i), usedFunctionNames); auto index = getU32LEB(); HeapType type = getTypeByIndex(index); functionTypes.push_back(type); @@ -4761,17 +4758,15 @@ Name WasmBinaryReader::getIndexedString() { void WasmBinaryReader::readGlobals() { size_t num = getU32LEB(); auto numImports = wasm.globals.size(); - std::unordered_set usedNames; for (auto& [index, name] : globalNames) { if (index >= num + numImports) { std::cerr << "warning: global index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedNames.insert(name); } for (size_t i = 0; i < num; i++) { auto [name, isExplicit] = getOrMakeName( - globalNames, numImports + i, makeName("global$", i), usedNames); + globalNames, numImports + i, makeName("global$", i), usedGlobalNames); auto type = getConcreteType(); auto mutable_ = getU32LEB(); if (mutable_ & ~1) { @@ -4860,17 +4855,15 @@ void WasmBinaryReader::readDataSegments() { void WasmBinaryReader::readTableDeclarations() { auto num = getU32LEB(); auto numImports = wasm.tables.size(); - std::unordered_set usedNames; for (auto& [index, name] : tableNames) { if (index >= num + numImports) { std::cerr << "warning: table index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedNames.insert(name); } for (size_t i = 0; i < num; i++) { auto [name, isExplicit] = - getOrMakeName(tableNames, numImports + i, makeName("", i), usedNames); + getOrMakeName(tableNames, numImports + i, makeName("", i), usedTableNames); auto elemType = getType(); if (!elemType.isRef()) { throwError("Table type must be a reference type"); @@ -4977,18 +4970,16 @@ void WasmBinaryReader::readElementSegments() { void WasmBinaryReader::readTags() { size_t num = getU32LEB(); auto numImports = 
wasm.tags.size(); - std::unordered_set usedNames; for (auto& [index, name] : tagNames) { if (index >= num + numImports) { std::cerr << "warning: tag index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedNames.insert(name); } for (size_t i = 0; i < num; i++) { getInt8(); // Reserved 'attribute' field auto [name, isExplicit] = - getOrMakeName(tagNames, numImports + i, makeName("tag$", i), usedNames); + getOrMakeName(tagNames, numImports + i, makeName("tag$", i), usedTagNames); auto typeIndex = getU32LEB(); auto tag = Builder::makeTag(name, getSignatureByTypeIndex(typeIndex)); tag->hasExplicitName = isExplicit; @@ -5081,6 +5072,7 @@ void WasmBinaryReader::readNames(size_t sectionPos, size_t payloadLen) { auto rawName = getInlineString(); auto name = processor.process(rawName); functionNames[index] = name; + usedFunctionNames.insert(name); } } else if (nameType == Subsection::NameLocal) { auto numFuncs = getU32LEB(); @@ -5112,6 +5104,7 @@ void WasmBinaryReader::readNames(size_t sectionPos, size_t payloadLen) { auto rawName = getInlineString(); auto name = processor.process(rawName); tableNames[index] = name; + usedTableNames.insert(name); } } else if (nameType == Subsection::NameElem) { auto num = getU32LEB(); @@ -5130,6 +5123,7 @@ void WasmBinaryReader::readNames(size_t sectionPos, size_t payloadLen) { auto rawName = getInlineString(); auto name = processor.process(rawName); memoryNames[index] = name; + usedMemoryNames.insert(name); } } else if (nameType == Subsection::NameData) { auto num = getU32LEB(); @@ -5148,6 +5142,7 @@ void WasmBinaryReader::readNames(size_t sectionPos, size_t payloadLen) { auto rawName = getInlineString(); auto name = processor.process(rawName); globalNames[index] = name; + usedGlobalNames.insert(name); } } else if (nameType == Subsection::NameField) { auto numTypes = getU32LEB(); @@ -5170,6 +5165,7 @@ void WasmBinaryReader::readNames(size_t sectionPos, size_t payloadLen) { auto rawName = getInlineString(); 
auto name = processor.process(rawName); tagNames[index] = name; + usedTagNames.insert(name); } } else { std::cerr << "warning: unknown name subsection with id " diff --git a/test/lit/name-overlap.wast b/test/lit/name-overlap.wast new file mode 100644 index 00000000000..ea97fa5e7ce --- /dev/null +++ b/test/lit/name-overlap.wast @@ -0,0 +1,16 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt -all --instrument-branch-hints --roundtrip -S -o - | filecheck %s + +;; Two imports exist here, and instrument-branch-hints will add another. The +;; name "fimport$2" happens to be the name that would be chosen for that new +;; import, leading to a situation that the existing import has a forced name +;; from the names section (it is named here in the wat) while we pick an +;; internal name (not from the name section) that overlaps with it, causing an +;; error if we do not make sure to avoid duplication between import and non- +;; import names. 
+ +(module + (import "fuzzing-support" "log-i64" (func $fimport$2 (param i64))) + (import "fuzzing-support" "log-f32" (func $fimport$3 (param f32))) +) From 9a69c342cd4b81acc29ae3c9bf7f1063644e73ba Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 08:53:53 -0700 Subject: [PATCH 169/239] format --- src/wasm/wasm-binary.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 54dad9079c7..4af7c939d72 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2536,8 +2536,8 @@ void WasmBinaryReader::readMemories() { } } for (size_t i = 0; i < num; i++) { - auto [name, isExplicit] = - getOrMakeName(memoryNames, numImports + i, makeName("", i), usedMemoryNames); + auto [name, isExplicit] = getOrMakeName( + memoryNames, numImports + i, makeName("", i), usedMemoryNames); auto memory = Builder::makeMemory(name); memory->hasExplicitName = isExplicit; getResizableLimits(memory->initial, @@ -3003,14 +3003,12 @@ void WasmBinaryReader::setLocalNames(Function& func, Index i) { void WasmBinaryReader::readFunctionSignatures() { size_t num = getU32LEB(); auto numImports = wasm.functions.size(); -std::cout << "adding existing\n"; for (auto& [index, name] : functionNames) { if (index >= num + numImports) { std::cerr << "warning: function index out of bounds in name section: " << name << " at index " << index << '\n'; } usedFunctionNames.insert(name); -std::cout << " adding: " << name << "\n"; } // Also check that the function indices in the local names subsection are // in-bounds, even though we don't use them here. 
@@ -3022,8 +3020,8 @@ std::cout << " adding: " << name << "\n"; } } for (size_t i = 0; i < num; i++) { - auto [name, isExplicit] = - getOrMakeName(functionNames, numImports + i, makeName("", i), usedFunctionNames); + auto [name, isExplicit] = getOrMakeName( + functionNames, numImports + i, makeName("", i), usedFunctionNames); auto index = getU32LEB(); HeapType type = getTypeByIndex(index); functionTypes.push_back(type); @@ -4862,8 +4860,8 @@ void WasmBinaryReader::readTableDeclarations() { } } for (size_t i = 0; i < num; i++) { - auto [name, isExplicit] = - getOrMakeName(tableNames, numImports + i, makeName("", i), usedTableNames); + auto [name, isExplicit] = getOrMakeName( + tableNames, numImports + i, makeName("", i), usedTableNames); auto elemType = getType(); if (!elemType.isRef()) { throwError("Table type must be a reference type"); @@ -4978,8 +4976,8 @@ void WasmBinaryReader::readTags() { } for (size_t i = 0; i < num; i++) { getInt8(); // Reserved 'attribute' field - auto [name, isExplicit] = - getOrMakeName(tagNames, numImports + i, makeName("tag$", i), usedTagNames); + auto [name, isExplicit] = getOrMakeName( + tagNames, numImports + i, makeName("tag$", i), usedTagNames); auto typeIndex = getU32LEB(); auto tag = Builder::makeTag(name, getSignatureByTypeIndex(typeIndex)); tag->hasExplicitName = isExplicit; From 9c887578a1ac38e7306bfec342ffda41f5e573a4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 08:56:04 -0700 Subject: [PATCH 170/239] test --- test/lit/name-overlap.wast | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/lit/name-overlap.wast b/test/lit/name-overlap.wast index ea97fa5e7ce..4caa4785e56 100644 --- a/test/lit/name-overlap.wast +++ b/test/lit/name-overlap.wast @@ -11,6 +11,15 @@ ;; import names. 
(module + ;; CHECK: (type $0 (func (param i64))) + + ;; CHECK: (type $1 (func (param f32))) + + ;; CHECK: (type $2 (func (param i32 i32 i32))) + + ;; CHECK: (import "fuzzing-support" "log-i64" (func $fimport$2 (type $0) (param i64))) (import "fuzzing-support" "log-i64" (func $fimport$2 (param i64))) + ;; CHECK: (import "fuzzing-support" "log-f32" (func $fimport$3 (type $1) (param f32))) (import "fuzzing-support" "log-f32" (func $fimport$3 (param f32))) ) +;; CHECK: (import "fuzzing-support" "log-branch" (func $fimport$2_2 (type $2) (param i32 i32 i32))) From 39308c50f28fcc32a14411881d8d9a06345d246d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 09:00:00 -0700 Subject: [PATCH 171/239] redundant --- src/wasm/wasm-binary.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 4af7c939d72..1437f2a8219 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -3008,7 +3008,6 @@ void WasmBinaryReader::readFunctionSignatures() { std::cerr << "warning: function index out of bounds in name section: " << name << " at index " << index << '\n'; } - usedFunctionNames.insert(name); } // Also check that the function indices in the local names subsection are // in-bounds, even though we don't use them here. 
From 02c1a63703cdaa3139fe1b922d50c78a3aee7946 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 10:20:29 -0700 Subject: [PATCH 172/239] fixen --- src/passes/InstrumentBranchHints.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 5da92841b7d..77210cd7220 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -294,6 +294,9 @@ struct InstrumentationProcessor : public WalkerPass> { return {}; } auto* set = *sets.begin(); + if (!set) { + return {}; + } auto& gets = getSub()->localGraph->getSetInfluences(set); if (gets.size() != 2) { return {}; @@ -327,10 +330,12 @@ struct DeleteBranchHints : public InstrumentationProcessor { template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition)) { - auto id = info->call->operands[0]->template cast()->value.geti32(); - if (idsToDelete.count(id)) { - // Remove the branch hint. - getFunction()->codeAnnotations[curr].branchLikely = {}; + if (auto* c = info->call->operands[0]->template dynCast()) { + auto id = c->value.geti32(); + if (idsToDelete.count(id)) { + // Remove the branch hint. 
+ getFunction()->codeAnnotations[curr].branchLikely = {}; + } } } } @@ -350,8 +355,6 @@ struct DeleteBranchHints : public InstrumentationProcessor { struct DeInstrumentBranchHints : public InstrumentationProcessor { - using Super = InstrumentationProcessor; - template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition)) { // Replace the instrumented condition with the original one (swap so that From a62b75d6476f21b1d403c793f3a8fde5892ba686 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 11:57:35 -0700 Subject: [PATCH 173/239] fix --- src/passes/InstrumentBranchHints.cpp | 25 ++++++++++++-------- test/lit/passes/instrument-branch-hints.wast | 11 ++++++--- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 77210cd7220..873b02c01ef 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -187,18 +187,23 @@ struct InstrumentBranchHints } void doWalkModule(Module* module) { - logBranch = getLogBranchImport(module); - // If it doesn't exist, add it. - if (!logBranch) { - auto* func = module->addFunction(Builder::makeFunction( - Names::getValidFunctionName(*module, BASE), - Signature({Type::i32, Type::i32, Type::i32}, Type::none), - {})); - func->module = MODULE; - func->base = BASE; - logBranch = func->name; + if (auto existing = getLogBranchImport(module)) { + // This file already has our import. We nop it out, as whatever the + // current code does may be dangerous (it may log incorrect hints). + auto* func = module->getFunction(existing); + func->body = Builder(*module).makeNop(); + func->module = func->base = Name(); } + // Add our import. 
+ auto* func = module->addFunction(Builder::makeFunction( + Names::getValidFunctionName(*module, BASE), + Signature({Type::i32, Type::i32, Type::i32}, Type::none), + {})); + func->module = MODULE; + func->base = BASE; + logBranch = func->name; + // Walk normally, using logBranch as we go. Super::doWalkModule(module); } diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 565bcf78716..11dccb4dc18 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ b/test/lit/passes/instrument-branch-hints.wast @@ -436,12 +436,17 @@ ;; This module has our import, but with a minified internal name. We should use ;; that import. (module + (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + ;; CHECK: (type $0 (func (param i32 i32 i32))) ;; CHECK: (type $1 (func)) - ;; CHECK: (import "fuzzing-support" "log-branch" (func $min (type $0) (param i32 i32 i32))) - (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $0) (param i32 i32 i32))) + + ;; CHECK: (func $min (type $0) (param $0 i32) (param $1 i32) (param $2 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) ;; CHECK: (func $if (type $1) ;; CHECK-NEXT: (local $x i32) @@ -462,7 +467,7 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (call $log-branch ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $1) From a56ea285aa43fd8b348495fc4360da687b877381 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 10 Jul 2025 12:35:17 -0700 Subject: [PATCH 174/239] fix --- test/lit/passes/instrument-branch-hints.wast | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/lit/passes/instrument-branch-hints.wast b/test/lit/passes/instrument-branch-hints.wast index 11dccb4dc18..5a6d73f5e0e 100644 --- a/test/lit/passes/instrument-branch-hints.wast +++ 
b/test/lit/passes/instrument-branch-hints.wast @@ -433,10 +433,10 @@ ) ) -;; This module has our import, but with a minified internal name. We should use -;; that import. +;; This module has an existing import with our module and base names. We nop it +;; and create a fresh one, to avoid confusion. (module - (import "fuzzing-support" "log-branch" (func $min (param i32 i32 i32))) + (import "fuzzing-support" "log-branch" (func $existing (param i32 i32 i32))) ;; CHECK: (type $0 (func (param i32 i32 i32))) @@ -444,7 +444,7 @@ ;; CHECK: (import "fuzzing-support" "log-branch" (func $log-branch (type $0) (param i32 i32 i32))) - ;; CHECK: (func $min (type $0) (param $0 i32) (param $1 i32) (param $2 i32) + ;; CHECK: (func $existing (type $0) (param $0 i32) (param $1 i32) (param $2 i32) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) @@ -459,7 +459,7 @@ ;; CHECK-NEXT: (local.set $x ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (call $min + ;; CHECK-NEXT: (call $existing ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (local.get $x) @@ -489,7 +489,7 @@ (local.set $x (i32.const 42) ) - (call $min + (call $existing (i32.const 42) (i32.const 1) (local.get $x) From a1e74c01146f5bfbf599c99ae33dac192da86fce Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 08:46:20 -0700 Subject: [PATCH 175/239] work --- scripts/fuzz_opt.py | 61 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index c81fada9b39..222b2a158df 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1941,18 +1941,59 @@ def handle(self, wasm): ] + FEATURE_OPTS run(args) - # No bad hints should pop up after optimizations. + # Run the final wasm. out = run_bynterp(final, ['--fuzz-exec-before', '-all']) + + # Preprocess the logging. 
We must discard all lines from functions that + # trap, because we are fuzzing branch hints, which are not an effect, + # and so they can be reordered with traps; consider this: + # + # (i32.add + # (block + # (if (X) (unreachable) + # (i32.const 10) + # ) + # (block + # (@metadata.code.branch_hint "\00") + # (if (Y) (unreachable) + # (i32.const 20) + # ) + # ) + # + # It is ok to reorder traps, so the optimizer might flip the arms of + # this add (imagine other code inside the arms justified that). That + # reordering is fine since the branch hint has no effect that the + # optimizer needs to care about. However, after we instrument, there + # *is* an effect, the visible logging, so if X is true we trap and do + # not log a branch hint, but if we reorder, we do log, then trap. + # + # Note that this problem is specific to traps, because the optimizer can + # reorder them, and does not care about identity. + # + # To handle this, gather lines for each call, and then see which groups + # end in traps. (Initialize the list of groups with an empty group, for + # any logging before the first call.) + line_groups = [['before calls']] for line in out.splitlines(): - if line.startswith(LEI_LOG_BRANCH): - _, _, id_, hint, actual = line[1:-1].split(' ') - hint = int(hint) - actual = int(actual) - assert hint in (0, 1) - # We do not care about the integer value of the condition, only - # if it was 0 or non-zero. - actual = (actual != 0) - assert hint == actual, 'Bad hint after optimizations' + if line.startswith(FUZZ_EXEC_CALL_PREFIX): + line_groups.append([line]) + else: + line_groups[-1].append(line) + + # No bad hints should pop up after optimizations. 
+ for group in line_groups: + if not group or group[-1] == '[trap unreachable]': + continue + for line in out.splitlines(): + if line.startswith(LEI_LOG_BRANCH): + _, _, id_, hint, actual = line[1:-1].split(' ') + hint = int(hint) + actual = int(actual) + assert hint in (0, 1) + # We do not care about the integer value of the condition, + # only if it was 0 or non-zero. + actual = (actual != 0) + assert hint == actual, 'Bad hint after optimizations' # The global list of all test case handlers From 69c1711cb80243a1adcd1950bf77c4d9ec7ea4b0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 08:48:36 -0700 Subject: [PATCH 176/239] oops --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 222b2a158df..cfe85d1deea 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1984,7 +1984,7 @@ def handle(self, wasm): for group in line_groups: if not group or group[-1] == '[trap unreachable]': continue - for line in out.splitlines(): + for line in group: if line.startswith(LEI_LOG_BRANCH): _, _, id_, hint, actual = line[1:-1].split(' ') hint = int(hint) From 0b309b7cceb1766e8bd09fac51c83435add7e374 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 09:08:30 -0700 Subject: [PATCH 177/239] go --- scripts/fuzz_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index cfe85d1deea..21d4a2816c4 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1924,7 +1924,7 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', - ] + ['--remove-unused-brs', '--optimize-instructions'] + FEATURE_OPTS + ] + get_random_opts() + FEATURE_OPTS run(args) # Add instrumentation, to see if any branch hints are wrong after From 5a136b63606ee54ec925be821e7304796578dbf9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 10:48:50 -0700 Subject: [PATCH 178/239] fix --- src/passes/Vacuum.cpp | 2 ++ 
test/lit/passes/vacuum-branch-hints.wast | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 test/lit/passes/vacuum-branch-hints.wast diff --git a/src/passes/Vacuum.cpp b/src/passes/Vacuum.cpp index 0e49f19c89f..b5e223d9f21 100644 --- a/src/passes/Vacuum.cpp +++ b/src/passes/Vacuum.cpp @@ -19,6 +19,7 @@ // #include +#include #include #include #include @@ -297,6 +298,7 @@ struct Vacuum : public WalkerPass> { curr->ifFalse = nullptr; curr->condition = Builder(*getModule()).makeUnary(EqZInt32, curr->condition); + BranchHints::flip(curr, getFunction()); } else if (curr->ifTrue->is() && curr->ifFalse->is()) { // instead of dropping both sides, drop the if, if they are the same // type diff --git a/test/lit/passes/vacuum-branch-hints.wast b/test/lit/passes/vacuum-branch-hints.wast new file mode 100644 index 00000000000..e72d25b04df --- /dev/null +++ b/test/lit/passes/vacuum-branch-hints.wast @@ -0,0 +1,23 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --vacuum -all -S -o - | filecheck %s + +(module + (func $if (param $x i32) + ;; When we flip the if, the hint should flip too. 
+ (@metadata.code.branch_hint "\00") + (if (result i32) + (i32.eqz + (local.get $x) + ) + (then + (nop) + ) + (else + (call $if + (local.get $x) + ) + ) + ) + ) +) + From ceb5c77c596955ad532cdd582a86e28f130785b8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 10:55:08 -0700 Subject: [PATCH 179/239] fix --- test/lit/passes/vacuum-branch-hints.wast | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/vacuum-branch-hints.wast b/test/lit/passes/vacuum-branch-hints.wast index e72d25b04df..39d1381f9dd 100644 --- a/test/lit/passes/vacuum-branch-hints.wast +++ b/test/lit/passes/vacuum-branch-hints.wast @@ -2,10 +2,25 @@ ;; RUN: wasm-opt %s --vacuum -all -S -o - | filecheck %s (module + ;; CHECK: (func $if (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $if (param $x i32) ;; When we flip the if, the hint should flip too. 
(@metadata.code.branch_hint "\00") - (if (result i32) + (if (i32.eqz (local.get $x) ) From c44545702ec2a592d4d2e6e18a9588fc8110d108 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:01:43 -0700 Subject: [PATCH 180/239] start --- src/passes/Inlining.cpp | 1 + .../inlining_splitting_branch-hints.wast | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 test/lit/passes/inlining_splitting_branch-hints.wast diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index bc8dfb950aa..a953a061bb1 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -1097,6 +1097,7 @@ struct FunctionSplitter { auto* inlineableIf = getIf(inlineable->body); inlineableIf->condition = builder.makeUnary(EqZInt32, inlineableIf->condition); + abort(); inlineableIf->ifTrue = builder.makeCall( outlined->name, getForwardedArgs(func, builder), Type::none); inlineable->body = inlineableIf; diff --git a/test/lit/passes/inlining_splitting_branch-hints.wast b/test/lit/passes/inlining_splitting_branch-hints.wast new file mode 100644 index 00000000000..9309105d286 --- /dev/null +++ b/test/lit/passes/inlining_splitting_branch-hints.wast @@ -0,0 +1,29 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --inlining --optimize-level=3 --partial-inlining-ifs=1 --all-features -S -o - | filecheck %s + +;; The function we partially inline here has an if, which we emit as flipped +;; afterwards. The new ifs should have flipped hints. 
+ +(module + (func $func (param $0 i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $0) + (then + (return) + ) + ) + (loop $l + (unreachable) + ) + ) + (func $caller + (call $func + (i32.const 0) + ) + (call $func + (i32.const 0) + ) + ) +) From e907c37d7a93aa715d5a3b18f4052c0b5d764066 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:26:48 -0700 Subject: [PATCH 181/239] work --- src/ir/debuginfo.h | 6 --- src/ir/{debuginfo.cpp => metadata.cpp} | 37 ++++++++++++---- src/ir/metadata.h | 33 ++++++++++++++ src/ir/module-utils.cpp | 2 +- src/passes/Inlining.cpp | 3 +- .../inlining_splitting_branch-hints.wast | 43 +++++++++++++++++++ 6 files changed, 106 insertions(+), 18 deletions(-) rename src/ir/{debuginfo.cpp => metadata.cpp} (58%) create mode 100644 src/ir/metadata.h diff --git a/src/ir/debuginfo.h b/src/ir/debuginfo.h index 96c4d8c2a92..143675cb3b8 100644 --- a/src/ir/debuginfo.h +++ b/src/ir/debuginfo.h @@ -61,12 +61,6 @@ inline void copyOriginalToReplacement(Expression* original, } } -// Given an expression and a copy of it in another function, copy the debug -// info into the second function. -void copyBetweenFunctions(Expression* origin, - Expression* copy, - Function* originFunc, - Function* copyFunc); } // namespace wasm::debuginfo #endif // wasm_ir_debuginfo_h diff --git a/src/ir/debuginfo.cpp b/src/ir/metadata.cpp similarity index 58% rename from src/ir/debuginfo.cpp rename to src/ir/metadata.cpp index a5fe92d54f4..94d44683a84 100644 --- a/src/ir/debuginfo.cpp +++ b/src/ir/metadata.cpp @@ -14,20 +14,28 @@ * limitations under the License. 
*/ -#include "ir/debuginfo.h" +#include "ir/metadata.h" #include "wasm-traversal.h" #include "wasm.h" -namespace wasm::debuginfo { +namespace wasm::metadata { void copyBetweenFunctions(Expression* origin, Expression* copy, Function* originFunc, Function* copyFunc) { - if (originFunc->debugLocations.empty()) { - return; // No debug info to copy + if (originFunc->debugLocations.empty() && + originFunc->codeAnnotations.empty()) { + // Nothing to copy. + return; } + // List out instructions serially, so we can match them between the old and + // new copies. + // + // This is not that efficient, and in theory we could copy this in the + // caller context as the code is copied. However, we assume that most + // functions have no metadata, so this is faster in that common case. struct Lister : public PostWalker> { std::vector list; void visitExpression(Expression* curr) { list.push_back(curr); } @@ -41,14 +49,25 @@ void copyBetweenFunctions(Expression* origin, auto& originDebug = originFunc->debugLocations; auto& copyDebug = copyFunc->debugLocations; + auto& originAnnotations = originFunc->codeAnnotations; + auto& copyAnnotations = copyFunc->codeAnnotations; + assert(originList.list.size() == copyList.list.size()); for (Index i = 0; i < originList.list.size(); i++) { - auto iter = originDebug.find(originList.list[i]); - if (iter != originDebug.end()) { - auto location = iter->second; - copyDebug[copyList.list[i]] = location; + { + auto iter = originDebug.find(originList.list[i]); + if (iter != originDebug.end()) { + copyDebug[copyList.list[i]] = iter->second; + } + } + + { + auto iter = originAnnotations.find(originList.list[i]); + if (iter != originAnnotations.end()) { + copyAnnotations[copyList.list[i]] = iter->second; + } } } } -} // namespace wasm::debuginfo +} // namespace wasm::metadata diff --git a/src/ir/metadata.h b/src/ir/metadata.h new file mode 100644 index 00000000000..99fdb9ff48b --- /dev/null +++ b/src/ir/metadata.h @@ -0,0 +1,33 @@ +/* + * Copyright 2019 
WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_metadata_h +#define wasm_ir_metadata_h + +#include "wasm.h" + +namespace wasm::metadata { + +// Given an expression and a copy of it in another function, copy all the +// metadata - debug info, code annotations - into the second function. +void copyBetweenFunctions(Expression* origin, + Expression* copy, + Function* originFunc, + Function* copyFunc); + +} // namespace wasm::metadata + +#endif // wasm_ir_metadata_h diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index c19ae369eb9..f60a6db3264 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -72,7 +72,7 @@ copyFunctionWithoutAdd(Function* func, ret->localNames = func->localNames; ret->localIndices = func->localIndices; ret->body = ExpressionManipulator::copy(func->body, out); - debuginfo::copyBetweenFunctions(func->body, ret->body, func, ret.get()); + metadata::copyBetweenFunctions(func->body, ret->body, func, ret.get()); ret->prologLocation = func->prologLocation; ret->epilogLocation = func->epilogLocation; // Update file indices if needed diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index a953a061bb1..1b2c1726211 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -634,7 +634,7 @@ static void doCodeInlining(Module* module, // Generate and update the inlined contents auto* contents = ExpressionManipulator::copy(from->body, 
*module); - debuginfo::copyBetweenFunctions(from->body, contents, from, into); + metadata::copyBetweenFunctions(from->body, contents, from, into); updater.walk(contents); block->list.push_back(contents); block->type = retType; @@ -1097,7 +1097,6 @@ struct FunctionSplitter { auto* inlineableIf = getIf(inlineable->body); inlineableIf->condition = builder.makeUnary(EqZInt32, inlineableIf->condition); - abort(); inlineableIf->ifTrue = builder.makeCall( outlined->name, getForwardedArgs(func, builder), Type::none); inlineable->body = inlineableIf; diff --git a/test/lit/passes/inlining_splitting_branch-hints.wast b/test/lit/passes/inlining_splitting_branch-hints.wast index 9309105d286..08c3d382f0e 100644 --- a/test/lit/passes/inlining_splitting_branch-hints.wast +++ b/test/lit/passes/inlining_splitting_branch-hints.wast @@ -18,6 +18,44 @@ (unreachable) ) ) + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32))) + + ;; CHECK: (func $caller (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (block $__inlined_func$byn-split-inlineable-A$func + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $byn-split-outlined-A$func + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $__inlined_func$byn-split-inlineable-A$func$1 + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (call $byn-split-outlined-A$func + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $caller (call $func (i32.const 0) @@ -27,3 +65,8 @@ ) ) ) +;; CHECK: (func 
$byn-split-outlined-A$func (type $1) (param $0 i32) +;; CHECK-NEXT: (loop $l +;; CHECK-NEXT: (unreachable) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) From 91b78278d2bf7d2a60f7414d3e1b0b101b0bee6d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:28:29 -0700 Subject: [PATCH 182/239] fix --- src/ir/CMakeLists.txt | 2 +- src/ir/module-utils.cpp | 2 +- src/passes/Inlining.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt index a74c06897f8..2dce7c22509 100644 --- a/src/ir/CMakeLists.txt +++ b/src/ir/CMakeLists.txt @@ -2,7 +2,6 @@ FILE(GLOB ir_HEADERS *.h) set(ir_SOURCES ExpressionAnalyzer.cpp ExpressionManipulator.cpp - debuginfo.cpp drop.cpp effects.cpp eh-utils.cpp @@ -10,6 +9,7 @@ set(ir_SOURCES intrinsics.cpp lubs.cpp memory-utils.cpp + metadata.cpp module-utils.cpp names.cpp possible-contents.cpp diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index f60a6db3264..582a2ad82df 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -15,9 +15,9 @@ */ #include "module-utils.h" -#include "ir/debuginfo.h" #include "ir/intrinsics.h" #include "ir/manipulation.h" +#include "ir/metadata.h" #include "ir/properties.h" #include "support/insert_ordered.h" #include "support/topological_sort.h" diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 1b2c1726211..4fccac41e67 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -31,13 +31,13 @@ #include #include "ir/branch-utils.h" -#include "ir/debuginfo.h" #include "ir/drop.h" #include "ir/eh-utils.h" #include "ir/element-utils.h" #include "ir/find_all.h" #include "ir/literal-utils.h" #include "ir/localize.h" +#include "ir/metadata.h" #include "ir/module-utils.h" #include "ir/names.h" #include "ir/properties.h" From c59cf8b8098c5e30f5f5b136c3581473890aac16 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:34:33 -0700 Subject: [PATCH 183/239] finx --- 
test/lit/passes/inlining_splitting_branch-hints.wast | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/lit/passes/inlining_splitting_branch-hints.wast b/test/lit/passes/inlining_splitting_branch-hints.wast index 08c3d382f0e..ae631d07f3a 100644 --- a/test/lit/passes/inlining_splitting_branch-hints.wast +++ b/test/lit/passes/inlining_splitting_branch-hints.wast @@ -29,6 +29,7 @@ ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (local.get $0) @@ -44,6 +45,7 @@ ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (local.get $1) From 314ce6303f715787585cb990864ab405c17912f6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:35:31 -0700 Subject: [PATCH 184/239] almost --- test/lit/passes/inlining_splitting_branch-hints.wast | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/inlining_splitting_branch-hints.wast b/test/lit/passes/inlining_splitting_branch-hints.wast index ae631d07f3a..f8d2ba135bb 100644 --- a/test/lit/passes/inlining_splitting_branch-hints.wast +++ b/test/lit/passes/inlining_splitting_branch-hints.wast @@ -14,8 +14,9 @@ (return) ) ) + ;; More code, so this is not trivial. 
(loop $l - (unreachable) + (nop) ) ) ;; CHECK: (type $0 (func)) @@ -69,6 +70,6 @@ ) ;; CHECK: (func $byn-split-outlined-A$func (type $1) (param $0 i32) ;; CHECK-NEXT: (loop $l -;; CHECK-NEXT: (unreachable) +;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From 76b7f236e145f50c92961a6d5cd74c0558254b6a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:37:32 -0700 Subject: [PATCH 185/239] fix --- src/passes/Inlining.cpp | 2 ++ test/lit/passes/inlining_splitting_branch-hints.wast | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 4fccac41e67..a4ce4df556f 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -30,6 +30,7 @@ #include +#include "ir/branch-hints.h" #include "ir/branch-utils.h" #include "ir/drop.h" #include "ir/eh-utils.h" @@ -1097,6 +1098,7 @@ struct FunctionSplitter { auto* inlineableIf = getIf(inlineable->body); inlineableIf->condition = builder.makeUnary(EqZInt32, inlineableIf->condition); + BranchHints::flip(inlineableIf, inlineable); inlineableIf->ifTrue = builder.makeCall( outlined->name, getForwardedArgs(func, builder), Type::none); inlineable->body = inlineableIf; diff --git a/test/lit/passes/inlining_splitting_branch-hints.wast b/test/lit/passes/inlining_splitting_branch-hints.wast index f8d2ba135bb..73db81da78e 100644 --- a/test/lit/passes/inlining_splitting_branch-hints.wast +++ b/test/lit/passes/inlining_splitting_branch-hints.wast @@ -30,7 +30,7 @@ ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (local.get $0) @@ -46,7 +46,7 @@ ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (i32.eqz 
;; CHECK-NEXT: (local.get $1) From 79e2e4418e36a34f9ac9ea46b51623c6a064de83 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 11:46:42 -0700 Subject: [PATCH 186/239] start --- ...ate-function-elimination_branch-hints.wast | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 test/lit/passes/duplicate-function-elimination_branch-hints.wast diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast new file mode 100644 index 00000000000..5ea7214ce6b --- /dev/null +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -0,0 +1,27 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt --duplicate-function-elimination --all-features -S -o - | filecheck %s + +;; The functions here differ in branch hints, and should not be merged. 
+(module + (func $zero (export "zero") (param $x i32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $one (export "one") (param $x i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) From b922ff45e6aa23c572b7a9e68b4696dfe68c8df6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 13:05:41 -0700 Subject: [PATCH 187/239] work --- src/ir/function-utils.h | 13 ++- src/ir/hashed.h | 5 +- ...ate-function-elimination_branch-hints.wast | 93 +++++++++++++++++++ 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index ebdef0cd4bc..74ee6f2e248 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -37,10 +37,17 @@ inline bool equal(Function* left, Function* right) { return false; } } - if (!left->imported() && !right->imported()) { - return ExpressionAnalyzer::equal(left->body, right->body); + if (left->imported() && right->imported()) { + return true; + } + if (left->imported() || right->imported()) { + return false; + } + + // Look at the code as well. + if (!ExpressionAnalyzer::equal(left->body, right->body)) { + return false; } - return left->imported() && right->imported(); } } // namespace wasm::FunctionUtils diff --git a/src/ir/hashed.h b/src/ir/hashed.h index 4e90951f518..8fb31956b87 100644 --- a/src/ir/hashed.h +++ b/src/ir/hashed.h @@ -17,10 +17,11 @@ #ifndef _wasm_ir_hashed_h #define _wasm_ir_hashed_h +#include + #include "ir/utils.h" #include "support/hash.h" #include "wasm.h" -#include namespace wasm { @@ -65,6 +66,8 @@ struct FunctionHasher : public WalkerPass> { } hash_combine(digest, ExpressionAnalyzer::flexibleHash(func->body, customHasher)); + // TODO: Hash metadata (debug info, code annotations), though it would be + // very rare to get a false collision for these reasons. 
return digest; } diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast index 5ea7214ce6b..56e74d85e48 100644 --- a/test/lit/passes/duplicate-function-elimination_branch-hints.wast +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -25,3 +25,96 @@ ) ) ) + +;; These also differ, now one is missing a hint, and they should not be merged. +;; TODO: Perhaps when optimizing for size, we should merge and drop the hint? +(module + (func $zero (export "zero") (param $x i32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $one (export "one") (param $x i32) + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) + +;; Flipped case of the above, now the other one is the only one with a hint, +;; and that hint is flipped. +(module + (func $zero (export "zero") (param $x i32) + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $one (export "one") (param $x i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) + +;; Identical branch hints: We can merge here. +(module + (func $zero (export "zero") (param $x i32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $one (export "one") (param $x i32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) + +;; Ditto, with identical hints of 1. 
+(module + (func $zero (export "zero") (param $x i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $one (export "one") (param $x i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) + From f6de78b4e2d2d46d179ec308b6c4acee592daa39 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 13:14:42 -0700 Subject: [PATCH 188/239] work --- src/ir/metadata.cpp | 63 ++++++++++++++++++++++++++++++++++----------- src/ir/metadata.h | 4 +++ 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 94d44683a84..c330bfe623a 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -20,6 +20,24 @@ namespace wasm::metadata { +namespace { + +// List out instructions serially, so we can match them between the old and +// new copies. +// +// This is not that efficient, and in theory we could copy this in the +// caller context as the code is copied. However, we assume that most +// functions have no metadata, so this is faster in that common case. +struct Serializer : public PostWalker> { + Serializer(Expression* expr) { walk(expr); } + + std::vector list; + + void visitExpression(Expression* curr) { list.push_back(curr); } +}; + +} // anonymous namespace + void copyBetweenFunctions(Expression* origin, Expression* copy, Function* originFunc, @@ -30,21 +48,8 @@ void copyBetweenFunctions(Expression* origin, return; } - // List out instructions serially, so we can match them between the old and - // new copies. - // - // This is not that efficient, and in theory we could copy this in the - // caller context as the code is copied. However, we assume that most - // functions have no metadata, so this is faster in that common case. 
- struct Lister : public PostWalker> { - std::vector list; - void visitExpression(Expression* curr) { list.push_back(curr); } - }; - - Lister originList; - originList.walk(origin); - Lister copyList; - copyList.walk(copy); + Serializer originList(origin); + Serializer copyList(copy); auto& originDebug = originFunc->debugLocations; auto& copyDebug = copyFunc->debugLocations; @@ -70,4 +75,32 @@ void copyBetweenFunctions(Expression* origin, } } +// Given two expressions to use as keys, see if they have identical values (or +// identically is absent from) in two maps. +template +bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap) { + auto aIter = aMap.find(a); + auto bIter = bMap.find(b); + if (aIter == aMap.end() && bIter == bMap.end()) { + return true; + } + if (aIter == aMap.end() || bIter == bMap.end()) { + return false; + } + return aIter->second == bIter->second; +} + +bool compare(Function* a, Function* b) { + Serializer aList(a->body); + Serializer bList(b->body); + + assert(aList.list.size() == bList.list.size()); + for (Index i = 0; i < aList.list.size(); i++) { + if (!compare(aList[i], bList[i], a->debugLocations, b->debugLocations) || + !compare(aList[i], bList[i], a->codeAnnotations, b->codeAnnotations)) { + return false; + } + } +} + } // namespace wasm::metadata diff --git a/src/ir/metadata.h b/src/ir/metadata.h index 99fdb9ff48b..dac982f6bf5 100644 --- a/src/ir/metadata.h +++ b/src/ir/metadata.h @@ -28,6 +28,10 @@ void copyBetweenFunctions(Expression* origin, Function* originFunc, Function* copyFunc); +// Assuming two functions have identical code, check if they also have +// identical metadata. 
+bool compare(Function* a, Function* b); + } // namespace wasm::metadata #endif // wasm_ir_metadata_h From bceb296447f8424ed0b68604d37da624a1d61767 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 13:17:26 -0700 Subject: [PATCH 189/239] fix --- src/ir/function-utils.h | 1 + src/ir/metadata.cpp | 6 +++--- src/ir/metadata.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index 74ee6f2e248..28d36bdfb21 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -48,6 +48,7 @@ inline bool equal(Function* left, Function* right) { if (!ExpressionAnalyzer::equal(left->body, right->body)) { return false; } + return metadata::equal(left, right); } } // namespace wasm::FunctionUtils diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index c330bfe623a..8a10e4b6e33 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -90,14 +90,14 @@ bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap) { return aIter->second == bIter->second; } -bool compare(Function* a, Function* b) { +bool equal(Function* a, Function* b) { Serializer aList(a->body); Serializer bList(b->body); assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { - if (!compare(aList[i], bList[i], a->debugLocations, b->debugLocations) || - !compare(aList[i], bList[i], a->codeAnnotations, b->codeAnnotations)) { + if (!compare(aList[i].list, bList[i].list, a->debugLocations, b->debugLocations) || + !compare(aList[i].list, bList[i].list, a->codeAnnotations, b->codeAnnotations)) { return false; } } diff --git a/src/ir/metadata.h b/src/ir/metadata.h index dac982f6bf5..42ce4dc6015 100644 --- a/src/ir/metadata.h +++ b/src/ir/metadata.h @@ -30,7 +30,7 @@ void copyBetweenFunctions(Expression* origin, // Assuming two functions have identical code, check if they also have // identical metadata. 
-bool compare(Function* a, Function* b); +bool equal(Function* a, Function* b); } // namespace wasm::metadata From 950aa1ac8029a18eeaa299707614444341b0232c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 13:24:35 -0700 Subject: [PATCH 190/239] fix --- src/ir/function-utils.h | 1 + src/ir/metadata.cpp | 12 ++++++++++-- src/wasm.h | 5 +++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index 28d36bdfb21..be24ee9586c 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -17,6 +17,7 @@ #ifndef wasm_ir_function_h #define wasm_ir_function_h +#include "ir/metadata.h" #include "ir/utils.h" #include "wasm.h" diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 8a10e4b6e33..93705d8fa02 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -91,16 +91,24 @@ bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap) { } bool equal(Function* a, Function* b) { + if (a->debugLocations.empty() && b->debugLocations.empty() && + a->codeAnnotations.empty() && b->codeAnnotations.empty()) { + // Nothing to compare; no differences. 
+ return true; + } + Serializer aList(a->body); Serializer bList(b->body); assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { - if (!compare(aList[i].list, bList[i].list, a->debugLocations, b->debugLocations) || - !compare(aList[i].list, bList[i].list, a->codeAnnotations, b->codeAnnotations)) { + if (!compare(aList.list[i], bList.list[i], a->debugLocations, b->debugLocations) || + !compare(aList.list[i], bList.list[i], a->codeAnnotations, b->codeAnnotations)) { return false; } } + + return true; } } // namespace wasm::metadata diff --git a/src/wasm.h b/src/wasm.h index e1bd262400b..ec88a72eca7 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2279,6 +2279,11 @@ class Function : public Importable { static const uint8_t NeverInline = 0; static const uint8_t AlwaysInline = 127; std::optional inline_; + + bool operator==(const CodeAnnotation& other) const { + return branchLikely == other.branchLikely && + inline_ == other.inline_; + } }; // Function-level annotations are implemented with a key of nullptr, matching From cc654de471a383b371ceb2274c12821818dcfd19 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 13:57:25 -0700 Subject: [PATCH 191/239] fix --- ...ate-function-elimination_branch-hints.wast | 120 ++++++++++++++++-- 1 file changed, 110 insertions(+), 10 deletions(-) diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast index 56e74d85e48..60f86e2eb86 100644 --- a/test/lit/passes/duplicate-function-elimination_branch-hints.wast +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -5,7 +5,22 @@ ;; The functions here differ in branch hints, and should not be merged. 
(module - (func $zero (export "zero") (param $x i32) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $b)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) (@metadata.code.branch_hint "\00") (if (local.get $x) @@ -15,7 +30,16 @@ ) ) - (func $one (export "one") (param $x i32) + ;; CHECK: (func $b (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\01") (if (local.get $x) @@ -29,7 +53,22 @@ ;; These also differ, now one is missing a hint, and they should not be merged. ;; TODO: Perhaps when optimizing for size, we should merge and drop the hint? 
(module - (func $zero (export "zero") (param $x i32) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $b)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) (@metadata.code.branch_hint "\00") (if (local.get $x) @@ -39,7 +78,15 @@ ) ) - (func $one (export "one") (param $x i32) + ;; CHECK: (func $b (type $0) (param $x i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $b (export "b") (param $x i32) (if (local.get $x) (then @@ -52,7 +99,21 @@ ;; Flipped case of the above, now the other one is the only one with a hint, ;; and that hint is flipped. (module - (func $zero (export "zero") (param $x i32) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $b)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) (if (local.get $x) (then @@ -61,7 +122,16 @@ ) ) - (func $one (export "one") (param $x i32) + ;; CHECK: (func $b (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\01") (if (local.get $x) @@ -74,7 +144,22 @@ ;; Identical branch hints: We can merge here. 
(module - (func $zero (export "zero") (param $x i32) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $a)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) (@metadata.code.branch_hint "\00") (if (local.get $x) @@ -84,7 +169,7 @@ ) ) - (func $one (export "one") (param $x i32) + (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\00") (if (local.get $x) @@ -97,7 +182,22 @@ ;; Ditto, with identical hints of 1. (module - (func $zero (export "zero") (param $x i32) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $a)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) (@metadata.code.branch_hint "\01") (if (local.get $x) @@ -107,7 +207,7 @@ ) ) - (func $one (export "one") (param $x i32) + (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\01") (if (local.get $x) From ca3777a3c26e985e79c6773840fd9afadfe0c016 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 14:25:46 -0700 Subject: [PATCH 192/239] format --- src/ir/metadata.cpp | 11 ++++++++--- src/wasm.h | 3 +-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 93705d8fa02..fc46b319ccc 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -28,7 +28,8 @@ namespace { // This is not that efficient, and in theory we could copy this in the // caller context as the code is copied. 
However, we assume that most // functions have no metadata, so this is faster in that common case. -struct Serializer : public PostWalker> { +struct Serializer + : public PostWalker> { Serializer(Expression* expr) { walk(expr); } std::vector list; @@ -102,8 +103,12 @@ bool equal(Function* a, Function* b) { assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { - if (!compare(aList.list[i], bList.list[i], a->debugLocations, b->debugLocations) || - !compare(aList.list[i], bList.list[i], a->codeAnnotations, b->codeAnnotations)) { + if (!compare( + aList.list[i], bList.list[i], a->debugLocations, b->debugLocations) || + !compare(aList.list[i], + bList.list[i], + a->codeAnnotations, + b->codeAnnotations)) { return false; } } diff --git a/src/wasm.h b/src/wasm.h index ec88a72eca7..3f59ce6e5bc 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2281,8 +2281,7 @@ class Function : public Importable { std::optional inline_; bool operator==(const CodeAnnotation& other) const { - return branchLikely == other.branchLikely && - inline_ == other.inline_; + return branchLikely == other.branchLikely && inline_ == other.inline_; } }; From 5dfec1bcaf3e576cc9715cc76c4bbfa4fb595713 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:07:08 -0700 Subject: [PATCH 193/239] Update src/ir/metadata.cpp Co-authored-by: Thomas Lively --- src/ir/metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index fc46b319ccc..f2fd35412fd 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -77,7 +77,7 @@ void copyBetweenFunctions(Expression* origin, } // Given two expressions to use as keys, see if they have identical values (or -// identically is absent from) in two maps. +// are both absent) in two maps. 
template bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap) { auto aIter = aMap.find(a); From 8d6802a0d070ee17506012a83d672e381cbb0627 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:07:28 -0700 Subject: [PATCH 194/239] feedback --- src/ir/function-utils.h | 6 ++---- src/ir/metadata.cpp | 31 +++++++++++++++++++++---------- src/ir/metadata.h | 7 +++++-- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index be24ee9586c..61e7bd5ca1a 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -44,12 +44,10 @@ inline bool equal(Function* left, Function* right) { if (left->imported() || right->imported()) { return false; } - - // Look at the code as well. - if (!ExpressionAnalyzer::equal(left->body, right->body)) { + if (!metadata::equal(left, right)) { return false; } - return metadata::equal(left, right); + return ExpressionAnalyzer::equal(left->body, right->body); } } // namespace wasm::FunctionUtils diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index fc46b319ccc..ee58af1b21b 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -78,20 +78,26 @@ void copyBetweenFunctions(Expression* origin, // Given two expressions to use as keys, see if they have identical values (or // identically is absent from) in two maps. -template -bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap) { +template +bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap, const V& defaultValue) { auto aIter = aMap.find(a); + auto aItem = aIter != aMap.end() ? aIter->second : defaultValue; auto bIter = bMap.find(b); - if (aIter == aMap.end() && bIter == bMap.end()) { + auto bItem = bIter != bMap.end() ? bIter->second : defaultValue; + return aItem == bItem; +} + +bool equal(Function* a, Function* b) { + if (a->imported() && b->imported()) { + // No code metadata, and we don't yet store function-level metadata. 
return true; } - if (aIter == aMap.end() || bIter == bMap.end()) { - return false; + if (a->imported() || b->imported()) { + // See comment on declaration, we consider such a difference as making them + // unequal. + return true; } - return aIter->second == bIter->second; -} -bool equal(Function* a, Function* b) { if (a->debugLocations.empty() && b->debugLocations.empty() && a->codeAnnotations.empty() && b->codeAnnotations.empty()) { // Nothing to compare; no differences. @@ -101,14 +107,19 @@ bool equal(Function* a, Function* b) { Serializer aList(a->body); Serializer bList(b->body); + if (aList.list.size() != bList.list.size()) { + return false; + } + assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { if (!compare( - aList.list[i], bList.list[i], a->debugLocations, b->debugLocations) || + aList.list[i], bList.list[i], a->debugLocations, b->debugLocations, Function::DebugLocation()) || !compare(aList.list[i], bList.list[i], a->codeAnnotations, - b->codeAnnotations)) { + b->codeAnnotations, + Function::CodeAnnotation())) { return false; } } diff --git a/src/ir/metadata.h b/src/ir/metadata.h index 42ce4dc6015..ef18a8172c9 100644 --- a/src/ir/metadata.h +++ b/src/ir/metadata.h @@ -28,8 +28,11 @@ void copyBetweenFunctions(Expression* origin, Function* originFunc, Function* copyFunc); -// Assuming two functions have identical code, check if they also have -// identical metadata. +// Check if two functions have identical metadata. We consider differences like +// one being imported and the other not, or having different numbers of +// instructions, to mean they are not equal (as the meaning of comparisons +// becomes hard in such cases, and the main use here is to compare metadata +// after all else is known equal). 
bool equal(Function* a, Function* b); } // namespace wasm::metadata From 77e4a7ec83e752265404d49eed3d37661f87b1f8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:09:40 -0700 Subject: [PATCH 195/239] format --- src/ir/metadata.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 21eed865d95..de9e671ea54 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -79,7 +79,11 @@ void copyBetweenFunctions(Expression* origin, // Given two expressions to use as keys, see if they have identical values (or // are both absent) in two maps. template -bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap, const V& defaultValue) { +bool compare(Expression* a, + Expression* b, + const T& aMap, + const T& bMap, + const V& defaultValue) { auto aIter = aMap.find(a); auto aItem = aIter != aMap.end() ? aIter->second : defaultValue; auto bIter = bMap.find(b); @@ -113,8 +117,11 @@ bool equal(Function* a, Function* b) { assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { - if (!compare( - aList.list[i], bList.list[i], a->debugLocations, b->debugLocations, Function::DebugLocation()) || + if (!compare(aList.list[i], + bList.list[i], + a->debugLocations, + b->debugLocations, + Function::DebugLocation()) || !compare(aList.list[i], bList.list[i], a->codeAnnotations, From 73946de36fd3a1a4398fbf564dde8b4974c7278e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:10:20 -0700 Subject: [PATCH 196/239] fix --- src/ir/function-utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index 61e7bd5ca1a..1496b49c8a5 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -38,15 +38,15 @@ inline bool equal(Function* left, Function* right) { return false; } } + if (!metadata::equal(left, right)) { + return false; + } if (left->imported() 
&& right->imported()) { return true; } if (left->imported() || right->imported()) { return false; } - if (!metadata::equal(left, right)) { - return false; - } return ExpressionAnalyzer::equal(left->body, right->body); } From 556c0ea0d6273dad9c00721818b2ab707ac2159f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:11:20 -0700 Subject: [PATCH 197/239] fix --- src/ir/metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index de9e671ea54..12f756fea54 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -99,7 +99,7 @@ bool equal(Function* a, Function* b) { if (a->imported() || b->imported()) { // See comment on declaration, we consider such a difference as making them // unequal. - return true; + return false; } if (a->debugLocations.empty() && b->debugLocations.empty() && From 2d9eba5853fe35c11edae5756aca9c706d07bc5f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 15:14:52 -0700 Subject: [PATCH 198/239] avoid compiler warning on unset values --- src/wasm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm.h b/src/wasm.h index 3f59ce6e5bc..42de5ce890d 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2233,7 +2233,7 @@ class Function : public Importable { // Source maps debugging info: map expression nodes to their file, line, col, // symbol name. 
struct DebugLocation { - BinaryLocation fileIndex, lineNumber, columnNumber; + BinaryLocation fileIndex = -1, lineNumber = -1, columnNumber = -1; std::optional symbolNameIndex; bool operator==(const DebugLocation& other) const { return fileIndex == other.fileIndex && lineNumber == other.lineNumber && From 0721d8cf6691b9cb640e766627bf1eb4ac0c58cd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 11 Jul 2025 16:49:15 -0700 Subject: [PATCH 199/239] magic workaround for gcc compiler error --- src/ir/metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 12f756fea54..437aad72e61 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -83,7 +83,7 @@ bool compare(Expression* a, Expression* b, const T& aMap, const T& bMap, - const V& defaultValue) { + const V defaultValue) { auto aIter = aMap.find(a); auto aItem = aIter != aMap.end() ? aIter->second : defaultValue; auto bIter = bMap.find(b); From d1948192b93aec87456b9c673cfdb48c0792f893 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 14 Jul 2025 09:47:08 -0700 Subject: [PATCH 200/239] yikes --- src/passes/RemoveUnusedBrs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 5b505cf1d60..ceb61e49276 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -1485,6 +1485,9 @@ struct RemoveUnusedBrs : public WalkerPass> { } // Convert an if into a select, if possible and beneficial to do so. + // XXX we may run more code... with branch hints that are wrongg + // XXX 1. assert-build flag to disable enabling unrun code..?\ + // 2. or don't run unrun code with branch hints..? nah Select* selectify(If* iff) { // Only an if-else can be turned into a select. 
if (!iff->ifFalse) { From 354cc0129d25b177cd8d9e03fbab7239b02cef58 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 14 Jul 2025 10:29:58 -0700 Subject: [PATCH 201/239] fix --- scripts/fuzz_opt.py | 5 ++ src/passes/RemoveUnusedBrs.cpp | 24 +++++- ...sed-brs_branch-hints-unconditionalize.wast | 80 +++++++++++++++++++ 3 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 test/lit/passes/remove-unused-brs_branch-hints-unconditionalize.wast diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 21d4a2816c4..7cacb1aeec1 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1924,6 +1924,11 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', + # Do not unconditionalize code: if a branch hint does not run, but + # we start to run it all the time, it may have been a wrong hint + # that will show up as a false positive here (as it breaks our + # assumption that only valid branch hints remained in the module). + '--pass-arg=remove-unused-brs-never-unconditionalize', ] + get_random_opts() + FEATURE_OPTS run(args) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index ceb61e49276..67e4d865d8b 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -15,7 +15,17 @@ */ // -// Removes branches for which we go to where they go anyhow +// Removes branches for which we go to where they go anyhow. +// +// Arguments: +// +// --pass-arg=remove-unused-brs-never-unconditionalize +// +// This is used during fuzzing, to prevent us from unconditionalizing code +// (making it always run, when it didn't before). Unconditionalizing code +// is a problem for fuzzing branch hints, as a branch hint that never ran +// might be wrong, and if we start to run it, the fuzzer could think it +// found a bug. 
// #include "ir/branch-hints.h" @@ -1183,6 +1193,9 @@ struct RemoveUnusedBrs : public WalkerPass> { // perform some final optimizations struct FinalOptimizer : public PostWalker { bool shrink; + // Whether we are allowed to unconditionalize code, that is, make code + // run unconditionally that previously might not have run. + bool neverUnconditionalize; PassOptions& passOptions; bool needUniqify = false; @@ -1485,10 +1498,10 @@ struct RemoveUnusedBrs : public WalkerPass> { } // Convert an if into a select, if possible and beneficial to do so. - // XXX we may run more code... with branch hints that are wrongg - // XXX 1. assert-build flag to disable enabling unrun code..?\ - // 2. or don't run unrun code with branch hints..? nah Select* selectify(If* iff) { + if (neverUnconditionalize) { + return nullptr; + } // Only an if-else can be turned into a select. if (!iff->ifFalse) { return nullptr; @@ -1944,6 +1957,9 @@ struct RemoveUnusedBrs : public WalkerPass> { FinalOptimizer finalOptimizer(getPassOptions()); finalOptimizer.setModule(getModule()); finalOptimizer.shrink = getPassRunner()->options.shrinkLevel > 0; + finalOptimizer.neverUnconditionalize = + hasArgument("remove-unused-brs-never-unconditionalize"); + finalOptimizer.walkFunction(func); if (finalOptimizer.needUniqify) { wasm::UniqueNameMapper::uniquify(func->body); diff --git a/test/lit/passes/remove-unused-brs_branch-hints-unconditionalize.wast b/test/lit/passes/remove-unused-brs_branch-hints-unconditionalize.wast new file mode 100644 index 00000000000..428dfe1835c --- /dev/null +++ b/test/lit/passes/remove-unused-brs_branch-hints-unconditionalize.wast @@ -0,0 +1,80 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+ +;; RUN: wasm-opt %s --remove-unused-brs -all -S -o - \ +;; RUN: | filecheck %s +;; RUN: wasm-opt %s --remove-unused-brs --pass-arg=remove-unused-brs-never-unconditionalize -all -S -o - \ +;; RUN: | filecheck %s --check-prefix=NO_UN + +;; Verify that the "never-unconditionalize" flag is respected: when set, we do +;; not run code unconditionally that previously might not have run. This is +;; important as the branch hint in un-executed code may be right or wrong, which +;; can confuse the fuzzer. + +(module + ;; CHECK: (func $selectify (type $0) (param $x i32) (param $y i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (block $out (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; NO_UN: (func $selectify (type $0) (param $x i32) (param $y i32) (result i32) + ;; NO_UN-NEXT: (if (result i32) + ;; NO_UN-NEXT: (local.get $x) + ;; NO_UN-NEXT: (then + ;; NO_UN-NEXT: (local.get $y) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: (else + ;; NO_UN-NEXT: (@metadata.code.branch_hint "\01") + ;; NO_UN-NEXT: (if (result i32) + ;; NO_UN-NEXT: (local.get $y) + ;; NO_UN-NEXT: (then + ;; NO_UN-NEXT: (i32.const 10) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: (else + ;; NO_UN-NEXT: (block $out (result i32) + ;; NO_UN-NEXT: (nop) + ;; NO_UN-NEXT: (i32.const 20) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: ) + ;; NO_UN-NEXT: ) + (func $selectify (param $x i32) (param $y i32) (result i32) + ;; This if can be a select, but the nested if's branch hint will then + ;; always execute, which we should avoid when the flag is passed. 
+ (if (result i32) + (local.get $x) + (then + (local.get $y) + ) + (else + (block $out (result i32) + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (br $out + (i32.const 10) + ) + ) + ) + (i32.const 20) + ) + ) + ) + ) +) From 4af21ca4481a6bc85daf5fe9358a5f85170d6d51 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 14 Jul 2025 12:52:50 -0700 Subject: [PATCH 202/239] work --- scripts/fuzz_opt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 7cacb1aeec1..b0d366a11b8 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1929,6 +1929,9 @@ def handle(self, wasm): # that will show up as a false positive here (as it breaks our # assumption that only valid branch hints remained in the module). '--pass-arg=remove-unused-brs-never-unconditionalize', + # Some passes that can unconditionalize code can just be disabled, + # as they do not modify ifs or brs. + '--skip-pass=heap-store-optimization', ] + get_random_opts() + FEATURE_OPTS run(args) From b5be8c2656c618b2a1f696a0e99d0acf9e939c16 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 14 Jul 2025 13:23:26 -0700 Subject: [PATCH 203/239] when all else fails, disable the gcc warning --- src/ir/metadata.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 437aad72e61..e66a961342d 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -76,6 +76,9 @@ void copyBetweenFunctions(Expression* origin, } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" + // Given two expressions to use as keys, see if they have identical values (or // are both absent) in two maps. 
template @@ -134,4 +137,6 @@ bool equal(Function* a, Function* b) { return true; } +#pragma GCC diagnostic pop + } // namespace wasm::metadata From 0c7266200fbd3504c306363cabf45e8b84fd9c0c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 14 Jul 2025 13:24:26 -0700 Subject: [PATCH 204/239] undo --- scripts/fuzz_shell.js | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js index f33b453a98f..3f201b3c812 100644 --- a/scripts/fuzz_shell.js +++ b/scripts/fuzz_shell.js @@ -261,10 +261,8 @@ function oneIn(n) { return (randomBits() % n) == 0; } -// Import helpers. -var tempRet0; - // Set up the imports. +var tempRet0; var imports = { 'fuzzing-support': { // Logging. @@ -355,17 +353,6 @@ var imports = { // how many time units to wait). }); }, - - 'log-branch': (id, expected, actual) => { - // We only care about truthiness of the expected and actual values. - expected = +!!expected; - actual = +!!actual; - // Log out the expected and actual outcomes. This is useful for fuzzing, - // see fuzz_opt.py. For testing that expectations actually match reality - // (i.e. that branch hints are correct), you can adjust the logic here to - // throw on expected != actual . - console.log(`log-branch: hint ${id} of ${expected} and actual ${actual} (${expected === actual ? 'right' : 'WRONG'})`); - }, }, // Emscripten support. 
'env': { From f91653806bc2a5c1f53c974480f32700c98490cd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 15 Jul 2025 09:07:47 -0700 Subject: [PATCH 205/239] work --- scripts/fuzz_opt.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index b0d366a11b8..2a1fa5e7801 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1852,6 +1852,9 @@ def get_relevant_lines(wat): # example, and it may find ways to simplify code so fewer things execute), but # it should not emit a branch hint that is wrong - if it is not certain, it # should remove the branch hint. +# +# Note that bugs found by this fuzzer tend to require the following during +# reducing: BINARYEN_TRUST_GIVEN_WASM=1 in the env, and --text as a parameter. class BranchHintPreservation(TestCaseHandler): frequency = 1 # XXX @@ -1932,6 +1935,7 @@ def handle(self, wasm): # Some passes that can unconditionalize code can just be disabled, # as they do not modify ifs or brs. '--skip-pass=heap-store-optimization', + '--skip-pass=licm', ] + get_random_opts() + FEATURE_OPTS run(args) From fd3fa160b3ae399f1135b960e71a24d3a0783cc2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 15 Jul 2025 14:27:29 -0700 Subject: [PATCH 206/239] oh no --- scripts/fuzz_opt.py | 7 +++++-- src/passes/CodeFolding.cpp | 2 +- src/passes/LocalCSE.cpp | 2 +- src/passes/OptimizeInstructions.cpp | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 2a1fa5e7801..e26f8a82114 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1933,9 +1933,12 @@ def handle(self, wasm): # assumption that only valid branch hints remained in the module). '--pass-arg=remove-unused-brs-never-unconditionalize', # Some passes that can unconditionalize code can just be disabled, - # as they do not modify ifs or brs. 
- '--skip-pass=heap-store-optimization', + # as they do not modify ifs or brs: + # LICM moves code out of loops, possibly past a trap that would have + # prevented execution. '--skip-pass=licm', + # HeapStoreOptimization moves struct.sets closer to struct.news. + '--skip-pass=heap-store-optimization', ] + get_random_opts() + FEATURE_OPTS run(args) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 305eb12784f..8492f754c7a 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -250,7 +250,7 @@ struct CodeFolding auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { // If other is a suffix of the block, wrap it in a block. if (block->list.empty() || - !ExpressionAnalyzer::equal(other, block->list.back())) { + !ExpressionAnalyzer::equal(other, block->list.back())) { // meta? others? return nullptr; } // Do it, assign to the out param `other`, and return the block. diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 78f722c7ae5..ec62195667c 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -157,7 +157,7 @@ struct HEComparer { if (a.digest != b.digest) { return false; } - return ExpressionAnalyzer::equal(a.expr, b.expr); + return ExpressionAnalyzer::equal(a.expr, b.expr); // meta? } }; diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index b521f9ae45f..f49a540ad25 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1176,7 +1176,7 @@ struct OptimizeInstructions } } if (curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { + ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { // META! // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. 
But // make sure not to change a concrete expression to an unreachable @@ -3171,7 +3171,7 @@ struct OptimizeInstructions // Sides are identical, fold Expression *ifTrue, *ifFalse, *c; if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && - ExpressionAnalyzer::equal(ifTrue, ifFalse)) { + ExpressionAnalyzer::equal(ifTrue, ifFalse)) { // meta! auto value = effects(ifTrue); if (value.hasSideEffects()) { // At best we don't need the condition, but need to execute the From 2414f80178ab2eb105d4da5672b68d564f820c2c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 10:09:00 -0700 Subject: [PATCH 207/239] fix --- src/ir/metadata.cpp | 22 +-- src/ir/metadata.h | 4 + src/ir/utils.h | 13 ++ src/passes/OptimizeInstructions.cpp | 5 +- .../optimize-instructions-branch-hints.wast | 126 +++++++++++++++++- 5 files changed, 159 insertions(+), 11 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index e66a961342d..4a1477aaefa 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -105,14 +105,20 @@ bool equal(Function* a, Function* b) { return false; } - if (a->debugLocations.empty() && b->debugLocations.empty() && - a->codeAnnotations.empty() && b->codeAnnotations.empty()) { + return equal(a->body, b->body, a, b); +} + +bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { + assert(aFunc && bFunc); + + if (aFunc->debugLocations.empty() && bFunc->debugLocations.empty() && + aFunc->codeAnnotations.empty() && bFunc->codeAnnotations.empty()) { // Nothing to compare; no differences. 
return true; } - Serializer aList(a->body); - Serializer bList(b->body); + Serializer aList(a); + Serializer bList(b); if (aList.list.size() != bList.list.size()) { return false; @@ -122,13 +128,13 @@ bool equal(Function* a, Function* b) { for (Index i = 0; i < aList.list.size(); i++) { if (!compare(aList.list[i], bList.list[i], - a->debugLocations, - b->debugLocations, + aFunc->debugLocations, + bFunc->debugLocations, Function::DebugLocation()) || !compare(aList.list[i], bList.list[i], - a->codeAnnotations, - b->codeAnnotations, + aFunc->codeAnnotations, + bFunc->codeAnnotations, Function::CodeAnnotation())) { return false; } diff --git a/src/ir/metadata.h b/src/ir/metadata.h index ef18a8172c9..aaa1a2b6c6a 100644 --- a/src/ir/metadata.h +++ b/src/ir/metadata.h @@ -35,6 +35,10 @@ void copyBetweenFunctions(Expression* origin, // after all else is known equal). bool equal(Function* a, Function* b); +// Check if two expressions are equal in metadata. They may or may not be from +// the same function. +bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc); + } // namespace wasm::metadata #endif // wasm_ir_metadata_h diff --git a/src/ir/utils.h b/src/ir/utils.h index 8051cb1a348..f24397839c9 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -18,6 +18,7 @@ #define wasm_ir_utils_h #include "ir/branch-utils.h" +#include "ir/metadata.h" #include "pass.h" #include "wasm-builder.h" #include "wasm-traversal.h" @@ -69,6 +70,18 @@ struct ExpressionAnalyzer { return flexibleEqual(left, right, comparer); } + // Compare two expressions and their metadata as well. If just the first + // function is provided, we consider them both to arrive from the same one. 
+ static bool equalIncludingMetadata(Expression* left, Expression* right, Function* leftFunc=nullptr, Function* rightFunc=nullptr) { + if (!equal(left, right)) { + return false; + } + if (!rightFunc) { + rightFunc = leftFunc; + } + return metadata::equal(left, right, leftFunc, rightFunc); + } + // A shallow comparison, ignoring child nodes. static bool shallowEqual(Expression* left, Expression* right) { auto comparer = [left, right](Expression* currLeft, Expression* currRight) { diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index cada4085572..17ee0005f4b 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1168,16 +1168,17 @@ struct OptimizeInstructions void visitIf(If* curr) { curr->condition = optimizeBoolean(curr->condition); if (curr->ifFalse) { + auto* func = getFunction(); if (auto* unary = curr->condition->dynCast()) { if (unary->op == EqZInt32) { // flip if-else arms to get rid of an eqz curr->condition = unary->value; std::swap(curr->ifTrue, curr->ifFalse); - BranchHints::flip(curr, getFunction()); + BranchHints::flip(curr, func); } } if (curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { // META! + ExpressionAnalyzer::equalIncludingMetadata(curr->ifTrue, curr->ifFalse, func)) { // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. 
But // make sure not to change a concrete expression to an unreachable diff --git a/test/lit/passes/optimize-instructions-branch-hints.wast b/test/lit/passes/optimize-instructions-branch-hints.wast index 270bf179216..e3205715271 100644 --- a/test/lit/passes/optimize-instructions-branch-hints.wast +++ b/test/lit/passes/optimize-instructions-branch-hints.wast @@ -2,7 +2,7 @@ ;; RUN: wasm-opt %s --optimize-instructions -all -S -o - | filecheck %s (module - ;; CHECK: (func $conditionals (type $0) (param $x i32) (result i32) + ;; CHECK: (func $conditionals (type $1) (param $x i32) (result i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if (result i32) ;; CHECK-NEXT: (local.get $x) @@ -29,4 +29,128 @@ ) ) ) + + ;; CHECK: (func $no-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $no-merge (param $x i32) (param $y i32) + ;; Do not merge if arms if metadata differs (perhaps the hint is intentionally + ;; different, reflecting different runtime behavior). 
+ (if + (local.get $x) + (then + (@metadata.code.branch_hint "\00") + (if + (local.get $y) + (then + (unreachable) + ) + ) + ) + (else + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (unreachable) + ) + ) + ) + ) + ) + + ;; CHECK: (func $yes-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $yes-merge (param $x i32) (param $y i32) + ;; As above, but now the hints match, so we merge. + (if + (local.get $x) + (then + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (unreachable) + ) + ) + ) + (else + (@metadata.code.branch_hint "\01") + (if + (local.get $y) + (then + (unreachable) + ) + ) + ) + ) + ) + + ;; CHECK: (func $always-merge-select (type $2) (param $x i32) (param $y i32) (result i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if (result i32) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $always-merge-select (param $x i32) (param $y i32) (result i32) + ;; A select with different metadata is still merged: the code was executed + ;; anyhow, so it's fine if we execute just one of the two (we pick the first, + ;; arbitrarily). 
+ (select + (@metadata.code.branch_hint "\00") + (if (result i32) + (local.get $x) + (then + (i32.const 10) + ) + (else + (i32.const 20) + ) + ) + (@metadata.code.branch_hint "\01") + (if (result i32) + (local.get $x) + (then + (i32.const 10) + ) + (else + (i32.const 20) + ) + ) + (local.get $y) + ) + ) ) From 894306220e6a8938f297e6a2a140014e6ef828bc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 10:19:47 -0700 Subject: [PATCH 208/239] work --- src/ir/metadata.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 4a1477aaefa..726a8ebc711 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -111,6 +111,8 @@ bool equal(Function* a, Function* b) { bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { assert(aFunc && bFunc); + // When optimizing, should we ignore debugLocations..? We are ok to lose + // those but not code annotations, then? if (aFunc->debugLocations.empty() && bFunc->debugLocations.empty() && aFunc->codeAnnotations.empty() && bFunc->codeAnnotations.empty()) { // Nothing to compare; no differences. From 6f56435b17aed6ee5576ac35d0550627efabbd77 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 14:41:14 -0700 Subject: [PATCH 209/239] work --- src/ir/metadata.cpp | 15 +++---- ...ate-function-elimination_branch-hints.wast | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index e66a961342d..f69927ecc10 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -105,8 +105,14 @@ bool equal(Function* a, Function* b) { return false; } - if (a->debugLocations.empty() && b->debugLocations.empty() && - a->codeAnnotations.empty() && b->codeAnnotations.empty()) { + // TODO: We do not consider debug locations here. This is often what is + // desired in optimized builds (e.g. 
if we are trying to fold two + // pieces of code together, that benefit outweighs slightly inaccurate + // debug info). If we find that non-optimizer locations call this in + // ways that lead to degraded debug info, we could add an option to + // control it. + + if (a->codeAnnotations.empty() && b->codeAnnotations.empty()) { // Nothing to compare; no differences. return true; } @@ -121,11 +127,6 @@ bool equal(Function* a, Function* b) { assert(aList.list.size() == bList.list.size()); for (Index i = 0; i < aList.list.size(); i++) { if (!compare(aList.list[i], - bList.list[i], - a->debugLocations, - b->debugLocations, - Function::DebugLocation()) || - !compare(aList.list[i], bList.list[i], a->codeAnnotations, b->codeAnnotations, diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast index 60f86e2eb86..83b64f7e47c 100644 --- a/test/lit/passes/duplicate-function-elimination_branch-hints.wast +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -218,3 +218,43 @@ ) ) +;; Source file location (debug info) does *not* prevent optimization. We +;; prioritize optimization over debug info quality. +(module + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (export "a" (func $a)) + + ;; CHECK: (export "b" (func $a)) + + ;; CHECK: (func $a (type $0) (param $x i32) + ;; CHECK-NEXT: ;;@ src.cpp:10:1 + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $a (export "a") (param $x i32) + ;; After we merge, this hint will remain in the single function. 
+ ;;@ src.cpp:10:1 + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) + + (func $b (export "b") (param $x i32) + ;;@ src.cpp:20:1 + (if + (local.get $x) + (then + (unreachable) + ) + ) + ) +) + From d43226b6f57521714f25650debe2d754a36239da Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 14:47:34 -0700 Subject: [PATCH 210/239] work --- src/passes/OptimizeInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 17ee0005f4b..0fd0768ca41 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3239,7 +3239,7 @@ struct OptimizeInstructions // Sides are identical, fold Expression *ifTrue, *ifFalse, *c; if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && - ExpressionAnalyzer::equal(ifTrue, ifFalse)) { // meta! + ExpressionAnalyzer::equal(ifTrue, ifFalse)) { auto value = effects(ifTrue); if (value.hasSideEffects()) { // At best we don't need the condition, but need to execute the From cf96c46c696b906498931484d22f1e7b64389a9a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 14:56:20 -0700 Subject: [PATCH 211/239] work --- src/ir/metadata.cpp | 6 +++--- src/passes/LocalCSE.cpp | 7 ++++++- src/passes/OptimizeInstructions.cpp | 6 +++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 460840ae162..770c7d9d663 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -117,7 +117,7 @@ bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { // debug info). If we find that non-optimizer locations call this in // ways that lead to degraded debug info, we could add an option to // control it. - if (a->codeAnnotations.empty() && b->codeAnnotations.empty()) { + if (aFunc->codeAnnotations.empty() && bFunc->codeAnnotations.empty()) { // Nothing to compare; no differences. 
return true; } @@ -133,8 +133,8 @@ bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { for (Index i = 0; i < aList.list.size(); i++) { if (!compare(aList.list[i], bList.list[i], - a->codeAnnotations, - b->codeAnnotations, + aFunc->codeAnnotations, + bFunc->codeAnnotations, Function::CodeAnnotation())) { return false; } diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index ec62195667c..9c9a8198f89 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -157,7 +157,12 @@ struct HEComparer { if (a.digest != b.digest) { return false; } - return ExpressionAnalyzer::equal(a.expr, b.expr); // meta? + // Note that we do not consider metadata here. That means we may replace two + // identical expressions with different metadata, say, different branch + // hints, but that is ok: we are only removing things from executing (by + // reusing the first computed value), so this will not cause new invalid + // branch hints to execute. + return ExpressionAnalyzer::equal(a.expr, b.expr); } }; diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 0fd0768ca41..705f0124c0a 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3236,8 +3236,12 @@ struct OptimizeInstructions } } { - // Sides are identical, fold + // If sides are identical, fold. Expression *ifTrue, *ifFalse, *c; + // Note we do not compare metadata here: This is a select, so both arms + // execute anyhow, and things like branch hints were already being run. + // After optimization, we will only run fewer things, and run no risk of + // running new bad things. 
if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && ExpressionAnalyzer::equal(ifTrue, ifFalse)) { auto value = effects(ifTrue); From bd402b47a2707ccdbce5ca545011da6e565695fe Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 15:11:34 -0700 Subject: [PATCH 212/239] fix --- src/passes/CodeFolding.cpp | 10 +- .../lit/passes/code-folding_branch-hints.wast | 107 ++++++++++++++++++ 2 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 test/lit/passes/code-folding_branch-hints.wast diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 8492f754c7a..110a3d37db7 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -250,7 +250,7 @@ struct CodeFolding auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { // If other is a suffix of the block, wrap it in a block. if (block->list.empty() || - !ExpressionAnalyzer::equal(other, block->list.back())) { // meta? others? + !ExpressionAnalyzer::equalIncludingMetadata(other, block->list.back(), getFunction())) { return nullptr; } // Do it, assign to the out param `other`, and return the block. @@ -395,7 +395,7 @@ struct CodeFolding Index tail = 1; for (; tail < tails.size(); ++tail) { auto* other = getMergeable(tails[tail], num); - if (!other || !ExpressionAnalyzer::equal(item, other)) { + if (!other || !ExpressionAnalyzer::equalIncludingMetadata(item, other, getFunction())) { // Other tail too short or has a difference. 
break; } @@ -673,7 +673,7 @@ struct CodeFolding [&](Expression* item) { if (item == first || // don't bother comparing the first - ExpressionAnalyzer::equal(item, first)) { + ExpressionAnalyzer::equalIncludingMetadata(item, first, getFunction())) { // equal, keep it return false; } else { @@ -691,8 +691,8 @@ struct CodeFolding explore.end(), [&](Tail& tail) { auto* item = getItem(tail, num); - return !ExpressionAnalyzer::equal( - item, correct); + return !ExpressionAnalyzer::equalIncludingMetadata( + item, correct, getFunction()); }), explore.end()); // try to optimize this deeper tail. if we succeed, then stop here, diff --git a/test/lit/passes/code-folding_branch-hints.wast b/test/lit/passes/code-folding_branch-hints.wast new file mode 100644 index 00000000000..407986a931a --- /dev/null +++ b/test/lit/passes/code-folding_branch-hints.wast @@ -0,0 +1,107 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: wasm-opt %s -all --code-folding -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func (param i32 i32) (result f32))) + + ;; CHECK: (func $different (type $0) (param $x i32) (param $y i32) (result f32) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $different (param $x i32) (param $y i32) (result f32) + ;; The branch hints differ, so we do not optimize. 
+ (if (result f32) + (local.get $x) + (then + (block (result f32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + (else + (block (result f32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + ) + ) + + ;; CHECK: (func $same (type $0) (param $x i32) (param $y i32) (result f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + (func $same (param $x i32) (param $y i32) (result f32) + ;; The branch hints are the same, so we optimize. + (if (result f32) + (local.get $x) + (then + (block (result f32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + (else + (block (result f32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + ) + ) +) From 75af49f4cd124d1fe9abd9264cff77a9ebacc9fc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 16 Jul 2025 15:32:32 -0700 Subject: [PATCH 213/239] format --- src/ir/utils.h | 5 ++++- src/passes/CodeFolding.cpp | 15 +++++++++------ src/passes/OptimizeInstructions.cpp | 3 ++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/ir/utils.h b/src/ir/utils.h index f24397839c9..5312242f5ba 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -72,7 +72,10 @@ struct ExpressionAnalyzer { // Compare two expressions and their metadata as well. If just the first // function is provided, we consider them both to arrive from the same one. 
- static bool equalIncludingMetadata(Expression* left, Expression* right, Function* leftFunc=nullptr, Function* rightFunc=nullptr) { + static bool equalIncludingMetadata(Expression* left, + Expression* right, + Function* leftFunc = nullptr, + Function* rightFunc = nullptr) { if (!equal(left, right)) { return false; } diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 110a3d37db7..e8c11a3bf91 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -249,8 +249,8 @@ struct CodeFolding // run the rest of the optimization mormally. auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { // If other is a suffix of the block, wrap it in a block. - if (block->list.empty() || - !ExpressionAnalyzer::equalIncludingMetadata(other, block->list.back(), getFunction())) { + if (block->list.empty() || !ExpressionAnalyzer::equalIncludingMetadata( + other, block->list.back(), getFunction())) { return nullptr; } // Do it, assign to the out param `other`, and return the block. @@ -395,7 +395,8 @@ struct CodeFolding Index tail = 1; for (; tail < tails.size(); ++tail) { auto* other = getMergeable(tails[tail], num); - if (!other || !ExpressionAnalyzer::equalIncludingMetadata(item, other, getFunction())) { + if (!other || !ExpressionAnalyzer::equalIncludingMetadata( + item, other, getFunction())) { // Other tail too short or has a difference. 
break; } @@ -673,7 +674,8 @@ struct CodeFolding [&](Expression* item) { if (item == first || // don't bother comparing the first - ExpressionAnalyzer::equalIncludingMetadata(item, first, getFunction())) { + ExpressionAnalyzer::equalIncludingMetadata( + item, first, getFunction())) { // equal, keep it return false; } else { @@ -691,8 +693,9 @@ struct CodeFolding explore.end(), [&](Tail& tail) { auto* item = getItem(tail, num); - return !ExpressionAnalyzer::equalIncludingMetadata( - item, correct, getFunction()); + return !ExpressionAnalyzer:: + equalIncludingMetadata( + item, correct, getFunction()); }), explore.end()); // try to optimize this deeper tail. if we succeed, then stop here, diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 705f0124c0a..ca72eba43de 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1178,7 +1178,8 @@ struct OptimizeInstructions } } if (curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equalIncludingMetadata(curr->ifTrue, curr->ifFalse, func)) { + ExpressionAnalyzer::equalIncludingMetadata( + curr->ifTrue, curr->ifFalse, func)) { // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. But // make sure not to change a concrete expression to an unreachable From b67f2d4d95ad62bd967d891e10276fe7e3259f24 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 17 Jul 2025 12:44:32 -0700 Subject: [PATCH 214/239] todo --- src/passes/OptimizeInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index ca72eba43de..6a4979b47c0 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -5671,7 +5671,7 @@ struct OptimizeInstructions while (1) { // Ignore control flow structures (which are handled in MergeBlocks). 
if (!Properties::isControlFlowStructure(curr->ifTrue) && - ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) { + ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) { // This too! XXX // TODO: consider the case with more than one child. ChildIterator ifTrueChildren(curr->ifTrue); if (ifTrueChildren.children.size() == 1) { From 58f81db83e181d487923574d768dee2686dc6bd3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 10:16:32 -0700 Subject: [PATCH 215/239] unyolo --- src/ir/function-utils.h | 8 ++++---- ...icate-function-elimination_branch-hints.wast | 17 ++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h index 1496b49c8a5..29aca8885ae 100644 --- a/src/ir/function-utils.h +++ b/src/ir/function-utils.h @@ -17,7 +17,6 @@ #ifndef wasm_ir_function_h #define wasm_ir_function_h -#include "ir/metadata.h" #include "ir/utils.h" #include "wasm.h" @@ -38,9 +37,10 @@ inline bool equal(Function* left, Function* right) { return false; } } - if (!metadata::equal(left, right)) { - return false; - } + // We could in principle compare metadata here, but intentionally do not, as + // for optimization purposes we do want to e.g. merge functions that differ + // only in metadata (following LLVM's example). If we have a non-optimization + // reason for comparing metadata here then we could add a flag for it. 
if (left->imported() && right->imported()) { return true; } diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast index 83b64f7e47c..03871469068 100644 --- a/test/lit/passes/duplicate-function-elimination_branch-hints.wast +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -3,7 +3,10 @@ ;; RUN: foreach %s %t wasm-opt --duplicate-function-elimination --all-features -S -o - | filecheck %s -;; The functions here differ in branch hints, and should not be merged. +;; Test that we merge functions even if they differ in branch hints. This is +;; good for code size, and follows what LLVM does. + +;; The functions here differ in branch hints (but we still merge). (module ;; CHECK: (type $0 (func (param i32))) @@ -50,8 +53,7 @@ ) ) -;; These also differ, now one is missing a hint, and they should not be merged. -;; TODO: Perhaps when optimizing for size, we should merge and drop the hint? +;; These also differ, now one is missing a hint (but we still merge). (module ;; CHECK: (type $0 (func (param i32))) @@ -97,7 +99,7 @@ ) ;; Flipped case of the above, now the other one is the only one with a hint, -;; and that hint is flipped. +;; and that hint is flipped (but we still merge). (module ;; CHECK: (type $0 (func (param i32))) @@ -142,7 +144,7 @@ ) ) -;; Identical branch hints: We can merge here. +;; Identical branch hints: We can definitely merge here. (module ;; CHECK: (type $0 (func (param i32))) @@ -218,8 +220,9 @@ ) ) -;; Source file location (debug info) does *not* prevent optimization. We -;; prioritize optimization over debug info quality. +;; Source file location (debug info) does not prevent optimization (and has +;; even less reason to do so than branch hints, as we prioritize optimization +;; over debug info quality). 
(module ;; CHECK: (type $0 (func (param i32))) From b06e4bd5105664ace402536f7a1bdf12f9a13b6b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 11:25:44 -0700 Subject: [PATCH 216/239] update test --- ...ate-function-elimination_branch-hints.wast | 32 ++----------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/test/lit/passes/duplicate-function-elimination_branch-hints.wast b/test/lit/passes/duplicate-function-elimination_branch-hints.wast index 03871469068..75a16f148bb 100644 --- a/test/lit/passes/duplicate-function-elimination_branch-hints.wast +++ b/test/lit/passes/duplicate-function-elimination_branch-hints.wast @@ -12,7 +12,7 @@ ;; CHECK: (export "a" (func $a)) - ;; CHECK: (export "b" (func $b)) + ;; CHECK: (export "b" (func $a)) ;; CHECK: (func $a (type $0) (param $x i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -33,15 +33,6 @@ ) ) - ;; CHECK: (func $b (type $0) (param $x i32) - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\01") (if @@ -59,7 +50,7 @@ ;; CHECK: (export "a" (func $a)) - ;; CHECK: (export "b" (func $b)) + ;; CHECK: (export "b" (func $a)) ;; CHECK: (func $a (type $0) (param $x i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -80,14 +71,6 @@ ) ) - ;; CHECK: (func $b (type $0) (param $x i32) - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) (func $b (export "b") (param $x i32) (if (local.get $x) @@ -105,7 +88,7 @@ ;; CHECK: (export "a" (func $a)) - ;; CHECK: (export "b" (func $b)) + ;; CHECK: (export "b" (func $a)) ;; CHECK: (func $a (type $0) (param $x i32) ;; CHECK-NEXT: (if @@ -124,15 +107,6 @@ ) ) - ;; CHECK: (func $b (type $0) (param $x i32) - ;; 
CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) (func $b (export "b") (param $x i32) (@metadata.code.branch_hint "\01") (if From 0e06b4c2accd963eb5d5ba2e20225ad31dacd792 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 13:02:56 -0700 Subject: [PATCH 217/239] undo --- src/ir/metadata.cpp | 17 ++++++----------- src/ir/metadata.h | 4 ---- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/ir/metadata.cpp b/src/ir/metadata.cpp index 770c7d9d663..f69927ecc10 100644 --- a/src/ir/metadata.cpp +++ b/src/ir/metadata.cpp @@ -105,25 +105,20 @@ bool equal(Function* a, Function* b) { return false; } - return equal(a->body, b->body, a, b); -} - -bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { - assert(aFunc && bFunc); - // TODO: We do not consider debug locations here. This is often what is // desired in optimized builds (e.g. if we are trying to fold two // pieces of code together, that benefit outweighs slightly inaccurate // debug info). If we find that non-optimizer locations call this in // ways that lead to degraded debug info, we could add an option to // control it. - if (aFunc->codeAnnotations.empty() && bFunc->codeAnnotations.empty()) { + + if (a->codeAnnotations.empty() && b->codeAnnotations.empty()) { // Nothing to compare; no differences. 
return true; } - Serializer aList(a); - Serializer bList(b); + Serializer aList(a->body); + Serializer bList(b->body); if (aList.list.size() != bList.list.size()) { return false; @@ -133,8 +128,8 @@ bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc) { for (Index i = 0; i < aList.list.size(); i++) { if (!compare(aList.list[i], bList.list[i], - aFunc->codeAnnotations, - bFunc->codeAnnotations, + a->codeAnnotations, + b->codeAnnotations, Function::CodeAnnotation())) { return false; } diff --git a/src/ir/metadata.h b/src/ir/metadata.h index aaa1a2b6c6a..ef18a8172c9 100644 --- a/src/ir/metadata.h +++ b/src/ir/metadata.h @@ -35,10 +35,6 @@ void copyBetweenFunctions(Expression* origin, // after all else is known equal). bool equal(Function* a, Function* b); -// Check if two expressions are equal in metadata. They may or may not be from -// the same function. -bool equal(Expression* a, Expression* b, Function* aFunc, Function* bFunc); - } // namespace wasm::metadata #endif // wasm_ir_metadata_h From 9736c14d9096b23380fbcf320fccdf9c25cd2f47 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 13:03:57 -0700 Subject: [PATCH 218/239] undo --- src/ir/utils.h | 16 ---------------- src/passes/CodeFolding.cpp | 15 ++++++--------- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/src/ir/utils.h b/src/ir/utils.h index 5312242f5ba..8051cb1a348 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -18,7 +18,6 @@ #define wasm_ir_utils_h #include "ir/branch-utils.h" -#include "ir/metadata.h" #include "pass.h" #include "wasm-builder.h" #include "wasm-traversal.h" @@ -70,21 +69,6 @@ struct ExpressionAnalyzer { return flexibleEqual(left, right, comparer); } - // Compare two expressions and their metadata as well. If just the first - // function is provided, we consider them both to arrive from the same one. 
- static bool equalIncludingMetadata(Expression* left, - Expression* right, - Function* leftFunc = nullptr, - Function* rightFunc = nullptr) { - if (!equal(left, right)) { - return false; - } - if (!rightFunc) { - rightFunc = leftFunc; - } - return metadata::equal(left, right, leftFunc, rightFunc); - } - // A shallow comparison, ignoring child nodes. static bool shallowEqual(Expression* left, Expression* right) { auto comparer = [left, right](Expression* currLeft, Expression* currRight) { diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index e8c11a3bf91..305eb12784f 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -249,8 +249,8 @@ struct CodeFolding // run the rest of the optimization mormally. auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { // If other is a suffix of the block, wrap it in a block. - if (block->list.empty() || !ExpressionAnalyzer::equalIncludingMetadata( - other, block->list.back(), getFunction())) { + if (block->list.empty() || + !ExpressionAnalyzer::equal(other, block->list.back())) { return nullptr; } // Do it, assign to the out param `other`, and return the block. @@ -395,8 +395,7 @@ struct CodeFolding Index tail = 1; for (; tail < tails.size(); ++tail) { auto* other = getMergeable(tails[tail], num); - if (!other || !ExpressionAnalyzer::equalIncludingMetadata( - item, other, getFunction())) { + if (!other || !ExpressionAnalyzer::equal(item, other)) { // Other tail too short or has a difference. 
break; } @@ -674,8 +673,7 @@ struct CodeFolding [&](Expression* item) { if (item == first || // don't bother comparing the first - ExpressionAnalyzer::equalIncludingMetadata( - item, first, getFunction())) { + ExpressionAnalyzer::equal(item, first)) { // equal, keep it return false; } else { @@ -693,9 +691,8 @@ struct CodeFolding explore.end(), [&](Tail& tail) { auto* item = getItem(tail, num); - return !ExpressionAnalyzer:: - equalIncludingMetadata( - item, correct, getFunction()); + return !ExpressionAnalyzer::equal( + item, correct); }), explore.end()); // try to optimize this deeper tail. if we succeed, then stop here, From 7516b88f665123667eae048bc59a59f0dbc157be Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 13:14:56 -0700 Subject: [PATCH 219/239] fix --- scripts/fuzz_opt.py | 18 +++++++++++++----- src/passes/CodeFolding.cpp | 4 ++++ src/passes/OptimizeInstructions.cpp | 22 ++++++++++++++++------ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index e26f8a82114..8345752f1d3 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1927,11 +1927,6 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', - # Do not unconditionalize code: if a branch hint does not run, but - # we start to run it all the time, it may have been a wrong hint - # that will show up as a false positive here (as it breaks our - # assumption that only valid branch hints remained in the module). - '--pass-arg=remove-unused-brs-never-unconditionalize', # Some passes that can unconditionalize code can just be disabled, # as they do not modify ifs or brs: # LICM moves code out of loops, possibly past a trap that would have @@ -1939,6 +1934,19 @@ def handle(self, wasm): '--skip-pass=licm', # HeapStoreOptimization moves struct.sets closer to struct.news. 
'--skip-pass=heap-store-optimization', + # CodeFolding and DuplicateFunctionElimination merge code, keeping + # a random branch hint from the duplicates, which might be wrong. + '--skip-pass=code-folding', + '--skip-pass=duplicate-function-elimination', + # Do not fold inside OptimizeInstructions either (we do not + # disable the entire pass, as it does many other things). + '--pass-arg=optimize-instructions-never-fold', + # Do not unconditionalize code: if a branch hint does not run, but + # we start to run it all the time, it may have been a wrong hint + # that will show up as a false positive here (as it breaks our + # assumption that only valid branch hints remained in the module). + '--pass-arg=remove-unused-brs-never-unconditionalize', + # Do not ] + get_random_opts() + FEATURE_OPTS run(args) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 305eb12784f..ee30d3e5080 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -249,6 +249,10 @@ struct CodeFolding // run the rest of the optimization mormally. auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* { // If other is a suffix of the block, wrap it in a block. + // + // Note that we do not consider metadata here. Like LLVM, we ignore + // metadata when trying to fold code together, preferring certain + // optimization over possible benefits of profiling data. if (block->list.empty() || !ExpressionAnalyzer::equal(other, block->list.back())) { return nullptr; diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6a4979b47c0..9bc7d4d0f8d 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -228,11 +228,19 @@ struct OptimizeInstructions bool fastMath; + // If set, we never fold/merge code together. 
This is important when fuzzing + // branch hints, as if we allow folding, then we may fold code identical in + // all ways but for branch hints, leading to an invalid branch hint executing + // later (imagine one arm had the right hint and the other the wrong one; we + // leave one of the two arbitrarily, so we might get unlucky). + bool neverFold; + // In rare cases we make a change to a type, and will do a refinalize. bool refinalize = false; void doWalkFunction(Function* func) { fastMath = getPassOptions().fastMath; + neverFold = hasArgument("optimize-instructions-never-fold"); // First, scan locals. { @@ -1168,17 +1176,19 @@ struct OptimizeInstructions void visitIf(If* curr) { curr->condition = optimizeBoolean(curr->condition); if (curr->ifFalse) { - auto* func = getFunction(); if (auto* unary = curr->condition->dynCast<Unary>()) { if (unary->op == EqZInt32) { // flip if-else arms to get rid of an eqz curr->condition = unary->value; std::swap(curr->ifTrue, curr->ifFalse); - BranchHints::flip(curr, func); + BranchHints::flip(curr, getFunction()); } } - if (curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equalIncludingMetadata( + // Note that we do not consider metadata here. Like LLVM, we ignore + // metadata when trying to fold code together, preferring certain + // optimization over possible benefits of profiling data. + if (!neverFold && curr->condition->type != Type::unreachable && + ExpressionAnalyzer::equal( curr->ifTrue, curr->ifFalse, func)) { // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. But // make sure not to change a concrete expression to an unreachable @@ -5648,7 +5658,7 @@ struct OptimizeInstructions } } - { + if (!neverFold) { // Identical code on both arms can be folded out, e.g. // // (select @@ -5671,7 +5681,7 @@ struct OptimizeInstructions while (1) { // Ignore control flow structures (which are handled in MergeBlocks).
if (!Properties::isControlFlowStructure(curr->ifTrue) && - ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) { // This too! XXX + ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) { // TODO: consider the case with more than one child. ChildIterator ifTrueChildren(curr->ifTrue); if (ifTrueChildren.children.size() == 1) { From b6c1ed5956ab7ef8fce0646929bd6f1c24dd48bc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 13:15:47 -0700 Subject: [PATCH 220/239] format --- src/passes/OptimizeInstructions.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 9bc7d4d0f8d..dedc2fdcdf7 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1188,8 +1188,7 @@ struct OptimizeInstructions // metadata when trying to fold code together, preferring certain // optimization over possible benefits of profiling data. if (!neverFold && curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equal( - curr->ifTrue, curr->ifFalse, func)) { + ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse, func)) { // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. 
But // make sure not to change a concrete expression to an unreachable From 01e73a8e5243fa5b8e1c3bdd7e81d3653b6aa80a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 21 Jul 2025 13:18:13 -0700 Subject: [PATCH 221/239] fix --- src/passes/OptimizeInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index dedc2fdcdf7..6e4d62a15b0 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1188,7 +1188,7 @@ struct OptimizeInstructions // metadata when trying to fold code together, preferring certain // optimization over possible benefits of profiling data. if (!neverFold && curr->condition->type != Type::unreachable && - ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse, func)) { + ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { // The sides are identical, so fold. If we can replace the If with one // arm and there are no side effects in the condition, replace it. But // make sure not to change a concrete expression to an unreachable From 5d59a8ae367b3a5e3be290df468c7a9779f060c2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 10:31:56 -0700 Subject: [PATCH 222/239] fix --- src/passes/pass-utils.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/passes/pass-utils.h b/src/passes/pass-utils.h index 1db4f561479..71836cd5dd6 100644 --- a/src/passes/pass-utils.h +++ b/src/passes/pass-utils.h @@ -36,7 +36,10 @@ struct FilteredPass : public Pass { } FilteredPass(std::unique_ptr<Pass>&& pass, const FuncSet& relevantFuncs) - : pass(std::move(pass)), relevantFuncs(relevantFuncs) {} + : pass(std::move(pass)), relevantFuncs(relevantFuncs) { + // Copy the pass's name, for debugging and for --skip-pass support.
+ name = pass->name; + } bool isFunctionParallel() override { assert(pass->isFunctionParallel()); From 534abe540fb7c645e873be8c62b1f429411bb06e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 10:40:59 -0700 Subject: [PATCH 223/239] fix --- src/passes/pass-utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/pass-utils.h b/src/passes/pass-utils.h index 71836cd5dd6..12b33fb398f 100644 --- a/src/passes/pass-utils.h +++ b/src/passes/pass-utils.h @@ -35,8 +35,8 @@ struct FilteredPass : public Pass { return std::make_unique<FilteredPass>(pass->create(), relevantFuncs); } - FilteredPass(std::unique_ptr<Pass>&& pass, const FuncSet& relevantFuncs) - : pass(std::move(pass)), relevantFuncs(relevantFuncs) { + FilteredPass(std::unique_ptr<Pass>&& pass_, const FuncSet& relevantFuncs) + : pass(std::move(pass_)), relevantFuncs(relevantFuncs) { // Copy the pass's name, for debugging and for --skip-pass support. name = pass->name; } From ddffdfeafd1fdbbb278dcdb36fd178f35c97d53a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:10:32 -0700 Subject: [PATCH 224/239] comments --- scripts/fuzz_opt.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8345752f1d3..cd93542f328 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1927,26 +1927,31 @@ def handle(self, wasm): de_instrumented, '-o', opted, '-g', - # Some passes that can unconditionalize code can just be disabled, - # as they do not modify ifs or brs: - # LICM moves code out of loops, possibly past a trap that would have - # prevented execution. + + # Some passes are just skipped, as they do not modify ifs or brs, + # but they do break the invariant of not adding bad branch hints: + # * LICM moves code out of loops, possibly past a trap that would've + # prevented execution.
Unconditionally running code like this is + # dangerous as branch hints are not an "effect" from the + # optimizer's point of view, but our invariant can break if a + # "bad" branch hint was not executed, but starts to be. '--skip-pass=licm', - # HeapStoreOptimization moves struct.sets closer to struct.news. + # * HeapStoreOptimization moves struct.sets closer to struct.news. '--skip-pass=heap-store-optimization', - # CodeFolding and DuplicateFunctionElimination merge code, keeping - # a random branch hint from the duplicates, which might be wrong. + # * CodeFolding and DuplicateFunctionElimination merge code, keeping + # a random branch hint from the duplicates, which might be wrong + # (we follow LLVM here, see details in the passes). '--skip-pass=code-folding', '--skip-pass=duplicate-function-elimination', - # Do not fold inside OptimizeInstructions either (we do not - # disable the entire pass, as it does many other things). + + # Some passes break the invariant in some cases, but we do not want + # to skip them entirely, as they have other things we need to fuzz. + # We add pass-args for them: + # * Do not fold inside OptimizeInstructions. '--pass-arg=optimize-instructions-never-fold', - # Do not unconditionalize code: if a branch hint does not run, but - # we start to run it all the time, it may have been a wrong hint - # that will show up as a false positive here (as it breaks our - # assumption that only valid branch hints remained in the module). + # * Do not unconditionalize code in RemoveUnusedBrs. 
'--pass-arg=remove-unused-brs-never-unconditionalize', - # Do not + ] + get_random_opts() + FEATURE_OPTS run(args) From acc70b76dd98b5648088892aa7a695a43c256958 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:20:39 -0700 Subject: [PATCH 225/239] feedback --- scripts/fuzz_opt.py | 2 +- src/passes/InstrumentBranchHints.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index cd93542f328..16e9f1f857c 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -2024,7 +2024,7 @@ def handle(self, wasm): assert hint == actual, 'Bad hint after optimizations' -# The global list of all test case handlers +# The global list of all test case handlers XXX ''' FuzzExec(), CompareVMs(), diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 873b02c01ef..6ca473d9ac0 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -225,13 +225,13 @@ struct InstrumentationProcessor : public WalkerPass> { // A map of expressions to their parents, so we can identify the pattern. 
std::unique_ptr<Parents> parents; - Sub* getSub() { return (Sub*)this; } + Sub* self() { return static_cast<Sub*>(this); } - void visitIf(If* curr) { getSub()->processCondition(curr); } + void visitIf(If* curr) { self()->processCondition(curr); } void visitBreak(Break* curr) { if (curr->condition) { - getSub()->processCondition(curr); + self()->processCondition(curr); } } @@ -294,7 +294,7 @@ struct InstrumentationProcessor : public WalkerPass<PostWalker<Sub>> { if (!get) { return {}; } - auto& sets = getSub()->localGraph->getSets(get); + auto& sets = self()->localGraph->getSets(get); if (sets.size() != 1) { return {}; } @@ -302,7 +302,7 @@ struct InstrumentationProcessor : public WalkerPass<PostWalker<Sub>> { if (!set) { return {}; } - auto& gets = getSub()->localGraph->getSetInfluences(set); + auto& gets = self()->localGraph->getSetInfluences(set); if (gets.size() != 2) { return {}; } @@ -318,7 +318,7 @@ struct InstrumentationProcessor : public WalkerPass<PostWalker<Sub>> { // See if that other get is used in a logging. The parent should be a // logging call.
auto* call = - getSub()->parents->getParent(otherGet)->template dynCast<Call>(); + self()->parents->getParent(otherGet)->template dynCast<Call>(); if (!call || call->target != logBranch) { return {}; } From 0a5083fc1d89b940445f6a233a5e446f7269d329 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:28:31 -0700 Subject: [PATCH 226/239] rename --- scripts/fuzz_opt.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 16e9f1f857c..fc9f7e5cc00 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -515,10 +515,10 @@ def compare_between_vms(x, y, context): y_line = y_lines[i] if x_line != y_line: # this is different, but maybe it's a vm difference we can ignore - LEI_LOGGING = '[LoggingExternalInterface logging' - if x_line.startswith(LEI_LOGGING) and y_line.startswith(LEI_LOGGING): - x_val = x_line[len(LEI_LOGGING) + 1:-1] - y_val = y_line[len(LEI_LOGGING) + 1:-1] + LOGGING_PREFIX = '[LoggingExternalInterface logging' + if x_line.startswith(LOGGING_PREFIX) and y_line.startswith(LOGGING_PREFIX): + x_val = x_line[len(LOGGING_PREFIX) + 1:-1] + y_val = y_line[len(LOGGING_PREFIX) + 1:-1] if numbers_are_close_enough(x_val, y_val): continue if x_line.startswith(FUZZ_EXEC_NOTE_RESULT) and y_line.startswith(FUZZ_EXEC_NOTE_RESULT): @@ -1882,9 +1882,9 @@ def handle(self, wasm): # where the three integers are: ID, predicted, actual.
all_ids = set() bad_ids = set() - LEI_LOG_BRANCH = '[LoggingExternalInterface log-branch' + LOG_BRANCH_PREFIX = '[LoggingExternalInterface log-branch' for line in out.splitlines(): - if line.startswith(LEI_LOG_BRANCH): + if line.startswith(LOG_BRANCH_PREFIX): # (1:-1 strips away the '[', ']' at the edges) _, _, id_, hint, actual = line[1:-1].split(' ') all_ids.add(id_) @@ -2013,7 +2013,7 @@ def handle(self, wasm): if not group or group[-1] == '[trap unreachable]': continue for line in group: - if line.startswith(LEI_LOG_BRANCH): + if line.startswith(LOG_BRANCH_PREFIX): _, _, id_, hint, actual = line[1:-1].split(' ') hint = int(hint) actual = int(actual) From 8b56402c6ba8e0f7946da8df99ab6d4a953a126f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:28:58 -0700 Subject: [PATCH 227/239] feedback: less self --- src/passes/InstrumentBranchHints.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 6ca473d9ac0..6ecfc596e91 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -294,7 +294,7 @@ struct InstrumentationProcessor : public WalkerPass> { if (!get) { return {}; } - auto& sets = self()->localGraph->getSets(get); + auto& sets = localGraph->getSets(get); if (sets.size() != 1) { return {}; } @@ -302,7 +302,7 @@ struct InstrumentationProcessor : public WalkerPass> { if (!set) { return {}; } - auto& gets = self()->localGraph->getSetInfluences(set); + auto& gets = localGraph->getSetInfluences(set); if (gets.size() != 2) { return {}; } @@ -318,7 +318,7 @@ struct InstrumentationProcessor : public WalkerPass> { // See if that other get is used in a logging. The parent should be a // logging call. 
auto* call = - self()->parents->getParent(otherGet)->template dynCast(); + parents->getParent(otherGet)->template dynCast(); if (!call || call->target != logBranch) { return {}; } @@ -363,8 +363,10 @@ struct DeInstrumentBranchHints template void processCondition(T* curr) { if (auto info = getInstrumentation(curr->condition)) { // Replace the instrumented condition with the original one (swap so that - // the IR remains valid; the other use of the local will not matter, as we - // remove the logging calls). + // the IR remains valid: we cannot use the same expression twice in our + // IR, and the original condition is still used in another place, until + // we remove the logging calls; since we will remove the calls anyhow, we + // just need some valid IR there). std::swap(curr->condition, *info->originalCondition); } } From f40a98ccde11a9105d4165e1a090f2a7255e8009 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:29:22 -0700 Subject: [PATCH 228/239] format --- src/passes/InstrumentBranchHints.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index 6ecfc596e91..c3ebbef29ae 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -317,8 +317,7 @@ struct InstrumentationProcessor : public WalkerPass> { assert(otherGet); // See if that other get is used in a logging. The parent should be a // logging call. 
- auto* call = - parents->getParent(otherGet)->template dynCast(); + auto* call = parents->getParent(otherGet)->template dynCast(); if (!call || call->target != logBranch) { return {}; } From 7c07e56bf38450d010d6a771eb1a48196e0b7188 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:31:41 -0700 Subject: [PATCH 229/239] finalize --- scripts/fuzz_opt.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index fc9f7e5cc00..74d0de98bb4 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1856,7 +1856,7 @@ def get_relevant_lines(wat): # Note that bugs found by this fuzzer tend to require the following during # reducing: BINARYEN_TRUST_GIVEN_WASM=1 in the env, and --text as a parameter. class BranchHintPreservation(TestCaseHandler): - frequency = 1 # XXX + frequency = 0.1 def handle(self, wasm): # Generate an instrumented wasm. @@ -2024,8 +2024,8 @@ def handle(self, wasm): assert hint == actual, 'Bad hint after optimizations' -# The global list of all test case handlers XXX -''' +# The global list of all test case handlers +testcase_handlers = [ FuzzExec(), CompareVMs(), CheckDeterminism(), @@ -2039,8 +2039,6 @@ def handle(self, wasm): ClusterFuzz(), Two(), PreserveImportsExports(), -''' -testcase_handlers = [ BranchHintPreservation(), ] From 17b3bea1f11c65b365a6946cf690ac1d7a775708 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:36:10 -0700 Subject: [PATCH 230/239] typo --- src/passes/InstrumentBranchHints.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/InstrumentBranchHints.cpp b/src/passes/InstrumentBranchHints.cpp index c3ebbef29ae..34af0953662 100644 --- a/src/passes/InstrumentBranchHints.cpp +++ b/src/passes/InstrumentBranchHints.cpp @@ -55,7 +55,7 @@ // }; // // A pass to delete branch hints is also provided, which finds instrumentations -// and the IDs in those calls, and deletes branch hints that were provded. 
For +// and the IDs in those calls, and deletes branch hints that were listed. For // example, // // --delete-branch-hints=10,20 From 28ce0f296aac7bb66445452ce00d90ddb2e1f363 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:42:48 -0700 Subject: [PATCH 231/239] text --- src/passes/RemoveUnusedBrs.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index fe8af8863ea..2b60aec2ad2 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -17,16 +17,6 @@ // // Removes branches for which we go to where they go anyhow. // -// Arguments: -// -// --pass-arg=remove-unused-brs-never-unconditionalize -// -// This is used during fuzzing, to prevent us from unconditionalizing code -// (making it always run, when it didn't before). Unconditionalizing code -// is a problem for fuzzing branch hints, as a branch hint that never ran -// might be wrong, and if we start to run it, the fuzzer could think it -// found a bug. -// #include "ir/branch-hints.h" #include "ir/branch-utils.h" @@ -1264,7 +1254,9 @@ struct RemoveUnusedBrs : public WalkerPass> { struct FinalOptimizer : public PostWalker { bool shrink; // Whether we are allowed to unconditionalize code, that is, make code - // run unconditionally that previously might not have run. + // run that previously might not have. Unconditionalizing code is a + // problem for fuzzing branch hints: a branch hint that never ran might be + // wrong, and if we start to run it, the fuzzer would report a finding. 
bool neverUnconditionalize; PassOptions& passOptions; From 0df872448a029206f8a84dac96065f0cbfe935f4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:46:30 -0700 Subject: [PATCH 232/239] test --- .../lit/passes/code-folding_branch-hints.wast | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/test/lit/passes/code-folding_branch-hints.wast b/test/lit/passes/code-folding_branch-hints.wast index 407986a931a..5fa0a4f46e4 100644 --- a/test/lit/passes/code-folding_branch-hints.wast +++ b/test/lit/passes/code-folding_branch-hints.wast @@ -7,31 +7,20 @@ ;; CHECK: (type $0 (func (param i32 i32) (result f32))) ;; CHECK: (func $different (type $0) (param $x i32) (param $y i32) (result f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (nop) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (else - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (nop) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (f32.const 0) ;; CHECK-NEXT: ) (func $different (param $x i32) (param $y i32) (result f32) - ;; The branch hints differ, so we do not optimize. + ;; The branch hints differ, but we still optimize (like LLVM). (if (result f32) (local.get $x) (then @@ -75,7 +64,7 @@ ;; CHECK-NEXT: (f32.const 0) ;; CHECK-NEXT: ) (func $same (param $x i32) (param $y i32) (result f32) - ;; The branch hints are the same, so we optimize. + ;; The branch hints are the same, so we definitely optimize. 
(if (result f32) (local.get $x) (then From d968f99b9f1b643fdea9af2b79dbcd751bc2d389 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:47:38 -0700 Subject: [PATCH 233/239] test --- .../optimize-instructions-branch-hints.wast | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/test/lit/passes/optimize-instructions-branch-hints.wast b/test/lit/passes/optimize-instructions-branch-hints.wast index e3205715271..1ddf7b1ce93 100644 --- a/test/lit/passes/optimize-instructions-branch-hints.wast +++ b/test/lit/passes/optimize-instructions-branch-hints.wast @@ -30,32 +30,17 @@ ) ) - ;; CHECK: (func $no-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $still-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $y) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (else - ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (local.get $y) - ;; CHECK-NEXT: (then - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $no-merge (param $x i32) (param $y i32) - ;; Do not merge if arms if metadata differs (perhaps the hint is intentionally - ;; different, reflecting different runtime behavior). + (func $still-merge (param $x i32) (param $y i32) + ;; We merge if arms even if metadata differs (like LLVM). 
(if (local.get $x) (then From 71b07e13677f08e513e1dffdd0a6fb55d5f9d18a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:49:38 -0700 Subject: [PATCH 234/239] test --- test/lit/passes/optimize-instructions-branch-hints.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/optimize-instructions-branch-hints.wast b/test/lit/passes/optimize-instructions-branch-hints.wast index 1ddf7b1ce93..59fbdaf180a 100644 --- a/test/lit/passes/optimize-instructions-branch-hints.wast +++ b/test/lit/passes/optimize-instructions-branch-hints.wast @@ -74,7 +74,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $yes-merge (param $x i32) (param $y i32) - ;; As above, but now the hints match, so we merge. + ;; Now the hints match, so we definitely merge. (if (local.get $x) (then From 1d41ea44e4b149fe92f69d5736d4e93b463e4edc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:54:42 -0700 Subject: [PATCH 235/239] test --- ....wast => optimize-instructions_branch-hints-fold.wast} | 8 ++++++++ 1 file changed, 8 insertions(+) rename test/lit/passes/{optimize-instructions-branch-hints.wast => optimize-instructions_branch-hints-fold.wast} (89%) diff --git a/test/lit/passes/optimize-instructions-branch-hints.wast b/test/lit/passes/optimize-instructions_branch-hints-fold.wast similarity index 89% rename from test/lit/passes/optimize-instructions-branch-hints.wast rename to test/lit/passes/optimize-instructions_branch-hints-fold.wast index 59fbdaf180a..7877a656a6b 100644 --- a/test/lit/passes/optimize-instructions-branch-hints.wast +++ b/test/lit/passes/optimize-instructions_branch-hints-fold.wast @@ -1,6 +1,14 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+ ;; RUN: wasm-opt %s --optimize-instructions -all -S -o - | filecheck %s +;; RUN: wasm-opt %s --optimize-instructions -all --pass-arg=optimize-instructions-never-fold -S -o - \ +;; RUN: | filecheck %s --check-prefix=NO_FO + +;; Also verify that the "never-fold" flag is respected: when set, we do not fold +;; code together. This is important as we keep one of the two branch hints, and +;; it may be wrong, which can confuse the fuzzer. + (module ;; CHECK: (func $conditionals (type $1) (param $x i32) (result i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") From c487b02388382e89a26bc0b00e1cb8a908a5b274 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 11:56:55 -0700 Subject: [PATCH 236/239] test --- ...timize-instructions_branch-hints-fold.wast | 97 ++++++++++++++++--- 1 file changed, 84 insertions(+), 13 deletions(-) diff --git a/test/lit/passes/optimize-instructions_branch-hints-fold.wast b/test/lit/passes/optimize-instructions_branch-hints-fold.wast index 7877a656a6b..3e570bf056b 100644 --- a/test/lit/passes/optimize-instructions_branch-hints-fold.wast +++ b/test/lit/passes/optimize-instructions_branch-hints-fold.wast @@ -2,13 +2,13 @@ ;; RUN: wasm-opt %s --optimize-instructions -all -S -o - | filecheck %s -;; RUN: wasm-opt %s --optimize-instructions -all --pass-arg=optimize-instructions-never-fold -S -o - \ -;; RUN: | filecheck %s --check-prefix=NO_FO - ;; Also verify that the "never-fold" flag is respected: when set, we do not fold ;; code together. This is important as we keep one of the two branch hints, and ;; it may be wrong, which can confuse the fuzzer. 
+;; RUN: wasm-opt %s --optimize-instructions -all --pass-arg=optimize-instructions-never-fold -S -o - \ +;; RUN: | filecheck %s --check-prefix=NO_FO + (module ;; CHECK: (func $conditionals (type $1) (param $x i32) (result i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") @@ -22,6 +22,18 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; NO_FO: (func $conditionals (type $1) (param $x i32) (result i32) + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\01") + ;; NO_FO-NEXT: (if (result i32) + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (i32.const 1337) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (i32.const 42) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) (func $conditionals (param $x i32) (result i32) ;; When we flip the if, the hint should flip too. (@metadata.code.branch_hint "\00") @@ -38,7 +50,7 @@ ) ) - ;; CHECK: (func $still-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $still-fold (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (local.get $y) @@ -47,8 +59,32 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $still-merge (param $x i32) (param $y i32) - ;; We merge if arms even if metadata differs (like LLVM). 
+ ;; NO_FO: (func $still-fold (type $0) (param $x i32) (param $y i32) + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (unreachable) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\01") + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (unreachable) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + (func $still-fold (param $x i32) (param $y i32) + ;; We fold if arms even if metadata differs (like LLVM). We do not fold if the + ;; flag was passed, however. (if (local.get $x) (then @@ -72,7 +108,7 @@ ) ) - ;; CHECK: (func $yes-merge (type $0) (param $x i32) (param $y i32) + ;; CHECK: (func $yes-fold (type $0) (param $x i32) (param $y i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (if ;; CHECK-NEXT: (local.get $y) @@ -81,8 +117,31 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $yes-merge (param $x i32) (param $y i32) - ;; Now the hints match, so we definitely merge. 
+ ;; NO_FO: (func $yes-fold (type $0) (param $x i32) (param $y i32) + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\01") + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (unreachable) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\01") + ;; NO_FO-NEXT: (if + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (unreachable) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + (func $yes-fold (param $x i32) (param $y i32) + ;; Now the hints match, so we definitely fold (without the flag). (if (local.get $x) (then @@ -106,7 +165,7 @@ ) ) - ;; CHECK: (func $always-merge-select (type $2) (param $x i32) (param $y i32) (result i32) + ;; CHECK: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if (result i32) ;; CHECK-NEXT: (local.get $x) @@ -118,10 +177,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $always-merge-select (param $x i32) (param $y i32) (result i32) - ;; A select with different metadata is still merged: the code was executed + ;; NO_FO: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") + ;; NO_FO-NEXT: (if (result i32) + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (i32.const 10) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (i32.const 20) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: ) + (func $always-fold-select (param $x i32) (param $y i32) (result i32) + ;; A select with different metadata is still foldable: the code was executed ;; anyhow, so it's fine if we execute just one of the two (we pick the first, - ;; arbitrarily). + ;; arbitrarily). 
We do so even with the flag. (select (@metadata.code.branch_hint "\00") (if (result i32) From e37bc454c3aecb7533e20371d1af4d6a38a35906 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 16:06:00 -0700 Subject: [PATCH 237/239] comment --- scripts/fuzz_opt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 74d0de98bb4..3f6b5225ace 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1936,7 +1936,9 @@ def handle(self, wasm): # optimizer's point of view, but our invariant can break if a # "bad" branch hint was not executed, but starts to be. '--skip-pass=licm', - # * HeapStoreOptimization moves struct.sets closer to struct.news. + # * HeapStoreOptimization moves struct.sets closer to struct.news + # (and so, like LICM, it might move code to a place where it + # executes unconditionally). '--skip-pass=heap-store-optimization', # * CodeFolding and DuplicateFunctionElimination merge code, keeping # a random branch hint from the duplicates, which might be wrong From 9c42c2610286dac8886322982d45dd704489da0f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 16:10:48 -0700 Subject: [PATCH 238/239] feedback: extra test --- .../lit/passes/code-folding_branch-hints.wast | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/lit/passes/code-folding_branch-hints.wast b/test/lit/passes/code-folding_branch-hints.wast index 5fa0a4f46e4..51601230865 100644 --- a/test/lit/passes/code-folding_branch-hints.wast +++ b/test/lit/passes/code-folding_branch-hints.wast @@ -50,6 +50,51 @@ ) ) + ;; CHECK: (func $different-flip (type $0) (param $x i32) (param $y i32) (result f32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.const 0) + 
;; CHECK-NEXT: ) + (func $different-flip (param $x i32) (param $y i32) (result f32) + ;; As above, but flipped. We still optimize, still keeping the first branch + ;; hint (now "\01"). + (if (result f32) + (local.get $x) + (then + (block (result f32) + (@metadata.code.branch_hint "\01") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + (else + (block (result f32) + (@metadata.code.branch_hint "\00") + (if + (local.get $x) + (then + (nop) + ) + ) + (f32.const 0) + ) + ) + ) + ) + ;; CHECK: (func $same (type $0) (param $x i32) (param $y i32) (result f32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (local.get $x) From d9da805100d39a9c9bdf18374cbc740691be8797 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 22 Jul 2025 16:41:12 -0700 Subject: [PATCH 239/239] add MergeBlocks --- scripts/fuzz_opt.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 3f6b5225ace..cd07a8ad4ae 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1929,21 +1929,26 @@ def handle(self, wasm): '-g', # Some passes are just skipped, as they do not modify ifs or brs, - # but they do break the invariant of not adding bad branch hints: - # * LICM moves code out of loops, possibly past a trap that would've - # prevented execution. Unconditionally running code like this is - # dangerous as branch hints are not an "effect" from the - # optimizer's point of view, but our invariant can break if a - # "bad" branch hint was not executed, but starts to be. + # but they do break the invariant of not adding bad branch hints. + # There are two main issues here: + # * Moving code around, possibly causing it to start to execute if + # it previously was not reached due to a trap (a branch hint + # seems to have no effects in the optimizer, so it will do such + # movements). And if it starts to execute and is a wrong hint, we + # get an invalid fuzzer finding. 
+ # * LICM moves code out of loops. '--skip-pass=licm', - # * HeapStoreOptimization moves struct.sets closer to struct.news - # (and so, like LICM, it might move code to a place where it - # executes unconditionally). + # * HeapStoreOptimization moves struct.sets closer to struct.news. '--skip-pass=heap-store-optimization', - # * CodeFolding and DuplicateFunctionElimination merge code, keeping - # a random branch hint from the duplicates, which might be wrong - # (we follow LLVM here, see details in the passes). + # * MergeBlocks moves code out of inner blocks to outer blocks. + '--skip-pass=merge-blocks', + # * Merging/folding code. When we do so, code identical in content + # but differing in metadata will end up with the metadata from one + # of the copies, which might be wrong (we follow LLVM here, see + # details in the passes). + # * CodeFolding merges code blocks inside functions. '--skip-pass=code-folding', + # * DuplicateFunctionElimination merges functions. '--skip-pass=duplicate-function-elimination', # Some passes break the invariant in some cases, but we do not want