-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[PowerPC] support branch hint for AtomicExpandImpl::expandAtomicCmpXchg #152366
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-powerpc Author: zhijian lin (diggerlin) Changes: The patch adds a branch hint for AtomicExpandImpl::expandAtomicCmpXchg. PowerPC supports branch hints as `-` (hints not taken) and `+` (hints taken).
Patch is 171.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152366.diff 13 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbdc1b6031680..027bcc5bc53ae 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,6 +2332,12 @@ class LLVM_ABI TargetLoweringBase {
virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const;
+
+ virtual MDNode *
+ getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+ return nullptr;
+ }
+
/// @}
// Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 3f3d5dc90711f..abaa8b6e841f6 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,7 +1454,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+ Builder.CreateCondBr(
+ ShouldStore, ReleasingStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1472,8 +1474,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
- Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : RetryBB);
+ Builder.CreateCondBr(
+ StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
@@ -1486,7 +1489,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ Builder.CreateCondBr(
+ ShouldStore, TryStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
// Update PHI node in TryStoreBB.
LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..b218532e56b6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -12816,6 +12817,11 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
return Builder.CreateXor(Call, Builder.getInt32(1));
}
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+ LLVMContext &Ctx) const {
+ return MDBuilder(Ctx).createLikelyBranchWeights();
+}
+
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c7116dc3b5..4892a3c603a6c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,6 +938,8 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ virtual MDNode *
+ getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index b7852c3c3e6e0..2d8e0e869a860 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -19,51 +19,53 @@ define signext i32 @main() nounwind {
; CHECK-NEXT: addi 3, 1, 46
; CHECK-NEXT: lharx 4, 0, 3
; CHECK-NEXT: cmplwi 4, 33059
-; CHECK-NEXT: bne 0, .LBB0_4
+; CHECK-NEXT: bne- 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: sync
; CHECK-NEXT: li 4, 234
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 4, 0, 3
-; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: beq+ 0, .LBB0_5
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-NEXT: #
; CHECK-NEXT: lharx 5, 0, 3
; CHECK-NEXT: cmplwi 5, 33059
-; CHECK-NEXT: beq 0, .LBB0_2
+; CHECK-NEXT: beq+ 0, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
-; CHECK-NEXT: b .LBB0_8
-; CHECK-NEXT: .LBB0_5: # %L.B0000
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .LBB0_5: # %cmpxchg.success
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB0_6: # %cmpxchg.end
+; CHECK-NEXT: bc 4, 20, .LBB0_9
+; CHECK-NEXT: # %bb.7: # %L.B0000
; CHECK-NEXT: lhz 3, 46(1)
; CHECK-NEXT: cmplwi 3, 234
-; CHECK-NEXT: bne 0, .LBB0_9
-; CHECK-NEXT: # %bb.6: # %L.B0001
+; CHECK-NEXT: bne 0, .LBB0_10
+; CHECK-NEXT: # %bb.8: # %L.B0001
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB0_11
-; CHECK-NEXT: .LBB0_7: # %cmpxchg.success
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_8: # %L.B0003
+; CHECK-NEXT: b .LBB0_12
+; CHECK-NEXT: .LBB0_9: # %L.B0003
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 16
-; CHECK-NEXT: b .LBB0_10
-; CHECK-NEXT: .LBB0_9: # %L.B0005
+; CHECK-NEXT: b .LBB0_11
+; CHECK-NEXT: .LBB0_10: # %L.B0005
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 64
-; CHECK-NEXT: .LBB0_10: # %L.B0003
+; CHECK-NEXT: .LBB0_11: # %L.B0003
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: .LBB0_11: # %L.B0003
+; CHECK-NEXT: .LBB0_12: # %L.B0003
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
@@ -83,7 +85,7 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: srw 6, 5, 4
; CHECK-P7-NEXT: clrlwi 6, 6, 16
; CHECK-P7-NEXT: cmplwi 6, 33059
-; CHECK-P7-NEXT: bne 0, .LBB0_4
+; CHECK-P7-NEXT: bne- 0, .LBB0_4
; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-P7-NEXT: lis 6, 0
; CHECK-P7-NEXT: li 7, 234
@@ -92,51 +94,53 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: slw 7, 7, 4
; CHECK-P7-NEXT: slw 6, 6, 4
; CHECK-P7-NEXT: not 6, 6
-; CHECK-P7-NEXT: .p2align 4
; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: and 5, 5, 6
; CHECK-P7-NEXT: or 5, 5, 7
; CHECK-P7-NEXT: stwcx. 5, 0, 3
-; CHECK-P7-NEXT: beq 0, .LBB0_7
+; CHECK-P7-NEXT: beq+ 0, .LBB0_5
; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: lwarx 5, 0, 3
; CHECK-P7-NEXT: srw 8, 5, 4
; CHECK-P7-NEXT: clrlwi 8, 8, 16
; CHECK-P7-NEXT: cmplwi 8, 33059
-; CHECK-P7-NEXT: beq 0, .LBB0_2
+; CHECK-P7-NEXT: beq+ 0, .LBB0_2
; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore
+; CHECK-P7-NEXT: crxor 20, 20, 20
; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: b .LBB0_8
-; CHECK-P7-NEXT: .LBB0_5: # %L.B0000
+; CHECK-P7-NEXT: b .LBB0_6
+; CHECK-P7-NEXT: .LBB0_5: # %cmpxchg.success
+; CHECK-P7-NEXT: lwsync
+; CHECK-P7-NEXT: creqv 20, 20, 20
+; CHECK-P7-NEXT: .LBB0_6: # %cmpxchg.end
+; CHECK-P7-NEXT: bc 4, 20, .LBB0_9
+; CHECK-P7-NEXT: # %bb.7: # %L.B0000
; CHECK-P7-NEXT: lhz 3, 46(1)
; CHECK-P7-NEXT: cmplwi 3, 234
-; CHECK-P7-NEXT: bne 0, .LBB0_9
-; CHECK-P7-NEXT: # %bb.6: # %L.B0001
+; CHECK-P7-NEXT: bne 0, .LBB0_10
+; CHECK-P7-NEXT: # %bb.8: # %L.B0001
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 0
-; CHECK-P7-NEXT: b .LBB0_11
-; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success
-; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: b .LBB0_5
-; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT: b .LBB0_12
+; CHECK-P7-NEXT: .LBB0_9: # %L.B0003
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 16
-; CHECK-P7-NEXT: b .LBB0_10
-; CHECK-P7-NEXT: .LBB0_9: # %L.B0005
+; CHECK-P7-NEXT: b .LBB0_11
+; CHECK-P7-NEXT: .LBB0_10: # %L.B0005
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 64
-; CHECK-P7-NEXT: .LBB0_10: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 1
-; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_12: # %L.B0003
; CHECK-P7-NEXT: addi 1, 1, 48
; CHECK-P7-NEXT: ld 0, 16(1)
; CHECK-P7-NEXT: mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 07afea75aec67..7e892fc4ae6eb 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4347,19 +4347,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 8, sc@toc@l(4)
; CHECK-NEXT: lbarx 5, 0, 6
; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: bne 0, .LBB3_4
+; CHECK-NEXT: bne- 0, .LBB3_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 8, 0, 6
-; CHECK-NEXT: beq 0, .LBB3_4
+; CHECK-NEXT: beq+ 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 5, 0, 6
; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: beq 0, .LBB3_2
+; CHECK-NEXT: beq+ 0, .LBB3_2
; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272
; CHECK-NEXT: addi 7, 3, uc@toc@l
; CHECK-NEXT: lwsync
@@ -4367,20 +4366,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 9, uc@toc@l(3)
; CHECK-NEXT: lbarx 8, 0, 7
; CHECK-NEXT: cmplw 8, 9
-; CHECK-NEXT: bne 0, .LBB3_8
+; CHECK-NEXT: bne- 0, .LBB3_8
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 5, 5, 24
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 5, 0, 7
-; CHECK-NEXT: beq 0, .LBB3_8
+; CHECK-NEXT: beq+ 0, .LBB3_8
; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 8, 0, 7
; CHECK-NEXT: cmplw 8, 9
-; CHECK-NEXT: beq 0, .LBB3_6
+; CHECK-NEXT: beq+ 0, .LBB3_6
; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253
; CHECK-NEXT: addis 5, 2, ss@toc@ha
; CHECK-NEXT: lwsync
@@ -4390,21 +4388,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 8, 5, ss@toc@l
; CHECK-NEXT: lharx 9, 0, 8
; CHECK-NEXT: cmplw 9, 10
-; CHECK-NEXT: bne 0, .LBB3_12
+; CHECK-NEXT: bne- 0, .LBB3_12
; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238
; CHECK-NEXT: extsb 11, 11
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 11, 11, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 11, 0, 8
-; CHECK-NEXT: beq 0, .LBB3_12
+; CHECK-NEXT: beq+ 0, .LBB3_12
; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236
; CHECK-NEXT: #
; CHECK-NEXT: lharx 9, 0, 8
; CHECK-NEXT: cmplw 9, 10
-; CHECK-NEXT: beq 0, .LBB3_10
+; CHECK-NEXT: beq+ 0, .LBB3_10
; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234
; CHECK-NEXT: lwsync
; CHECK-NEXT: sth 9, ss@toc@l(5)
@@ -4414,21 +4411,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 9, 5, us@toc@l
; CHECK-NEXT: lharx 10, 0, 9
; CHECK-NEXT: cmplw 10, 11
-; CHECK-NEXT: bne 0, .LBB3_16
+; CHECK-NEXT: bne- 0, .LBB3_16
; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219
; CHECK-NEXT: extsb 12, 12
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 12, 12, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 12, 0, 9
-; CHECK-NEXT: beq 0, .LBB3_16
+; CHECK-NEXT: beq+ 0, .LBB3_16
; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217
; CHECK-NEXT: #
; CHECK-NEXT: lharx 10, 0, 9
; CHECK-NEXT: cmplw 10, 11
-; CHECK-NEXT: beq 0, .LBB3_14
+; CHECK-NEXT: beq+ 0, .LBB3_14
; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215
; CHECK-NEXT: lwsync
; CHECK-NEXT: sth 10, us@toc@l(5)
@@ -4438,20 +4434,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 10, 5, si@toc@l
; CHECK-NEXT: lwarx 11, 0, 10
; CHECK-NEXT: cmplw 11, 12
-; CHECK-NEXT: bne 0, .LBB3_20
+; CHECK-NEXT: bne- 0, .LBB3_20
; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200
; CHECK-NEXT: extsb 0, 0
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 0, 0, 10
-; CHECK-NEXT: beq 0, .LBB3_20
+; CHECK-NEXT: beq+ 0, .LBB3_20
; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 11, 0, 10
; CHECK-NEXT: cmplw 11, 12
-; CHECK-NEXT: beq 0, .LBB3_18
+; CHECK-NEXT: beq+ 0, .LBB3_18
; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196
; CHECK-NEXT: lwsync
; CHECK-NEXT: stw 11, si@toc@l(5)
@@ -4461,20 +4456,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 11, 5, ui@toc@l
; CHECK-NEXT: lwarx 12, 0, 11
; CHECK-NEXT: cmplw 12, 0
-; CHECK-NEXT: bne 0, .LBB3_24
+; CHECK-NEXT: bne- 0, .LBB3_24
; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181
; CHECK-NEXT: extsb 30, 30
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 30, 0, 11
-; CHECK-NEXT: beq 0, .LBB3_24
+; CHECK-NEXT: beq+ 0, .LBB3_24
; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 12, 0, 11
; CHECK-NEXT: cmplw 12, 0
-; CHECK-NEXT: beq 0, .LBB3_22
+; CHECK-NEXT: beq+ 0, .LBB3_22
; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177
; CHECK-NEXT: addis 30, 2, sll@toc@ha
; CHECK-NEXT: lwsync
@@ -4484,20 +4478,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 12, 30, sll@toc@l
; CHECK-NEXT: ldarx 0, 0, 12
; CHECK-NEXT: cmpld 0, 29
-; CHECK-NEXT: bne 0, .LBB3_28
+; CHECK-NEXT: bne- 0, .LBB3_28
; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162
; CHECK-NEXT: extsb 28, 28
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 28, 0, 12
-; CHECK-NEXT: beq 0, .LBB3_28
+; CHECK-NEXT: beq+ 0, .LBB3_28
; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 0, 0, 12
; CHECK-NEXT: cmpld 0, 29
-; CHECK-NEXT: beq 0, .LBB3_26
+; CHECK-NEXT: beq+ 0, .LBB3_26
; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158
; CHECK-NEXT: lwsync
; CHECK-NEXT: std 0, sll@toc@l(30)
@@ -4507,20 +4500,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 0, 30, ull@toc@l
; CHECK-NEXT: ldarx 29, 0, 0
; CHECK-NEXT: cmpld 29, 28
-; CHECK-NEXT: bne 0, .LBB3_32
+; CHECK-NEXT: bne- 0, .LBB3_32
; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143
; CHECK-NEXT: extsb 27, 27
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 27, 0, 0
-; CHECK-NEXT: beq 0, .LBB3_32
+; CHECK-NEXT: beq+ 0, .LBB3_32
; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 29, 0, 0
; CHECK-NEXT: cmpld 29, 28
-; CHECK-NEXT: beq 0, .LBB3_30
+; CHECK-NEXT: beq+ 0, .LBB3_30
; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139
; CHECK-NEXT: lwsync
; CHECK-NEXT: std 29, ull@toc@l(30)
@@ -4528,19 +4520,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 29, sc@toc@l(4)
; CHECK-NEXT: lbarx 28, 0, 6
; CHECK-NEXT: cmplw 28, 30
-; CHECK-NEXT: bne 0, .LBB3_36
+; CHECK-NEXT: bne- 0, .LBB3_36
; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 29, 0, 6
-; CHECK-NEXT: beq 0, .LBB3_37
+; CHECK-NEXT: beq+ 0, .LBB3_37
; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 28, 0, 6
; CHECK-NEXT: cmplw 28, 30
-; CHECK-NEXT: beq 0, .LBB3_34
+; CHECK-NEXT: beq+ 0, .LBB3_34
; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4557,19 +4548,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc@toc@l(3)
; CHECK-NEXT: lbarx 29, 0, 7
; CHECK-NEXT: cmplw 29, 6
-; CHECK-NEXT: bne 0, .LBB3_42
+; CHECK-NEXT: bne- 0, .LBB3_42
; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 30, 0, 7
-; CHECK-NEXT: beq 0, .LBB3_43
+; CHECK-NEXT: beq+ 0, .LBB3_43
; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 29, 0, 7
; CHECK-NEXT: cmplw 29, 6
-; CHECK-NEXT: beq 0, .LBB3_40
+; CHECK-NEXT: beq+ 0, .LBB3_40
; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4586,21 +4576,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc@toc@l(3)
; CHECK-NEXT: lharx 30, 0, 8
; CHECK-NEXT: cmplw 30, 6
-; CHECK-NEXT: bne 0, .LBB3_48
+; CHECK-NEXT: bne- 0, .LBB3_48
; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 7, 7, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 7, 0, 8
-; CHECK-NEXT: beq 0, .LBB3_49
+; CHECK-NEXT: beq+ 0, .LBB3_49
; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84
; CHECK-NEXT: #
; CHECK-NEXT: lharx 30, 0, 8
; CHECK-NEXT: cmplw 30, 6
-; CHECK-NEXT: beq 0, .LBB3_46
+; CHECK-NEXT: beq+ 0, .LBB3_46
; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4617,21 +4606,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc@toc@l(3)
; CHECK-NEXT: lharx 8, 0, 9
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: bne 0, .LBB3_54
+; CHECK-NEXT: bne- 0, .LBB3_54
; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 7, 7, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 7, 0, 9
-; CHEC...
[truncated]
|
@@ -12816,6 +12817,11 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder, | |||
return Builder.CreateXor(Call, Builder.getInt32(1)); | |||
} | |||
|
|||
MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg( | |||
LLVMContext &Ctx) const { | |||
return MDBuilder(Ctx).createLikelyBranchWeights(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't target specific. Just remove the new TLI hook and directly do it in AtomicExpand
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not all platforms use the same branch prediction mechanism, so I introduced a new virtual API:
virtual MDNode *
getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const {
return nullptr;
}
For example, the AArch64 target does not have a branch hint mechanism, so it will use the default getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx)
const, which returns nullptr.
Targets that want to support branch hinting for expandAtomicCmpXchg can override this with a target-specific implementation.
According to the Power10 chip manual:
The Power10 core normally ignores any software that attempts to override the dynamic branch prediction by setting the “a” bit in the BO field.
When the hardware detects a l*arx instruction near a static prediction, the static prediction is honored.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The details of what the target is going to do with the metadata are unimportant; this is a generic branch-likely hint.
… getTrueBranchHintWeightForAtomicCmpXchg
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no benefit to adding a new target API for this. The bar for new target hooks needs to be higher, and there should be a concrete reason why this would be harmful on other targets (which realistically is only unrelated downstream optimization bugs, so even then it should be unconditional).
@@ -12816,6 +12817,11 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder, | |||
return Builder.CreateXor(Call, Builder.getInt32(1)); | |||
} | |||
|
|||
MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg( | |||
LLVMContext &Ctx) const { | |||
return MDBuilder(Ctx).createLikelyBranchWeights(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The details of what the target is going to do with the metadata are unimportant; this is a generic branch-likely hint.
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/CodeGen/AtomicExpandPass.cpp llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll llvm/test/CodeGen/AArch64/atomic-ops.ll llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll llvm/test/CodeGen/ARM/atomic-cmpxchg.ll llvm/test/CodeGen/ARM/cmpxchg-idioms.ll llvm/test/CodeGen/ARM/cmpxchg-weak.ll llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll llvm/test/CodeGen/PowerPC/all-atomics.ll llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll llvm/test/CodeGen/PowerPC/atomic-float.ll llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll llvm/test/CodeGen/PowerPC/atomics-regression.ll llvm/test/CodeGen/PowerPC/atomics.ll llvm/test/CodeGen/PowerPC/loop-comment.ll llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
The patch adds a branch hint for AtomicExpandImpl::expandAtomicCmpXchg. PowerPC supports branch hints as:
- `-` hints not taken,
- `+` hints taken.