diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 3f3d5dc90711f..0d90cdb52a448 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1454,7 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); + Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB, + MDBuilder(F->getContext()).createLikelyBranchWeights()); Builder.SetInsertPoint(ReleasingStoreBB); if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) @@ -1473,7 +1474,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : RetryBB); + CI->isWeak() ? FailureBB : RetryBB, + MDBuilder(F->getContext()).createLikelyBranchWeights()); Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; @@ -1486,7 +1488,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + Builder.CreateCondBr( + ShouldStore, TryStoreBB, NoStoreBB, + MDBuilder(F->getContext()).createLikelyBranchWeights()); // Update PHI node in TryStoreBB. LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index 5bc041aef88ba..e6bf3ab674717 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -6002,15 +6002,17 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NOLSE-O1-NEXT: b.ne LBB67_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB67_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB67_1 -; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: stxrb w10, w2, [x8] +; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB67_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB67_4: ; %cmpxchg.nostore -; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: mov w9, wzr ; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; @@ -6108,15 +6110,17 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NOLSE-O1-NEXT: b.ne LBB68_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB68_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB68_1 -; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: stxrh w10, w2, [x8] +; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB68_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB68_4: ; %cmpxchg.nostore -; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: mov w9, wzr ; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; @@ -6206,6 +6210,7 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) { ; CHECK-NOLSE-O1-LABEL: cmpxchg_i32: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 +; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1 ; CHECK-NOLSE-O1-NEXT: LBB69_1: ; %cmpxchg.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldxr w0, [x8] @@ -6213,15 +6218,16 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) { ; CHECK-NOLSE-O1-NEXT: b.ne LBB69_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB69_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB69_1 -; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: stxr w10, w2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB69_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB69_4: ; %cmpxchg.nostore -; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: mov w9, wzr ; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; @@ -6306,6 +6312,7 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) { ; CHECK-NOLSE-O1-LABEL: cmpxchg_i64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 +; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1 ; CHECK-NOLSE-O1-NEXT: LBB70_1: ; %cmpxchg.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldxr x0, [x8] @@ -6313,14 +6320,15 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) { ; CHECK-NOLSE-O1-NEXT: b.ne LBB70_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB70_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB70_1 -; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: stxr w10, x2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB70_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB70_4: ; %cmpxchg.nostore -; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: mov w9, wzr ; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i64: @@ -6404,6 +6412,7 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) { ; CHECK-NOLSE-O1-LABEL: cmpxchg_ptr: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 +; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1 ; CHECK-NOLSE-O1-NEXT: LBB71_1: ; %cmpxchg.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldxr x0, [x8] @@ -6411,14 +6420,15 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) { ; CHECK-NOLSE-O1-NEXT: b.ne LBB71_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB71_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB71_1 -; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 +; CHECK-NOLSE-O1-NEXT: stxr w10, x2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB71_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB71_4: ; %cmpxchg.nostore -; CHECK-NOLSE-O1-NEXT: mov w1, wzr +; CHECK-NOLSE-O1-NEXT: mov w9, wzr ; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w1, w9 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-OUTLINE-O1-LABEL: cmpxchg_ptr: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 4a85d8490d2e9..cab2741be9929 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -9,7 +9,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) @@ -17,7 +17,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w9 = STXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -49,7 +49,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $x0, $x9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) @@ -57,7 +57,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $w1, $x0, $x8, $x9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w10 = STXRW renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -88,7 +88,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) @@ -96,7 +96,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w9 = STLXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -126,7 +126,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) { ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p) @@ -134,7 +134,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) { ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w9 = STXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p) @@ -164,7 +164,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p) @@ -172,7 +172,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p) @@ -202,7 +202,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p) @@ -210,7 +210,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p) @@ -240,7 +240,7 @@ define i32 @fetch_and_nand(ptr %p) { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) @@ -265,7 +265,7 @@ define i64 @fetch_and_nand_64(ptr %p) { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p) @@ -292,7 +292,7 @@ define i32 @fetch_and_or(ptr %p) { ; CHECK-NEXT: renamable $w9 = MOVZWi 5, 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) @@ -316,7 +316,7 @@ define i64 @fetch_and_or_64(ptr %p) { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p) @@ -723,7 +723,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -747,7 +747,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -770,7 +770,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -794,7 +794,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -818,7 +818,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -842,7 +842,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -866,7 +866,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -892,7 +892,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -920,7 +920,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -948,7 +948,7 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) @@ -974,7 +974,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -998,7 +998,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1021,7 +1021,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1045,7 +1045,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1069,7 +1069,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1093,7 +1093,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1117,7 +1117,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1143,7 +1143,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1171,7 +1171,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1199,7 +1199,7 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: - ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) @@ -1227,34 +1227,35 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 + ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr) - ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1 + ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr) + ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0 + ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1 + ; CHECK-NEXT: B %bb.4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: bb.3.cmpxchg.nostore: + ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w1 = MOVZWi 1, 0 - ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 - ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 + ; CHECK-NEXT: $w9 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.cmpxchg.nostore: - ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: bb.4.cmpxchg.end: + ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 - ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 + ; CHECK-NEXT: $w1 = ORRWrs $wzr, killed $w9, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0 ret { i8, i1 } %res @@ -1269,35 +1270,36 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: - ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) + ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 + ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: - ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr) - ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1 + ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr) + ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0 + ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1 + ; CHECK-NEXT: B %bb.4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: bb.3.cmpxchg.nostore: + ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w1 = MOVZWi 1, 0 - ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 - ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 + ; CHECK-NEXT: $w9 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.cmpxchg.nostore: - ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: bb.4.cmpxchg.end: + ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 - ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 - ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 + ; CHECK-NEXT: $w1 = ORRWrs $wzr, killed $w9, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0 ret { i16, i1 } %res } diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll index 42cb3d4e9589d..bf78429da52f3 100644 --- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll @@ -850,18 +850,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldxr x8, [x9] ; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: b.ne .LBB43_3 +; CHECK-NEXT: b.ne .LBB43_4 ; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore ; CHECK-NEXT: // in Loop: Header=BB43_1 Depth=1 ; CHECK-NEXT: stxr w10, x1, [x9] ; CHECK-NEXT: cbnz w10, .LBB43_1 -; CHECK-NEXT: b .LBB43_4 -; CHECK-NEXT: .LBB43_3: // %cmpxchg.nostore -; CHECK-NEXT: clrex -; CHECK-NEXT: .LBB43_4: // %cmpxchg.end +; CHECK-NEXT: .LBB43_3: // %cmpxchg.end ; CHECK-NEXT: adrp x9, var64 ; CHECK-NEXT: str x8, [x9, :lo12:var64] ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB43_4: // %cmpxchg.nostore +; CHECK-NEXT: clrex +; CHECK-NEXT: b .LBB43_3 %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 store i64 %old, ptr @var64 diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll index d8ac89f76b321..deeba7ef3ce2c 100644 --- a/llvm/test/CodeGen/AArch64/atomic-ops.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll @@ -1090,18 +1090,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; INLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; INLINE_ATOMICS-NEXT: ldxr x8, [x9] ; INLINE_ATOMICS-NEXT: cmp x8, x0 -; INLINE_ATOMICS-NEXT: b.ne .LBB43_3 +; INLINE_ATOMICS-NEXT: b.ne .LBB43_4 ; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore ; INLINE_ATOMICS-NEXT: // in Loop: Header=BB43_1 Depth=1 ; INLINE_ATOMICS-NEXT: stxr w10, x1, [x9] ; INLINE_ATOMICS-NEXT: cbnz w10, .LBB43_1 -; INLINE_ATOMICS-NEXT: b .LBB43_4 -; INLINE_ATOMICS-NEXT: .LBB43_3: // %cmpxchg.nostore -; INLINE_ATOMICS-NEXT: clrex -; INLINE_ATOMICS-NEXT: .LBB43_4: // %cmpxchg.end +; INLINE_ATOMICS-NEXT: .LBB43_3: // %cmpxchg.end ; INLINE_ATOMICS-NEXT: adrp x9, var64 ; INLINE_ATOMICS-NEXT: str x8, [x9, :lo12:var64] ; INLINE_ATOMICS-NEXT: ret +; INLINE_ATOMICS-NEXT: .LBB43_4: // %cmpxchg.nostore +; INLINE_ATOMICS-NEXT: clrex +; INLINE_ATOMICS-NEXT: b .LBB43_3 ; ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64: ; OUTLINE_ATOMICS: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll index 21729b9dfd101..24a6c3c440e18 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll @@ -49,15 +49,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB0_2 -; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6 -; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -68,19 +62,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl __addsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 +; SOFTFP-NOLSE-NEXT: b .LBB0_6 +; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 ; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -137,15 +137,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB1_2 -; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6 -; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -156,19 +150,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl __addsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 +; SOFTFP-NOLSE-NEXT: b .LBB1_6 +; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 ; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB2_2 -; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6 -; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl __addsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 +; SOFTFP-NOLSE-NEXT: b .LBB2_6 +; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 ; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB3_2 -; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6 -; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl __addsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 +; SOFTFP-NOLSE-NEXT: b .LBB3_6 +; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 ; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -406,32 +406,32 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 -; SOFTFP-NOLSE-NEXT: b .LBB4_2 -; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6 -; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: bl __addsf3 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 +; SOFTFP-NOLSE-NEXT: b .LBB4_6 +; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 ; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -480,32 +480,32 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) # ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr x0, [x0] ; SOFTFP-NOLSE-NEXT: mov x20, x1 -; SOFTFP-NOLSE-NEXT: b .LBB5_2 -; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6 -; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov x1, x20 ; SOFTFP-NOLSE-NEXT: mov x21, x0 ; SOFTFP-NOLSE-NEXT: bl __adddf3 ; SOFTFP-NOLSE-NEXT: mov x8, x0 -; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19] ; SOFTFP-NOLSE-NEXT: cmp x0, x21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 +; SOFTFP-NOLSE-NEXT: b .LBB5_6 +; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 ; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload @@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB7_2 -; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6 -; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2 ; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 ; SOFTFP-NOLSE-NEXT: mov w24, w0 @@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: mov w8, w22 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 ; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20] ; SOFTFP-NOLSE-NEXT: cmp w22, w8 -; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 +; SOFTFP-NOLSE-NEXT: b .LBB7_6 +; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 ; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 @@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB8_2 -; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6 -; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2 ; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w0, w23 @@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19] ; SOFTFP-NOLSE-NEXT: cmp w22, w23 -; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 +; SOFTFP-NOLSE-NEXT: b .LBB8_6 +; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 ; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload @@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB9_2 -; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6 -; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB9_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w0, w23 ; SOFTFP-NOLSE-NEXT: mov w1, w19 ; SOFTFP-NOLSE-NEXT: bl __addsf3 @@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23 ; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32 ; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32 -; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB9_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20] ; SOFTFP-NOLSE-NEXT: cmp x22, x9 -; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB9_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB9_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1 +; SOFTFP-NOLSE-NEXT: b .LBB9_6 +; SOFTFP-NOLSE-NEXT: .LBB9_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1 ; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll index e3e18a1f91c6d..16825c9dcd178 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll @@ -51,15 +51,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB0_2 -; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6 -; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -70,19 +64,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl fmaxf ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 +; SOFTFP-NOLSE-NEXT: b .LBB0_6 +; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 ; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -139,15 +139,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB1_2 -; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6 -; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -158,19 +152,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl fmaxf ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 +; SOFTFP-NOLSE-NEXT: b .LBB1_6 +; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 ; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB2_2 -; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6 -; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl fmaxf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 +; SOFTFP-NOLSE-NEXT: b .LBB2_6 +; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 ; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB3_2 -; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6 -; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl fmaxf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 +; SOFTFP-NOLSE-NEXT: b .LBB3_6 +; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 ; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -408,32 +408,32 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 -; SOFTFP-NOLSE-NEXT: b .LBB4_2 -; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6 -; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: bl fmaxf ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 +; SOFTFP-NOLSE-NEXT: b .LBB4_6 +; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 ; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -482,32 +482,32 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) # ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr x0, [x0] ; SOFTFP-NOLSE-NEXT: mov x20, x1 -; SOFTFP-NOLSE-NEXT: b .LBB5_2 -; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6 -; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov x1, x20 ; SOFTFP-NOLSE-NEXT: mov x21, x0 ; SOFTFP-NOLSE-NEXT: bl fmax ; SOFTFP-NOLSE-NEXT: mov x8, x0 -; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19] ; SOFTFP-NOLSE-NEXT: cmp x0, x21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 +; SOFTFP-NOLSE-NEXT: b .LBB5_6 +; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 ; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload @@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB6_2 -; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6 -; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB6_2 Depth 2 ; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 ; SOFTFP-NOLSE-NEXT: mov w24, w0 @@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: mov w8, w22 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 ; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB6_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20] ; SOFTFP-NOLSE-NEXT: cmp w22, w8 -; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB6_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB6_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1 +; SOFTFP-NOLSE-NEXT: b .LBB6_6 +; SOFTFP-NOLSE-NEXT: .LBB6_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1 ; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 @@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB7_2 -; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6 -; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2 ; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w0, w23 @@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19] ; SOFTFP-NOLSE-NEXT: cmp w22, w23 -; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 +; SOFTFP-NOLSE-NEXT: b .LBB7_6 +; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 ; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload @@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB8_2 -; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6 -; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w0, w23 ; SOFTFP-NOLSE-NEXT: mov w1, w19 ; SOFTFP-NOLSE-NEXT: bl fmaxf @@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23 ; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32 ; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32 -; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20] ; SOFTFP-NOLSE-NEXT: cmp x22, x9 -; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 +; SOFTFP-NOLSE-NEXT: b .LBB8_6 +; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 ; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll index 10de6777bd285..314075c619103 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll @@ -51,15 +51,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB0_2 -; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6 -; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -70,19 +64,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl fminf ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 +; SOFTFP-NOLSE-NEXT: b .LBB0_6 +; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 ; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -139,15 +139,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB1_2 -; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6 -; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -158,19 +152,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl fminf ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 +; SOFTFP-NOLSE-NEXT: b .LBB1_6 +; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 ; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB2_2 -; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6 -; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl fminf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 +; SOFTFP-NOLSE-NEXT: b .LBB2_6 +; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 ; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB3_2 -; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6 -; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl fminf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 +; SOFTFP-NOLSE-NEXT: b .LBB3_6 +; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 ; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -408,32 +408,32 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 -; SOFTFP-NOLSE-NEXT: b .LBB4_2 -; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6 -; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: bl fminf ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 +; SOFTFP-NOLSE-NEXT: b .LBB4_6 +; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 ; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -482,32 +482,32 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) # ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr x0, [x0] ; SOFTFP-NOLSE-NEXT: mov x20, x1 -; SOFTFP-NOLSE-NEXT: b .LBB5_2 -; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6 -; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov x1, x20 ; SOFTFP-NOLSE-NEXT: mov x21, x0 ; SOFTFP-NOLSE-NEXT: bl fmin ; SOFTFP-NOLSE-NEXT: mov x8, x0 -; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19] ; SOFTFP-NOLSE-NEXT: cmp x0, x21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 +; SOFTFP-NOLSE-NEXT: b .LBB5_6 +; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 ; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload @@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB6_2 -; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6 -; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB6_2 Depth 2 ; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 ; SOFTFP-NOLSE-NEXT: mov w24, w0 @@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: mov w8, w22 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 ; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB6_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20] ; SOFTFP-NOLSE-NEXT: cmp w22, w8 -; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB6_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB6_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1 +; SOFTFP-NOLSE-NEXT: b .LBB6_6 +; SOFTFP-NOLSE-NEXT: .LBB6_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1 ; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 @@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB7_2 -; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6 -; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2 ; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w0, w23 @@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19] ; SOFTFP-NOLSE-NEXT: cmp w22, w23 -; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 +; SOFTFP-NOLSE-NEXT: b .LBB7_6 +; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 ; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload @@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB8_2 -; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6 -; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w0, w23 ; SOFTFP-NOLSE-NEXT: mov w1, w19 ; SOFTFP-NOLSE-NEXT: bl fminf @@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23 ; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32 ; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32 -; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20] ; SOFTFP-NOLSE-NEXT: cmp x22, x9 -; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 +; SOFTFP-NOLSE-NEXT: b .LBB8_6 +; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 ; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll index 82e0f14e68e26..6bb541684c2bd 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll @@ -49,15 +49,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB0_2 -; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6 -; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -68,19 +62,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl __subsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 +; SOFTFP-NOLSE-NEXT: b .LBB0_6 +; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1 ; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -137,15 +137,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 ; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB1_2 -; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6 -; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w22, w0 ; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 @@ -156,19 +150,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 { ; SOFTFP-NOLSE-NEXT: bl __subsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 +; SOFTFP-NOLSE-NEXT: b .LBB1_6 +; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1 ; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB2_2 -; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6 -; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl __subsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 +; SOFTFP-NOLSE-NEXT: b .LBB2_6 +; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1 ; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value) ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] ; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 -; SOFTFP-NOLSE-NEXT: b .LBB3_2 -; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6 -; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: bl __subsf3 ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth -; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 +; SOFTFP-NOLSE-NEXT: b .LBB3_6 +; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1 ; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -406,32 +406,32 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr w0, [x0] ; SOFTFP-NOLSE-NEXT: mov w20, w1 -; SOFTFP-NOLSE-NEXT: b .LBB4_2 -; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6 -; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w21, w0 ; SOFTFP-NOLSE-NEXT: bl __subsf3 ; SOFTFP-NOLSE-NEXT: mov w8, w0 -; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19] ; SOFTFP-NOLSE-NEXT: cmp w0, w21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 +; SOFTFP-NOLSE-NEXT: b .LBB4_6 +; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1 ; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -480,32 +480,32 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) # ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: ldr x0, [x0] ; SOFTFP-NOLSE-NEXT: mov x20, x1 -; SOFTFP-NOLSE-NEXT: b .LBB5_2 -; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6 -; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov x1, x20 ; SOFTFP-NOLSE-NEXT: mov x21, x0 ; SOFTFP-NOLSE-NEXT: bl __subdf3 ; SOFTFP-NOLSE-NEXT: mov x8, x0 -; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19] ; SOFTFP-NOLSE-NEXT: cmp x0, x21 -; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 +; SOFTFP-NOLSE-NEXT: b .LBB5_6 +; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1 ; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload @@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB7_2 -; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6 -; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2 ; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2 ; SOFTFP-NOLSE-NEXT: mov w24, w0 @@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half> ; SOFTFP-NOLSE-NEXT: mov w8, w22 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 ; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20] ; SOFTFP-NOLSE-NEXT: cmp w22, w8 -; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 +; SOFTFP-NOLSE-NEXT: b .LBB7_6 +; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1 ; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 @@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16 ; SOFTFP-NOLSE-NEXT: mov x19, x0 ; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; SOFTFP-NOLSE-NEXT: b .LBB8_2 -; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6 -; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2 ; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16 ; SOFTFP-NOLSE-NEXT: mov w1, w20 ; SOFTFP-NOLSE-NEXT: mov w0, w23 @@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 ; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16 ; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 -; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19] ; SOFTFP-NOLSE-NEXT: cmp w22, w23 -; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19] -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 +; SOFTFP-NOLSE-NEXT: b .LBB8_6 +; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1 ; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload @@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; SOFTFP-NOLSE-NEXT: mov w19, w2 ; SOFTFP-NOLSE-NEXT: mov x20, x0 -; SOFTFP-NOLSE-NEXT: b .LBB9_2 -; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1 -; SOFTFP-NOLSE-NEXT: mov w8, wzr -; SOFTFP-NOLSE-NEXT: clrex -; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6 -; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start +; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 -; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2 +; SOFTFP-NOLSE-NEXT: // Child Loop BB9_2 Depth 2 ; SOFTFP-NOLSE-NEXT: mov w0, w23 ; SOFTFP-NOLSE-NEXT: mov w1, w19 ; SOFTFP-NOLSE-NEXT: bl __subsf3 @@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa ; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23 ; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32 ; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32 -; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start -; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1 +; SOFTFP-NOLSE-NEXT: .LBB9_2: // %cmpxchg.start +; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 ; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20] ; SOFTFP-NOLSE-NEXT: cmp x22, x9 -; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1 -; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore -; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2 +; SOFTFP-NOLSE-NEXT: b.ne .LBB9_5 +; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 ; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20] -; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3 -; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1 +; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_2 +; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB9_1 Depth=1 ; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 ; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 -; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1 +; SOFTFP-NOLSE-NEXT: b .LBB9_6 +; SOFTFP-NOLSE-NEXT: .LBB9_5: // %cmpxchg.nostore +; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 +; SOFTFP-NOLSE-NEXT: mov w8, wzr +; SOFTFP-NOLSE-NEXT: clrex +; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 +; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1 ; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end ; SOFTFP-NOLSE-NEXT: mov w0, w22 ; SOFTFP-NOLSE-NEXT: mov w1, w23 diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll index b7817ebe59b9b..3f4dd116d91f8 100644 --- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -181,41 +181,41 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) { ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxr w8, [x19] ; CHECK-NEXT: cmp w8, w21 -; CHECK-NEXT: b.ne LBB3_4 +; CHECK-NEXT: b.ne LBB3_9 ; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1 ; CHECK-NEXT: stlxr w8, w20, [x19] ; CHECK-NEXT: cbnz w8, LBB3_1 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: mov w8, #1 ; =0x1 -; CHECK-NEXT: b LBB3_5 -; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: clrex -; CHECK-NEXT: LBB3_5: ; %for.cond.preheader +; CHECK-NEXT: LBB3_4: ; %for.cond.preheader ; CHECK-NEXT: mov w22, #2 ; =0x2 -; CHECK-NEXT: LBB3_6: ; %for.cond +; CHECK-NEXT: LBB3_5: ; %for.cond ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cbz w22, LBB3_9 -; CHECK-NEXT: ; %bb.7: ; %for.body -; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: cbz w22, LBB3_8 +; CHECK-NEXT: ; %bb.6: ; %for.body +; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1 ; CHECK-NEXT: sub w22, w22, #1 ; CHECK-NEXT: orr w9, w21, w20 ; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2] ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.eq LBB3_6 -; CHECK-NEXT: ; %bb.8: ; %if.then -; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: b.eq LBB3_5 +; CHECK-NEXT: ; %bb.7: ; %if.then +; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1 ; CHECK-NEXT: str w9, [x19, w22, sxtw #2] ; CHECK-NEXT: bl _foo ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b LBB3_6 -; CHECK-NEXT: LBB3_9: ; %for.cond.cleanup +; CHECK-NEXT: b LBB3_5 +; CHECK-NEXT: LBB3_8: ; %for.cond.cleanup ; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload ; CHECK-NEXT: ret +; CHECK-NEXT: LBB3_9: ; %cmpxchg.nostore +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: clrex +; CHECK-NEXT: b LBB3_4 ; ; OUTLINE-ATOMICS-LABEL: test_conditional2: ; OUTLINE-ATOMICS: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll index 4bf42d4ac9629..c37a21be5ca4d 100644 --- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -36,16 +36,18 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new) ; CHECK-THUMB-NEXT: bx r1 ; ; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: -; CHECK-ARMV6: uxtb r1, r1 +; CHECK-ARMV6: .fnstart +; CHECK-ARMV6-NEXT: uxtb r12, r1 ; CHECK-ARMV6-NEXT: .LBB0_1: -; CHECK-ARMV6-NEXT: ldrexb r3, [r0] -; CHECK-ARMV6-NEXT: cmp r3, r1 +; CHECK-ARMV6-NEXT: ldrexb r1, [r0] +; CHECK-ARMV6-NEXT: cmp r1, r12 ; CHECK-ARMV6-NEXT: movne r0, #0 ; CHECK-ARMV6-NEXT: bxne lr ; CHECK-ARMV6-NEXT: .LBB0_2: ; CHECK-ARMV6-NEXT: strexb r3, r2, [r0] +; CHECK-ARMV6-NEXT: mov r1, #1 ; CHECK-ARMV6-NEXT: cmp r3, #0 -; CHECK-ARMV6-NEXT: moveq r0, #1 +; CHECK-ARMV6-NEXT: moveq r0, r1 ; CHECK-ARMV6-NEXT: bxeq lr ; CHECK-ARMV6-NEXT: b .LBB0_1 ; @@ -61,19 +63,22 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new) ; CHECK-THUMBV6-NEXT: pop {r4, pc} ; ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: -; CHECK-ARMV7: uxtb r1, r1 +; CHECK-ARMV7: .fnstart +; CHECK-ARMV7-NEXT: uxtb r12, r1 ; CHECK-ARMV7-NEXT: .LBB0_1: -; CHECK-ARMV7-NEXT: ldrexb r3, [r0] -; CHECK-ARMV7-NEXT: cmp r3, r1 -; CHECK-ARMV7-NEXT: bne .LBB0_3 +; CHECK-ARMV7-NEXT: ldrexb r1, [r0] +; CHECK-ARMV7-NEXT: cmp r1, r12 +; CHECK-ARMV7-NEXT: bne .LBB0_4 ; CHECK-ARMV7-NEXT: strexb r3, r2, [r0] +; CHECK-ARMV7-NEXT: mov r1, #1 ; CHECK-ARMV7-NEXT: cmp r3, #0 -; CHECK-ARMV7-NEXT: moveq r0, #1 -; CHECK-ARMV7-NEXT: bxeq lr -; CHECK-ARMV7-NEXT: b .LBB0_1 -; CHECK-ARMV7-NEXT: .LBB0_3: -; CHECK-ARMV7-NEXT: mov r0, #0 +; CHECK-ARMV7-NEXT: bne .LBB0_1 +; CHECK-ARMV7-NEXT: mov r0, r1 +; CHECK-ARMV7-NEXT: bx lr +; CHECK-ARMV7-NEXT: .LBB0_4: +; CHECK-ARMV7-NEXT: mov r1, #0 ; CHECK-ARMV7-NEXT: clrex +; CHECK-ARMV7-NEXT: mov r0, r1 ; CHECK-ARMV7-NEXT: bx lr ; ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll index 4ff71b42d5db0..d59f282314b5c 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll @@ -4,14 +4,14 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) { ; CHECK-LABEL: test_return: ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0] -; CHECK: cmp [[LOADED]], r1 +; CHECK: cmp [[LOADED]], r1 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]] ; CHECK: dmb ishst ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] -; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] +; CHECK: cmp r3, #0 ; CHECK: ldrex [[LOADED]], [r0] ; CHECK: cmp [[LOADED]], r1 @@ -22,12 +22,6 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) { ; CHECK: clrex ; CHECK: movs r0, #0 ; CHECK: dmb ish -; CHECK: bx lr - -; CHECK: [[SUCCESS]]: -; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: movs r0, #1 -; CHECK: dmb ish ; CHECK: bx lr %pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst @@ -49,7 +43,7 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) { ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] -; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] +; CHECK: cmp [[STATUS]], #0 ; CHECK: ldrexb [[LOADED]], [r0] ; CHECK: cmp [[LOADED]], [[OLDBYTE]] @@ -63,12 +57,6 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) { ; CHECK: eor r0, [[TMP]], #1 ; CHECK: bx lr -; CHECK: [[SUCCESS]]: -; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: eor r0, [[TMP]], #1 -; CHECK: bx lr - %pair = cmpxchg ptr %value, i8 %oldValue, i8 %newValue acq_rel monotonic %success = extractvalue { i8, i1 } %pair, 1 @@ -87,7 +75,7 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) { ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] -; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] +; CHECK: cmp [[STATUS]], #0 ; CHECK: ldrex [[LOADED]], [r0] ; CHECK: cmp [[LOADED]], r1 @@ -99,11 +87,6 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) { ; CHECK: dmb ish ; CHECK: b.w _baz -; CHECK: [[SUCCESS]]: -; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: dmb ish -; CHECK: b.w _bar - %pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst %success = extractvalue { i32, i1 } %pair, 1 br i1 %success, label %true, label %false diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll index 9963f2d08ba52..b33eea9975740 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll @@ -10,16 +10,14 @@ define void @test_cmpxchg_weak(ptr %addr, i32 %desired, i32 %new) { ; CHECK-NEXT: dmb ish ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: %bb.2: +; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr ; CHECK-NEXT: [[LDFAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: str r3, [r0] -; CHECK-NEXT: bx lr -; CHECK-NEXT: [[SUCCESSBB]]: -; CHECK-NEXT: dmb ish +; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr ; @@ -37,19 +35,20 @@ define i1 @test_cmpxchg_weak_to_bool(i32, ptr %addr, i32 %desired, i32 %new) { ; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: %bb.1: ; CHECK-NEXT: dmb ish -; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] ; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: bxne lr -; CHECK-NEXT: LBB1_2: +; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: %bb.2: ; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: bx lr ; CHECK-NEXT: [[LDFAILBB]]: -; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: clrex +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: bx lr ; + %pair = cmpxchg weak ptr %addr, i32 %desired, i32 %new seq_cst monotonic %success = extractvalue { i32, i1 } %pair, 1 ret i1 %success diff --git a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll index 4372cad3f87c6..0b0399cbf4661 100644 --- a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll +++ b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll @@ -79,7 +79,6 @@ define void @f1() #0 { ; CHECK-NEXT: { ; CHECK-NEXT: r4 = sub(#-1,r4) ; CHECK-NEXT: } -; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_1: // %cmpxchg.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll index b7852c3c3e6e0..2d8e0e869a860 100644 --- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll +++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll @@ -19,51 +19,53 @@ define signext i32 @main() nounwind { ; CHECK-NEXT: addi 3, 1, 46 ; CHECK-NEXT: lharx 4, 0, 3 ; CHECK-NEXT: cmplwi 4, 33059 -; CHECK-NEXT: bne 0, .LBB0_4 +; CHECK-NEXT: bne- 0, .LBB0_4 ; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-NEXT: sync ; CHECK-NEXT: li 4, 234 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore ; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 4, 0, 3 -; CHECK-NEXT: beq 0, .LBB0_7 +; CHECK-NEXT: beq+ 0, .LBB0_5 ; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload ; CHECK-NEXT: # ; CHECK-NEXT: lharx 5, 0, 3 ; CHECK-NEXT: cmplwi 5, 33059 -; CHECK-NEXT: beq 0, .LBB0_2 +; CHECK-NEXT: beq+ 0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore ; CHECK-NEXT: lwsync -; CHECK-NEXT: b .LBB0_8 -; CHECK-NEXT: .LBB0_5: # %L.B0000 +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_5: # %cmpxchg.success +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB0_6: # %cmpxchg.end +; CHECK-NEXT: bc 4, 20, .LBB0_9 +; CHECK-NEXT: # %bb.7: # %L.B0000 ; CHECK-NEXT: lhz 3, 46(1) ; CHECK-NEXT: cmplwi 3, 234 -; CHECK-NEXT: bne 0, .LBB0_9 -; CHECK-NEXT: # %bb.6: # %L.B0001 +; CHECK-NEXT: bne 0, .LBB0_10 +; CHECK-NEXT: # %bb.8: # %L.B0001 ; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: bl puts ; CHECK-NEXT: nop ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB0_11 -; CHECK-NEXT: .LBB0_7: # %cmpxchg.success -; CHECK-NEXT: lwsync -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_8: # %L.B0003 +; CHECK-NEXT: b .LBB0_12 +; CHECK-NEXT: .LBB0_9: # %L.B0003 ; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: addi 3, 3, 16 -; CHECK-NEXT: b .LBB0_10 -; CHECK-NEXT: .LBB0_9: # %L.B0005 +; CHECK-NEXT: b .LBB0_11 +; CHECK-NEXT: .LBB0_10: # %L.B0005 ; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: addi 3, 3, 64 -; CHECK-NEXT: .LBB0_10: # %L.B0003 +; CHECK-NEXT: .LBB0_11: # %L.B0003 ; CHECK-NEXT: bl puts ; CHECK-NEXT: nop ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB0_11: # %L.B0003 +; CHECK-NEXT: .LBB0_12: # %L.B0003 ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 @@ -83,7 +85,7 @@ define signext i32 @main() nounwind { ; CHECK-P7-NEXT: srw 6, 5, 4 ; CHECK-P7-NEXT: clrlwi 6, 6, 16 ; CHECK-P7-NEXT: cmplwi 6, 33059 -; CHECK-P7-NEXT: bne 0, .LBB0_4 +; CHECK-P7-NEXT: bne- 0, .LBB0_4 ; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-P7-NEXT: lis 6, 0 ; CHECK-P7-NEXT: li 7, 234 @@ -92,51 +94,53 @@ define signext i32 @main() nounwind { ; CHECK-P7-NEXT: slw 7, 7, 4 ; CHECK-P7-NEXT: slw 6, 6, 4 ; CHECK-P7-NEXT: not 6, 6 -; CHECK-P7-NEXT: .p2align 4 ; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore ; CHECK-P7-NEXT: # ; CHECK-P7-NEXT: and 5, 5, 6 ; CHECK-P7-NEXT: or 5, 5, 7 ; CHECK-P7-NEXT: stwcx. 5, 0, 3 -; CHECK-P7-NEXT: beq 0, .LBB0_7 +; CHECK-P7-NEXT: beq+ 0, .LBB0_5 ; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload ; CHECK-P7-NEXT: # ; CHECK-P7-NEXT: lwarx 5, 0, 3 ; CHECK-P7-NEXT: srw 8, 5, 4 ; CHECK-P7-NEXT: clrlwi 8, 8, 16 ; CHECK-P7-NEXT: cmplwi 8, 33059 -; CHECK-P7-NEXT: beq 0, .LBB0_2 +; CHECK-P7-NEXT: beq+ 0, .LBB0_2 ; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore +; CHECK-P7-NEXT: crxor 20, 20, 20 ; CHECK-P7-NEXT: lwsync -; CHECK-P7-NEXT: b .LBB0_8 -; CHECK-P7-NEXT: .LBB0_5: # %L.B0000 +; CHECK-P7-NEXT: b .LBB0_6 +; CHECK-P7-NEXT: .LBB0_5: # %cmpxchg.success +; CHECK-P7-NEXT: lwsync +; CHECK-P7-NEXT: creqv 20, 20, 20 +; CHECK-P7-NEXT: .LBB0_6: # %cmpxchg.end +; CHECK-P7-NEXT: bc 4, 20, .LBB0_9 +; CHECK-P7-NEXT: # %bb.7: # %L.B0000 ; CHECK-P7-NEXT: lhz 3, 46(1) ; CHECK-P7-NEXT: cmplwi 3, 234 -; CHECK-P7-NEXT: bne 0, .LBB0_9 -; CHECK-P7-NEXT: # %bb.6: # %L.B0001 +; CHECK-P7-NEXT: bne 0, .LBB0_10 +; CHECK-P7-NEXT: # %bb.8: # %L.B0001 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: bl puts ; CHECK-P7-NEXT: nop ; CHECK-P7-NEXT: li 3, 0 -; CHECK-P7-NEXT: b .LBB0_11 -; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success -; CHECK-P7-NEXT: lwsync -; CHECK-P7-NEXT: b .LBB0_5 -; CHECK-P7-NEXT: .LBB0_8: # %L.B0003 +; CHECK-P7-NEXT: b .LBB0_12 +; CHECK-P7-NEXT: .LBB0_9: # %L.B0003 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: addi 3, 3, 16 -; CHECK-P7-NEXT: b .LBB0_10 -; CHECK-P7-NEXT: .LBB0_9: # %L.B0005 +; CHECK-P7-NEXT: b .LBB0_11 +; CHECK-P7-NEXT: .LBB0_10: # %L.B0005 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: addi 3, 3, 64 -; CHECK-P7-NEXT: .LBB0_10: # %L.B0003 +; CHECK-P7-NEXT: .LBB0_11: # %L.B0003 ; CHECK-P7-NEXT: bl puts ; CHECK-P7-NEXT: nop ; CHECK-P7-NEXT: li 3, 1 -; CHECK-P7-NEXT: .LBB0_11: # %L.B0003 +; CHECK-P7-NEXT: .LBB0_12: # %L.B0003 ; CHECK-P7-NEXT: addi 1, 1, 48 ; CHECK-P7-NEXT: ld 0, 16(1) ; CHECK-P7-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll index 07afea75aec67..7e892fc4ae6eb 100644 --- a/llvm/test/CodeGen/PowerPC/all-atomics.ll +++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll @@ -4347,19 +4347,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 8, sc@toc@l(4) ; CHECK-NEXT: lbarx 5, 0, 6 ; CHECK-NEXT: cmplw 5, 7 -; CHECK-NEXT: bne 0, .LBB3_4 +; CHECK-NEXT: bne- 0, .LBB3_4 ; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275 ; CHECK-NEXT: # ; CHECK-NEXT: stbcx. 8, 0, 6 -; CHECK-NEXT: beq 0, .LBB3_4 +; CHECK-NEXT: beq+ 0, .LBB3_4 ; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 5, 0, 6 ; CHECK-NEXT: cmplw 5, 7 -; CHECK-NEXT: beq 0, .LBB3_2 +; CHECK-NEXT: beq+ 0, .LBB3_2 ; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272 ; CHECK-NEXT: addi 7, 3, uc@toc@l ; CHECK-NEXT: lwsync @@ -4367,20 +4366,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 9, uc@toc@l(3) ; CHECK-NEXT: lbarx 8, 0, 7 ; CHECK-NEXT: cmplw 8, 9 -; CHECK-NEXT: bne 0, .LBB3_8 +; CHECK-NEXT: bne- 0, .LBB3_8 ; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257 ; CHECK-NEXT: sync ; CHECK-NEXT: clrlwi 5, 5, 24 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256 ; CHECK-NEXT: # ; CHECK-NEXT: stbcx. 5, 0, 7 -; CHECK-NEXT: beq 0, .LBB3_8 +; CHECK-NEXT: beq+ 0, .LBB3_8 ; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 8, 0, 7 ; CHECK-NEXT: cmplw 8, 9 -; CHECK-NEXT: beq 0, .LBB3_6 +; CHECK-NEXT: beq+ 0, .LBB3_6 ; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253 ; CHECK-NEXT: addis 5, 2, ss@toc@ha ; CHECK-NEXT: lwsync @@ -4390,21 +4388,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 8, 5, ss@toc@l ; CHECK-NEXT: lharx 9, 0, 8 ; CHECK-NEXT: cmplw 9, 10 -; CHECK-NEXT: bne 0, .LBB3_12 +; CHECK-NEXT: bne- 0, .LBB3_12 ; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238 ; CHECK-NEXT: extsb 11, 11 ; CHECK-NEXT: sync ; CHECK-NEXT: clrlwi 11, 11, 16 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237 ; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 11, 0, 8 -; CHECK-NEXT: beq 0, .LBB3_12 +; CHECK-NEXT: beq+ 0, .LBB3_12 ; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 9, 0, 8 ; CHECK-NEXT: cmplw 9, 10 -; CHECK-NEXT: beq 0, .LBB3_10 +; CHECK-NEXT: beq+ 0, .LBB3_10 ; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234 ; CHECK-NEXT: lwsync ; CHECK-NEXT: sth 9, ss@toc@l(5) @@ -4414,21 +4411,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 9, 5, us@toc@l ; CHECK-NEXT: lharx 10, 0, 9 ; CHECK-NEXT: cmplw 10, 11 -; CHECK-NEXT: bne 0, .LBB3_16 +; CHECK-NEXT: bne- 0, .LBB3_16 ; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219 ; CHECK-NEXT: extsb 12, 12 ; CHECK-NEXT: sync ; CHECK-NEXT: clrlwi 12, 12, 16 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218 ; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 12, 0, 9 -; CHECK-NEXT: beq 0, .LBB3_16 +; CHECK-NEXT: beq+ 0, .LBB3_16 ; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 10, 0, 9 ; CHECK-NEXT: cmplw 10, 11 -; CHECK-NEXT: beq 0, .LBB3_14 +; CHECK-NEXT: beq+ 0, .LBB3_14 ; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215 ; CHECK-NEXT: lwsync ; CHECK-NEXT: sth 10, us@toc@l(5) @@ -4438,20 +4434,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 10, 5, si@toc@l ; CHECK-NEXT: lwarx 11, 0, 10 ; CHECK-NEXT: cmplw 11, 12 -; CHECK-NEXT: bne 0, .LBB3_20 +; CHECK-NEXT: bne- 0, .LBB3_20 ; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200 ; CHECK-NEXT: extsb 0, 0 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199 ; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 0, 0, 10 -; CHECK-NEXT: beq 0, .LBB3_20 +; CHECK-NEXT: beq+ 0, .LBB3_20 ; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198 ; CHECK-NEXT: # ; CHECK-NEXT: lwarx 11, 0, 10 ; CHECK-NEXT: cmplw 11, 12 -; CHECK-NEXT: beq 0, .LBB3_18 +; CHECK-NEXT: beq+ 0, .LBB3_18 ; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196 ; CHECK-NEXT: lwsync ; CHECK-NEXT: stw 11, si@toc@l(5) @@ -4461,20 +4456,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 11, 5, ui@toc@l ; CHECK-NEXT: lwarx 12, 0, 11 ; CHECK-NEXT: cmplw 12, 0 -; CHECK-NEXT: bne 0, .LBB3_24 +; CHECK-NEXT: bne- 0, .LBB3_24 ; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181 ; CHECK-NEXT: extsb 30, 30 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180 ; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 30, 0, 11 -; CHECK-NEXT: beq 0, .LBB3_24 +; CHECK-NEXT: beq+ 0, .LBB3_24 ; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179 ; CHECK-NEXT: # ; CHECK-NEXT: lwarx 12, 0, 11 ; CHECK-NEXT: cmplw 12, 0 -; CHECK-NEXT: beq 0, .LBB3_22 +; CHECK-NEXT: beq+ 0, .LBB3_22 ; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177 ; CHECK-NEXT: addis 30, 2, sll@toc@ha ; CHECK-NEXT: lwsync @@ -4484,20 +4478,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 12, 30, sll@toc@l ; CHECK-NEXT: ldarx 0, 0, 12 ; CHECK-NEXT: cmpld 0, 29 -; CHECK-NEXT: bne 0, .LBB3_28 +; CHECK-NEXT: bne- 0, .LBB3_28 ; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162 ; CHECK-NEXT: extsb 28, 28 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161 ; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 28, 0, 12 -; CHECK-NEXT: beq 0, .LBB3_28 +; CHECK-NEXT: beq+ 0, .LBB3_28 ; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160 ; CHECK-NEXT: # ; CHECK-NEXT: ldarx 0, 0, 12 ; CHECK-NEXT: cmpld 0, 29 -; CHECK-NEXT: beq 0, .LBB3_26 +; CHECK-NEXT: beq+ 0, .LBB3_26 ; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158 ; CHECK-NEXT: lwsync ; CHECK-NEXT: std 0, sll@toc@l(30) @@ -4507,20 +4500,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: addi 0, 30, ull@toc@l ; CHECK-NEXT: ldarx 29, 0, 0 ; CHECK-NEXT: cmpld 29, 28 -; CHECK-NEXT: bne 0, .LBB3_32 +; CHECK-NEXT: bne- 0, .LBB3_32 ; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143 ; CHECK-NEXT: extsb 27, 27 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142 ; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 27, 0, 0 -; CHECK-NEXT: beq 0, .LBB3_32 +; CHECK-NEXT: beq+ 0, .LBB3_32 ; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141 ; CHECK-NEXT: # ; CHECK-NEXT: ldarx 29, 0, 0 ; CHECK-NEXT: cmpld 29, 28 -; CHECK-NEXT: beq 0, .LBB3_30 +; CHECK-NEXT: beq+ 0, .LBB3_30 ; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139 ; CHECK-NEXT: lwsync ; CHECK-NEXT: std 29, ull@toc@l(30) @@ -4528,19 +4520,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 29, sc@toc@l(4) ; CHECK-NEXT: lbarx 28, 0, 6 ; CHECK-NEXT: cmplw 28, 30 -; CHECK-NEXT: bne 0, .LBB3_36 +; CHECK-NEXT: bne- 0, .LBB3_36 ; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123 ; CHECK-NEXT: # ; CHECK-NEXT: stbcx. 29, 0, 6 -; CHECK-NEXT: beq 0, .LBB3_37 +; CHECK-NEXT: beq+ 0, .LBB3_37 ; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 28, 0, 6 ; CHECK-NEXT: cmplw 28, 30 -; CHECK-NEXT: beq 0, .LBB3_34 +; CHECK-NEXT: beq+ 0, .LBB3_34 ; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4557,19 +4548,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lbarx 29, 0, 7 ; CHECK-NEXT: cmplw 29, 6 -; CHECK-NEXT: bne 0, .LBB3_42 +; CHECK-NEXT: bne- 0, .LBB3_42 ; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104 ; CHECK-NEXT: # ; CHECK-NEXT: stbcx. 30, 0, 7 -; CHECK-NEXT: beq 0, .LBB3_43 +; CHECK-NEXT: beq+ 0, .LBB3_43 ; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 29, 0, 7 ; CHECK-NEXT: cmplw 29, 6 -; CHECK-NEXT: beq 0, .LBB3_40 +; CHECK-NEXT: beq+ 0, .LBB3_40 ; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4586,21 +4576,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lharx 30, 0, 8 ; CHECK-NEXT: cmplw 30, 6 -; CHECK-NEXT: bne 0, .LBB3_48 +; CHECK-NEXT: bne- 0, .LBB3_48 ; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86 ; CHECK-NEXT: extsb 7, 7 ; CHECK-NEXT: sync ; CHECK-NEXT: clrlwi 7, 7, 16 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85 ; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 7, 0, 8 -; CHECK-NEXT: beq 0, .LBB3_49 +; CHECK-NEXT: beq+ 0, .LBB3_49 ; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 30, 0, 8 ; CHECK-NEXT: cmplw 30, 6 -; CHECK-NEXT: beq 0, .LBB3_46 +; CHECK-NEXT: beq+ 0, .LBB3_46 ; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4617,21 +4606,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lharx 8, 0, 9 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_54 +; CHECK-NEXT: bne- 0, .LBB3_54 ; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67 ; CHECK-NEXT: extsb 7, 7 ; CHECK-NEXT: sync ; CHECK-NEXT: clrlwi 7, 7, 16 -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66 ; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 7, 0, 9 -; CHECK-NEXT: beq 0, .LBB3_55 +; CHECK-NEXT: beq+ 0, .LBB3_55 ; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 8, 0, 9 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_52 +; CHECK-NEXT: beq+ 0, .LBB3_52 ; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4648,20 +4636,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lwarx 8, 0, 10 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_60 +; CHECK-NEXT: bne- 0, .LBB3_60 ; CHECK-NEXT: # %bb.57: # %cmpxchg.fencedstore48 ; CHECK-NEXT: extsb 7, 7 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_58: # %cmpxchg.trystore47 ; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 10 -; CHECK-NEXT: beq 0, .LBB3_61 +; CHECK-NEXT: beq+ 0, .LBB3_61 ; CHECK-NEXT: # %bb.59: # %cmpxchg.releasedload46 ; CHECK-NEXT: # ; CHECK-NEXT: lwarx 8, 0, 10 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_58 +; CHECK-NEXT: beq+ 0, .LBB3_58 ; CHECK-NEXT: .LBB3_60: # %cmpxchg.nostore44 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4678,20 +4665,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lwarx 8, 0, 11 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_66 +; CHECK-NEXT: bne- 0, .LBB3_66 ; CHECK-NEXT: # %bb.63: # %cmpxchg.fencedstore29 ; CHECK-NEXT: extsb 7, 7 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_64: # %cmpxchg.trystore28 ; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 11 -; CHECK-NEXT: beq 0, .LBB3_67 +; CHECK-NEXT: beq+ 0, .LBB3_67 ; CHECK-NEXT: # %bb.65: # %cmpxchg.releasedload27 ; CHECK-NEXT: # ; CHECK-NEXT: lwarx 8, 0, 11 ; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_64 +; CHECK-NEXT: beq+ 0, .LBB3_64 ; CHECK-NEXT: .LBB3_66: # %cmpxchg.nostore25 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4708,20 +4694,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: ldarx 8, 0, 12 ; CHECK-NEXT: cmpld 8, 6 -; CHECK-NEXT: bne 0, .LBB3_72 +; CHECK-NEXT: bne- 0, .LBB3_72 ; CHECK-NEXT: # %bb.69: # %cmpxchg.fencedstore10 ; CHECK-NEXT: extsb 7, 7 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_70: # %cmpxchg.trystore9 ; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 7, 0, 12 -; CHECK-NEXT: beq 0, .LBB3_73 +; CHECK-NEXT: beq+ 0, .LBB3_73 ; CHECK-NEXT: # %bb.71: # %cmpxchg.releasedload8 ; CHECK-NEXT: # ; CHECK-NEXT: ldarx 8, 0, 12 ; CHECK-NEXT: cmpld 8, 6 -; CHECK-NEXT: beq 0, .LBB3_70 +; CHECK-NEXT: beq+ 0, .LBB3_70 ; CHECK-NEXT: .LBB3_72: # %cmpxchg.nostore6 ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4738,20 +4723,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: stw 6, ui@toc@l(5) ; CHECK-NEXT: ldarx 6, 0, 0 ; CHECK-NEXT: cmpld 6, 3 -; CHECK-NEXT: bne 0, .LBB3_78 +; CHECK-NEXT: bne- 0, .LBB3_78 ; CHECK-NEXT: # %bb.75: # %cmpxchg.fencedstore ; CHECK-NEXT: extsb 4, 4 ; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_76: # %cmpxchg.trystore ; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 4, 0, 0 -; CHECK-NEXT: beq 0, .LBB3_79 +; CHECK-NEXT: beq+ 0, .LBB3_79 ; CHECK-NEXT: # %bb.77: # %cmpxchg.releasedload ; CHECK-NEXT: # ; CHECK-NEXT: ldarx 6, 0, 0 ; CHECK-NEXT: cmpld 6, 3 -; CHECK-NEXT: beq 0, .LBB3_76 +; CHECK-NEXT: beq+ 0, .LBB3_76 ; CHECK-NEXT: .LBB3_78: # %cmpxchg.nostore ; CHECK-NEXT: lwsync ; CHECK-NEXT: crxor 20, 20, 20 @@ -4807,24 +4791,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 3, 26 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 4 -; AIX32-NEXT: bne 0, L..BB3_4 +; AIX32-NEXT: bne- 0, L..BB3_4 ; AIX32-NEXT: # %bb.1: # %cmpxchg.fencedstore289 ; AIX32-NEXT: sync ; AIX32-NEXT: slw 5, 5, 26 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_2: # %cmpxchg.trystore288 ; AIX32-NEXT: # ; AIX32-NEXT: and 6, 3, 25 ; AIX32-NEXT: or 6, 6, 5 ; AIX32-NEXT: stwcx. 6, 0, 27 -; AIX32-NEXT: beq 0, L..BB3_4 +; AIX32-NEXT: beq+ 0, L..BB3_4 ; AIX32-NEXT: # %bb.3: # %cmpxchg.releasedload287 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 3, 0, 27 ; AIX32-NEXT: srw 6, 3, 26 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 4 -; AIX32-NEXT: beq 0, L..BB3_2 +; AIX32-NEXT: beq+ 0, L..BB3_2 ; AIX32-NEXT: L..BB3_4: # %cmpxchg.nostore285 ; AIX32-NEXT: not 4, 30 ; AIX32-NEXT: srw 5, 3, 26 @@ -4840,25 +4823,24 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 4, 23 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_8 +; AIX32-NEXT: bne- 0, L..BB3_8 ; AIX32-NEXT: # %bb.5: # %cmpxchg.fencedstore256 ; AIX32-NEXT: clrlwi 5, 5, 24 ; AIX32-NEXT: sync ; AIX32-NEXT: slw 5, 5, 23 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_6: # %cmpxchg.trystore255 ; AIX32-NEXT: # ; AIX32-NEXT: and 6, 4, 22 ; AIX32-NEXT: or 6, 6, 5 ; AIX32-NEXT: stwcx. 6, 0, 24 -; AIX32-NEXT: beq 0, L..BB3_8 +; AIX32-NEXT: beq+ 0, L..BB3_8 ; AIX32-NEXT: # %bb.7: # %cmpxchg.releasedload254 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 4, 0, 24 ; AIX32-NEXT: srw 6, 4, 23 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_6 +; AIX32-NEXT: beq+ 0, L..BB3_6 ; AIX32-NEXT: L..BB3_8: # %cmpxchg.nostore252 ; AIX32-NEXT: srw 4, 4, 23 ; AIX32-NEXT: lwsync @@ -4878,26 +4860,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 8, 5, 20 ; AIX32-NEXT: clrlwi 8, 8, 16 ; AIX32-NEXT: cmplw 8, 6 -; AIX32-NEXT: bne 0, L..BB3_12 +; AIX32-NEXT: bne- 0, L..BB3_12 ; AIX32-NEXT: # %bb.9: # %cmpxchg.fencedstore223 ; AIX32-NEXT: extsb 7, 7 ; AIX32-NEXT: sync ; AIX32-NEXT: clrlwi 7, 7, 16 ; AIX32-NEXT: slw 7, 7, 20 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_10: # %cmpxchg.trystore222 ; AIX32-NEXT: # ; AIX32-NEXT: and 8, 5, 19 ; AIX32-NEXT: or 8, 8, 7 ; AIX32-NEXT: stwcx. 8, 0, 21 -; AIX32-NEXT: beq 0, L..BB3_12 +; AIX32-NEXT: beq+ 0, L..BB3_12 ; AIX32-NEXT: # %bb.11: # %cmpxchg.releasedload221 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 5, 0, 21 ; AIX32-NEXT: srw 8, 5, 20 ; AIX32-NEXT: clrlwi 8, 8, 16 ; AIX32-NEXT: cmplw 8, 6 -; AIX32-NEXT: beq 0, L..BB3_10 +; AIX32-NEXT: beq+ 0, L..BB3_10 ; AIX32-NEXT: L..BB3_12: # %cmpxchg.nostore219 ; AIX32-NEXT: srw 5, 5, 20 ; AIX32-NEXT: lwsync @@ -4915,26 +4896,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 7, 3, 17 ; AIX32-NEXT: clrlwi 7, 7, 16 ; AIX32-NEXT: cmplw 7, 5 -; AIX32-NEXT: bne 0, L..BB3_16 +; AIX32-NEXT: bne- 0, L..BB3_16 ; AIX32-NEXT: # %bb.13: # %cmpxchg.fencedstore190 ; AIX32-NEXT: extsb 6, 6 ; AIX32-NEXT: sync ; AIX32-NEXT: clrlwi 6, 6, 16 ; AIX32-NEXT: slw 6, 6, 17 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_14: # %cmpxchg.trystore189 ; AIX32-NEXT: # ; AIX32-NEXT: and 7, 3, 16 ; AIX32-NEXT: or 7, 7, 6 ; AIX32-NEXT: stwcx. 7, 0, 18 -; AIX32-NEXT: beq 0, L..BB3_16 +; AIX32-NEXT: beq+ 0, L..BB3_16 ; AIX32-NEXT: # %bb.15: # %cmpxchg.releasedload188 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 3, 0, 18 ; AIX32-NEXT: srw 7, 3, 17 ; AIX32-NEXT: clrlwi 7, 7, 16 ; AIX32-NEXT: cmplw 7, 5 -; AIX32-NEXT: beq 0, L..BB3_14 +; AIX32-NEXT: beq+ 0, L..BB3_14 ; AIX32-NEXT: L..BB3_16: # %cmpxchg.nostore186 ; AIX32-NEXT: srw 3, 3, 17 ; AIX32-NEXT: lwsync @@ -4944,20 +4924,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: lbz 4, 0(30) ; AIX32-NEXT: lwarx 3, 0, 15 ; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: bne 0, L..BB3_20 +; AIX32-NEXT: bne- 0, L..BB3_20 ; AIX32-NEXT: # %bb.17: # %cmpxchg.fencedstore171 ; AIX32-NEXT: extsb 5, 5 ; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 ; AIX32-NEXT: L..BB3_18: # %cmpxchg.trystore170 ; AIX32-NEXT: # ; AIX32-NEXT: stwcx. 5, 0, 15 -; AIX32-NEXT: beq 0, L..BB3_20 +; AIX32-NEXT: beq+ 0, L..BB3_20 ; AIX32-NEXT: # %bb.19: # %cmpxchg.releasedload169 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 3, 0, 15 ; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: beq 0, L..BB3_18 +; AIX32-NEXT: beq+ 0, L..BB3_18 ; AIX32-NEXT: L..BB3_20: # %cmpxchg.nostore167 ; AIX32-NEXT: lwsync ; AIX32-NEXT: lwz 28, L..C5(2) # @ui @@ -4966,20 +4945,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: lbz 5, 0(29) ; AIX32-NEXT: lwarx 3, 0, 28 ; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: bne 0, L..BB3_24 +; AIX32-NEXT: bne- 0, L..BB3_24 ; AIX32-NEXT: # %bb.21: # %cmpxchg.fencedstore152 ; AIX32-NEXT: extsb 5, 5 ; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 ; AIX32-NEXT: L..BB3_22: # %cmpxchg.trystore151 ; AIX32-NEXT: # ; AIX32-NEXT: stwcx. 5, 0, 28 -; AIX32-NEXT: beq 0, L..BB3_24 +; AIX32-NEXT: beq+ 0, L..BB3_24 ; AIX32-NEXT: # %bb.23: # %cmpxchg.releasedload150 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 3, 0, 28 ; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: beq 0, L..BB3_22 +; AIX32-NEXT: beq+ 0, L..BB3_22 ; AIX32-NEXT: L..BB3_24: # %cmpxchg.nostore148 ; AIX32-NEXT: lwsync ; AIX32-NEXT: stw 3, 0(28) @@ -5024,24 +5002,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 4, 26 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_28 +; AIX32-NEXT: bne- 0, L..BB3_28 ; AIX32-NEXT: # %bb.25: # %cmpxchg.fencedstore119 ; AIX32-NEXT: sync ; AIX32-NEXT: slw 5, 5, 26 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_26: # %cmpxchg.trystore118 ; AIX32-NEXT: # ; AIX32-NEXT: and 4, 4, 25 ; AIX32-NEXT: or 4, 4, 5 ; AIX32-NEXT: stwcx. 4, 0, 27 -; AIX32-NEXT: beq 0, L..BB3_29 +; AIX32-NEXT: beq+ 0, L..BB3_29 ; AIX32-NEXT: # %bb.27: # %cmpxchg.releasedload117 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 4, 0, 27 ; AIX32-NEXT: srw 6, 4, 26 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_26 +; AIX32-NEXT: beq+ 0, L..BB3_26 ; AIX32-NEXT: L..BB3_28: # %cmpxchg.nostore115 ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5060,24 +5037,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 4, 23 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_34 +; AIX32-NEXT: bne- 0, L..BB3_34 ; AIX32-NEXT: # %bb.31: # %cmpxchg.fencedstore86 ; AIX32-NEXT: sync ; AIX32-NEXT: slw 5, 5, 23 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_32: # %cmpxchg.trystore85 ; AIX32-NEXT: # ; AIX32-NEXT: and 4, 4, 22 ; AIX32-NEXT: or 4, 4, 5 ; AIX32-NEXT: stwcx. 4, 0, 24 -; AIX32-NEXT: beq 0, L..BB3_35 +; AIX32-NEXT: beq+ 0, L..BB3_35 ; AIX32-NEXT: # %bb.33: # %cmpxchg.releasedload84 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 4, 0, 24 ; AIX32-NEXT: srw 6, 4, 23 ; AIX32-NEXT: clrlwi 6, 6, 24 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_32 +; AIX32-NEXT: beq+ 0, L..BB3_32 ; AIX32-NEXT: L..BB3_34: # %cmpxchg.nostore82 ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5096,26 +5072,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 4, 20 ; AIX32-NEXT: clrlwi 6, 6, 16 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_40 +; AIX32-NEXT: bne- 0, L..BB3_40 ; AIX32-NEXT: # %bb.37: # %cmpxchg.fencedstore53 ; AIX32-NEXT: extsb 5, 5 ; AIX32-NEXT: sync ; AIX32-NEXT: clrlwi 5, 5, 16 ; AIX32-NEXT: slw 5, 5, 20 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_38: # %cmpxchg.trystore52 ; AIX32-NEXT: # ; AIX32-NEXT: and 4, 4, 19 ; AIX32-NEXT: or 4, 4, 5 ; AIX32-NEXT: stwcx. 4, 0, 21 -; AIX32-NEXT: beq 0, L..BB3_41 +; AIX32-NEXT: beq+ 0, L..BB3_41 ; AIX32-NEXT: # %bb.39: # %cmpxchg.releasedload51 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 4, 0, 21 ; AIX32-NEXT: srw 6, 4, 20 ; AIX32-NEXT: clrlwi 6, 6, 16 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_38 +; AIX32-NEXT: beq+ 0, L..BB3_38 ; AIX32-NEXT: L..BB3_40: # %cmpxchg.nostore49 ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5134,26 +5109,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: srw 6, 4, 17 ; AIX32-NEXT: clrlwi 6, 6, 16 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_46 +; AIX32-NEXT: bne- 0, L..BB3_46 ; AIX32-NEXT: # %bb.43: # %cmpxchg.fencedstore29 ; AIX32-NEXT: extsb 5, 5 ; AIX32-NEXT: sync ; AIX32-NEXT: clrlwi 5, 5, 16 ; AIX32-NEXT: slw 5, 5, 17 -; AIX32-NEXT: .align 4 ; AIX32-NEXT: L..BB3_44: # %cmpxchg.trystore28 ; AIX32-NEXT: # ; AIX32-NEXT: and 4, 4, 16 ; AIX32-NEXT: or 4, 4, 5 ; AIX32-NEXT: stwcx. 4, 0, 18 -; AIX32-NEXT: beq 0, L..BB3_47 +; AIX32-NEXT: beq+ 0, L..BB3_47 ; AIX32-NEXT: # %bb.45: # %cmpxchg.releasedload27 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 4, 0, 18 ; AIX32-NEXT: srw 6, 4, 17 ; AIX32-NEXT: clrlwi 6, 6, 16 ; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_44 +; AIX32-NEXT: beq+ 0, L..BB3_44 ; AIX32-NEXT: L..BB3_46: # %cmpxchg.nostore25 ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5170,20 +5144,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: lbz 3, 0(30) ; AIX32-NEXT: lwarx 5, 0, 15 ; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: bne 0, L..BB3_52 +; AIX32-NEXT: bne- 0, L..BB3_52 ; AIX32-NEXT: # %bb.49: # %cmpxchg.fencedstore10 ; AIX32-NEXT: extsb 4, 4 ; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 ; AIX32-NEXT: L..BB3_50: # %cmpxchg.trystore9 ; AIX32-NEXT: # ; AIX32-NEXT: stwcx. 4, 0, 15 -; AIX32-NEXT: beq 0, L..BB3_53 +; AIX32-NEXT: beq+ 0, L..BB3_53 ; AIX32-NEXT: # %bb.51: # %cmpxchg.releasedload8 ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 5, 0, 15 ; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: beq 0, L..BB3_50 +; AIX32-NEXT: beq+ 0, L..BB3_50 ; AIX32-NEXT: L..BB3_52: # %cmpxchg.nostore6 ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5200,20 +5173,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; AIX32-NEXT: lbz 3, 0(30) ; AIX32-NEXT: lwarx 5, 0, 28 ; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: bne 0, L..BB3_58 +; AIX32-NEXT: bne- 0, L..BB3_58 ; AIX32-NEXT: # %bb.55: # %cmpxchg.fencedstore ; AIX32-NEXT: extsb 4, 4 ; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 ; AIX32-NEXT: L..BB3_56: # %cmpxchg.trystore ; AIX32-NEXT: # ; AIX32-NEXT: stwcx. 4, 0, 28 -; AIX32-NEXT: beq 0, L..BB3_59 +; AIX32-NEXT: beq+ 0, L..BB3_59 ; AIX32-NEXT: # %bb.57: # %cmpxchg.releasedload ; AIX32-NEXT: # ; AIX32-NEXT: lwarx 5, 0, 28 ; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: beq 0, L..BB3_56 +; AIX32-NEXT: beq+ 0, L..BB3_56 ; AIX32-NEXT: L..BB3_58: # %cmpxchg.nostore ; AIX32-NEXT: crxor 20, 20, 20 ; AIX32-NEXT: lwsync @@ -5838,21 +5810,20 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ldarx 4, 0, 3 ; CHECK-NEXT: cmpld 4, 5 -; CHECK-NEXT: bne 0, .LBB6_2 +; CHECK-NEXT: bne- 0, .LBB6_3 ; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-NEXT: addi 4, 5, 1 +; CHECK-NEXT: creqv 20, 20, 20 ; CHECK-NEXT: stdcx. 4, 0, 3 -; CHECK-NEXT: beq 0, .LBB6_4 -; CHECK-NEXT: .LBB6_2: # %cmpxchg.failure -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: .LBB6_3: # %cmpxchg.end +; CHECK-NEXT: bne- 0, .LBB6_3 +; CHECK-NEXT: .LBB6_2: # %cmpxchg.end ; CHECK-NEXT: li 3, 66 ; CHECK-NEXT: li 4, 55 ; CHECK-NEXT: isel 3, 4, 3, 20 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB6_4: -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: b .LBB6_3 +; CHECK-NEXT: .LBB6_3: # %cmpxchg.failure +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB6_2 ; ; AIX32-LABEL: cmpswplp: ; AIX32: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll index 65a12a6222f24..ae071194b4479 100644 --- a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll +++ b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll @@ -19,13 +19,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) { ; CHECK-NEXT: stw r5, -16(r1) ; CHECK-NEXT: lwarx r6, 0, r3 ; CHECK-NEXT: cmplw r6, r7 -; CHECK-NEXT: bne cr0, L..BB0_2 +; CHECK-NEXT: bne- cr0, L..BB0_5 ; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt ; CHECK-NEXT: stwcx. r5, 0, r3 -; CHECK-NEXT: beq cr0, L..BB0_5 -; CHECK-NEXT: L..BB0_2: # %cmpxchg.failure -; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt -; CHECK-NEXT: # %bb.3: # %cmpxchg.store_expected +; CHECK-NEXT: bne- cr0, L..BB0_5 +; CHECK-NEXT: # %bb.2: # %cmpxchg.end +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_4 +; CHECK-NEXT: L..BB0_3: # %cmpxchg.store_expected ; CHECK-NEXT: stw r6, 0(r4) ; CHECK-NEXT: L..BB0_4: # %cmpxchg.continue ; CHECK-NEXT: li r3, 0 @@ -33,9 +34,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) { ; CHECK-NEXT: isel r3, r4, r3, 4*cr5+lt ; CHECK-NEXT: stb r3, -17(r1) ; CHECK-NEXT: blr -; CHECK-NEXT: L..BB0_5: -; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt -; CHECK-NEXT: b L..BB0_4 +; CHECK-NEXT: L..BB0_5: # %cmpxchg.failure +; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt +; CHECK-NEXT: b L..BB0_3 ; ; CHECK64-LABEL: foo: ; CHECK64: # %bb.0: # %entry @@ -46,13 +47,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) { ; CHECK64-NEXT: stw r5, -24(r1) ; CHECK64-NEXT: lwarx r6, 0, r3 ; CHECK64-NEXT: cmplw r6, r7 -; CHECK64-NEXT: bne cr0, L..BB0_2 +; CHECK64-NEXT: bne- cr0, L..BB0_5 ; CHECK64-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt ; CHECK64-NEXT: stwcx. r5, 0, r3 -; CHECK64-NEXT: beq cr0, L..BB0_5 -; CHECK64-NEXT: L..BB0_2: # %cmpxchg.failure -; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt -; CHECK64-NEXT: # %bb.3: # %cmpxchg.store_expected +; CHECK64-NEXT: bne- cr0, L..BB0_5 +; CHECK64-NEXT: # %bb.2: # %cmpxchg.end +; CHECK64-NEXT: bc 12, 4*cr5+lt, L..BB0_4 +; CHECK64-NEXT: L..BB0_3: # %cmpxchg.store_expected ; CHECK64-NEXT: stw r6, 0(r4) ; CHECK64-NEXT: L..BB0_4: # %cmpxchg.continue ; CHECK64-NEXT: li r3, 0 @@ -63,9 +65,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) { ; CHECK64-NEXT: li r3, 0 ; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt ; CHECK64-NEXT: blr -; CHECK64-NEXT: L..BB0_5: -; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt -; CHECK64-NEXT: b L..BB0_4 +; CHECK64-NEXT: L..BB0_5: # %cmpxchg.failure +; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt +; CHECK64-NEXT: b L..BB0_3 entry: %cp.addr = alloca ptr, align 4 %old.addr = alloca ptr, align 4 diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll index 600d28936c162..8232a44c7da26 100644 --- a/llvm/test/CodeGen/PowerPC/atomic-float.ll +++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll @@ -9,37 +9,36 @@ define float @test_add(ptr %ptr, float %incr) { ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: sync ; CHECK-64-NEXT: lfs 0, 0(3) -; CHECK-64-NEXT: b .LBB0_3 -; CHECK-64-NEXT: .LBB0_1: # %cmpxchg.nostore -; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-64-NEXT: crxor 20, 20, 20 -; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.end -; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-64-NEXT: stw 4, -12(1) -; CHECK-64-NEXT: lfs 0, -12(1) -; CHECK-64-NEXT: bc 12, 20, .LBB0_7 -; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start -; CHECK-64-NEXT: # =>This Loop Header: Depth=1 -; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-64-NEXT: # =>This Loop Header: Depth=1 +; CHECK-64-NEXT: # Child Loop BB0_2 Depth 2 ; CHECK-64-NEXT: fadds 2, 0, 1 ; CHECK-64-NEXT: stfs 2, -4(1) ; CHECK-64-NEXT: stfs 0, -8(1) ; CHECK-64-NEXT: lwz 5, -4(1) ; CHECK-64-NEXT: lwz 6, -8(1) -; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.start -; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1 -; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.start +; CHECK-64-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-64-NEXT: lwarx 4, 0, 3 -; CHECK-64-NEXT: cmplw 4, 6 -; CHECK-64-NEXT: bne 0, .LBB0_1 -; CHECK-64-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-64-NEXT: # in Loop: Header=BB0_4 Depth=2 +; CHECK-64-NEXT: cmplw 4, 6 +; CHECK-64-NEXT: bne- 0, .LBB0_5 +; CHECK-64-NEXT: # %bb.3: # %cmpxchg.fencedstore +; CHECK-64-NEXT: # ; CHECK-64-NEXT: stwcx. 5, 0, 3 -; CHECK-64-NEXT: bne 0, .LBB0_4 -; CHECK-64-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1 ; CHECK-64-NEXT: creqv 20, 20, 20 -; CHECK-64-NEXT: b .LBB0_2 -; CHECK-64-NEXT: .LBB0_7: # %atomicrmw.end +; CHECK-64-NEXT: bne- 0, .LBB0_2 +; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.end +; CHECK-64-NEXT: # +; CHECK-64-NEXT: stw 4, -12(1) +; CHECK-64-NEXT: lfs 0, -12(1) +; CHECK-64-NEXT: bc 4, 20, .LBB0_1 +; CHECK-64-NEXT: b .LBB0_6 +; CHECK-64-NEXT: .LBB0_5: # %cmpxchg.nostore +; CHECK-64-NEXT: # +; CHECK-64-NEXT: crxor 20, 20, 20 +; CHECK-64-NEXT: b .LBB0_4 +; CHECK-64-NEXT: .LBB0_6: # %atomicrmw.end ; CHECK-64-NEXT: fmr 1, 0 ; CHECK-64-NEXT: lwsync ; CHECK-64-NEXT: blr @@ -50,37 +49,36 @@ define float @test_add(ptr %ptr, float %incr) { ; CHECK-32-NEXT: .cfi_def_cfa_offset 32 ; CHECK-32-NEXT: sync ; CHECK-32-NEXT: lfs 0, 0(3) -; CHECK-32-NEXT: b .LBB0_3 -; CHECK-32-NEXT: .LBB0_1: # %cmpxchg.nostore -; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-32-NEXT: crxor 20, 20, 20 -; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.end -; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-32-NEXT: stw 4, 20(1) -; CHECK-32-NEXT: lfs 0, 20(1) -; CHECK-32-NEXT: bc 12, 20, .LBB0_7 -; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start -; CHECK-32-NEXT: # =>This Loop Header: Depth=1 -; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-32-NEXT: # =>This Loop Header: Depth=1 +; CHECK-32-NEXT: # Child Loop BB0_2 Depth 2 ; CHECK-32-NEXT: fadds 2, 0, 1 ; CHECK-32-NEXT: stfs 2, 28(1) ; CHECK-32-NEXT: stfs 0, 24(1) ; CHECK-32-NEXT: lwz 5, 28(1) ; CHECK-32-NEXT: lwz 6, 24(1) -; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.start -; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1 -; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.start +; CHECK-32-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-32-NEXT: lwarx 4, 0, 3 -; CHECK-32-NEXT: cmplw 4, 6 -; CHECK-32-NEXT: bne 0, .LBB0_1 -; CHECK-32-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-32-NEXT: # in Loop: Header=BB0_4 Depth=2 +; CHECK-32-NEXT: cmplw 4, 6 +; CHECK-32-NEXT: bne- 0, .LBB0_5 +; CHECK-32-NEXT: # %bb.3: # %cmpxchg.fencedstore +; CHECK-32-NEXT: # ; CHECK-32-NEXT: stwcx. 5, 0, 3 -; CHECK-32-NEXT: bne 0, .LBB0_4 -; CHECK-32-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1 ; CHECK-32-NEXT: creqv 20, 20, 20 -; CHECK-32-NEXT: b .LBB0_2 -; CHECK-32-NEXT: .LBB0_7: # %atomicrmw.end +; CHECK-32-NEXT: bne- 0, .LBB0_2 +; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.end +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stw 4, 20(1) +; CHECK-32-NEXT: lfs 0, 20(1) +; CHECK-32-NEXT: bc 4, 20, .LBB0_1 +; CHECK-32-NEXT: b .LBB0_6 +; CHECK-32-NEXT: .LBB0_5: # %cmpxchg.nostore +; CHECK-32-NEXT: # +; CHECK-32-NEXT: crxor 20, 20, 20 +; CHECK-32-NEXT: b .LBB0_4 +; CHECK-32-NEXT: .LBB0_6: # %atomicrmw.end ; CHECK-32-NEXT: fmr 1, 0 ; CHECK-32-NEXT: lwsync ; CHECK-32-NEXT: addi 1, 1, 32 diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll index 27a26aaca8b26..ff176c80ab342 100644 --- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll @@ -6,45 +6,49 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: li 6, 255 ; CHECK-NEXT: lwz 8, 0(5) ; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 7, 4, 24 -; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: .LBB0_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 7, 4, 24 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_4 Depth 2 ; CHECK-NEXT: srw 9, 8, 3 -; CHECK-NEXT: clrlwi 10, 9, 24 -; CHECK-NEXT: cmplw 10, 7 -; CHECK-NEXT: blt 0, .LBB0_4 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: sub 9, 9, 4 -; CHECK-NEXT: .LBB0_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: clrlwi 9, 9, 24 +; CHECK-NEXT: clrlwi 10, 9, 24 +; CHECK-NEXT: cmplw 10, 7 +; CHECK-NEXT: blt 0, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: sub 9, 9, 4 +; CHECK-NEXT: .LBB0_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 9, 9, 24 ; CHECK-NEXT: slw 9, 9, 3 ; CHECK-NEXT: and 10, 8, 6 ; CHECK-NEXT: or 10, 10, 9 -; CHECK-NEXT: .LBB0_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB0_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 9, 0, 5 -; CHECK-NEXT: cmplw 9, 8 -; CHECK-NEXT: bne 0, .LBB0_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2 +; CHECK-NEXT: cmplw 9, 8 +; CHECK-NEXT: bne- 0, .LBB0_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 10, 0, 5 -; CHECK-NEXT: bne 0, .LBB0_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB0_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: bc 4, 20, .LBB0_1 +; CHECK-NEXT: b .LBB0_8 +; CHECK-NEXT: .LBB0_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 9, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -57,47 +61,51 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: clrlwi 3, 3, 30 +; CHECK-NEXT: clrlwi 3, 3, 30 ; CHECK-NEXT: lis 6, 0 ; CHECK-NEXT: xori 3, 3, 2 ; CHECK-NEXT: lwz 8, 0(5) ; CHECK-NEXT: ori 6, 6, 65535 ; CHECK-NEXT: slwi 3, 3, 3 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 7, 4, 16 -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: .LBB1_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 7, 4, 16 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB1_4 Depth 2 ; CHECK-NEXT: srw 9, 8, 3 -; CHECK-NEXT: clrlwi 10, 9, 16 -; CHECK-NEXT: cmplw 10, 7 -; CHECK-NEXT: blt 0, .LBB1_4 -; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: sub 9, 9, 4 -; CHECK-NEXT: .LBB1_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: clrlwi 9, 9, 16 +; CHECK-NEXT: clrlwi 10, 9, 16 +; CHECK-NEXT: cmplw 10, 7 +; CHECK-NEXT: blt 0, .LBB1_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: sub 9, 9, 4 +; CHECK-NEXT: .LBB1_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 9, 9, 16 ; CHECK-NEXT: slw 9, 9, 3 ; CHECK-NEXT: and 10, 8, 6 ; CHECK-NEXT: or 10, 10, 9 -; CHECK-NEXT: .LBB1_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB1_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 9, 0, 5 -; CHECK-NEXT: cmplw 9, 8 -; CHECK-NEXT: bne 0, .LBB1_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2 +; CHECK-NEXT: cmplw 9, 8 +; CHECK-NEXT: bne- 0, .LBB1_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 10, 0, 5 -; CHECK-NEXT: bne 0, .LBB1_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB1_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: bc 4, 20, .LBB1_1 +; CHECK-NEXT: b .LBB1_8 +; CHECK-NEXT: .LBB1_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: .LBB1_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 9, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -110,34 +118,38 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: lwz 6, 0(3) -; CHECK-NEXT: b .LBB2_2 -; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB2_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB2_5 Depth 2 -; CHECK-NEXT: cmplw 6, 4 -; CHECK-NEXT: bge 0, .LBB2_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: mr 7, 6 -; CHECK-NEXT: b .LBB2_5 -; CHECK-NEXT: .LBB2_4: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: sub 7, 6, 4 -; CHECK-NEXT: .LBB2_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB2_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB2_4 Depth 2 +; CHECK-NEXT: cmplw 6, 4 +; CHECK-NEXT: bge 0, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 6 +; CHECK-NEXT: b .LBB2_4 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: sub 7, 6, 4 +; CHECK-NEXT: .LBB2_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB2_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 5, 0, 3 -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB2_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB2_5 Depth=2 +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne- 0, .LBB2_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB2_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB2_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB2_1 +; CHECK-NEXT: b .LBB2_8 +; CHECK-NEXT: .LBB2_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB2_1 +; CHECK-NEXT: .LBB2_8: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -150,34 +162,38 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: ld 6, 0(3) -; CHECK-NEXT: b .LBB3_2 -; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB3_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB3_5 Depth 2 -; CHECK-NEXT: cmpld 6, 4 -; CHECK-NEXT: bge 0, .LBB3_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: mr 7, 6 -; CHECK-NEXT: b .LBB3_5 -; CHECK-NEXT: .LBB3_4: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: sub 7, 6, 4 -; CHECK-NEXT: .LBB3_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB3_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB3_4 Depth 2 +; CHECK-NEXT: cmpld 6, 4 +; CHECK-NEXT: bge 0, .LBB3_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 6 +; CHECK-NEXT: b .LBB3_4 +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: sub 7, 6, 4 +; CHECK-NEXT: .LBB3_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB3_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldarx 5, 0, 3 -; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: bne 0, .LBB3_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB3_5 Depth=2 +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: bne- 0, .LBB3_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB3_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB3_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB3_1 +; CHECK-NEXT: b .LBB3_8 +; CHECK-NEXT: .LBB3_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB3_1 +; CHECK-NEXT: .LBB3_8: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -190,47 +206,51 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: li 6, 255 ; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 4, 4, 24 -; CHECK-NEXT: b .LBB4_2 -; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: .LBB4_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB4_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 4, 4, 24 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB4_4 Depth 2 ; CHECK-NEXT: srw 8, 7, 3 -; CHECK-NEXT: clrlwi 9, 8, 24 -; CHECK-NEXT: sub 8, 9, 4 -; CHECK-NEXT: cmplw 8, 9 +; CHECK-NEXT: clrlwi 9, 8, 24 +; CHECK-NEXT: sub 8, 9, 4 +; CHECK-NEXT: cmplw 8, 9 ; CHECK-NEXT: li 9, 0 -; CHECK-NEXT: bgt 0, .LBB4_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: mr 9, 8 -; CHECK-NEXT: .LBB4_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: bgt 0, .LBB4_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 9, 8 +; CHECK-NEXT: .LBB4_3: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: slw 8, 9, 3 ; CHECK-NEXT: and 9, 7, 6 ; CHECK-NEXT: or 9, 9, 8 -; CHECK-NEXT: .LBB4_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB4_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB4_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB4_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 8, 0, 5 -; CHECK-NEXT: cmplw 8, 7 -; CHECK-NEXT: bne 0, .LBB4_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2 +; CHECK-NEXT: cmplw 8, 7 +; CHECK-NEXT: bne- 0, .LBB4_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 9, 0, 5 -; CHECK-NEXT: bne 0, .LBB4_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB4_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: bc 4, 20, .LBB4_1 +; CHECK-NEXT: b .LBB4_8 +; CHECK-NEXT: .LBB4_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 8, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -243,49 +263,53 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: clrlwi 3, 3, 30 +; CHECK-NEXT: clrlwi 3, 3, 30 ; CHECK-NEXT: lis 6, 0 ; CHECK-NEXT: xori 3, 3, 2 ; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: ori 6, 6, 65535 ; CHECK-NEXT: slwi 3, 3, 3 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 4, 4, 16 -; CHECK-NEXT: b .LBB5_2 -; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: .LBB5_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB5_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 4, 4, 16 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB5_4 Depth 2 ; CHECK-NEXT: srw 8, 7, 3 -; CHECK-NEXT: clrlwi 9, 8, 16 -; CHECK-NEXT: sub 8, 9, 4 -; CHECK-NEXT: cmplw 8, 9 +; CHECK-NEXT: clrlwi 9, 8, 16 +; CHECK-NEXT: sub 8, 9, 4 +; CHECK-NEXT: cmplw 8, 9 ; CHECK-NEXT: li 9, 0 -; CHECK-NEXT: bgt 0, .LBB5_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: mr 9, 8 -; CHECK-NEXT: .LBB5_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: bgt 0, .LBB5_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 9, 8 +; CHECK-NEXT: .LBB5_3: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: slw 8, 9, 3 ; CHECK-NEXT: and 9, 7, 6 ; CHECK-NEXT: or 9, 9, 8 -; CHECK-NEXT: .LBB5_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB5_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB5_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB5_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 8, 0, 5 -; CHECK-NEXT: cmplw 8, 7 -; CHECK-NEXT: bne 0, .LBB5_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2 +; CHECK-NEXT: cmplw 8, 7 +; CHECK-NEXT: bne- 0, .LBB5_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 9, 0, 5 -; CHECK-NEXT: bne 0, .LBB5_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB5_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: bc 4, 20, .LBB5_1 +; CHECK-NEXT: b .LBB5_8 +; CHECK-NEXT: .LBB5_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: b .LBB5_1 +; CHECK-NEXT: .LBB5_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 8, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -298,33 +322,37 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: lwz 6, 0(3) -; CHECK-NEXT: b .LBB6_2 -; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB6_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB6_4 Depth 2 -; CHECK-NEXT: sub 5, 6, 4 -; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB6_3 Depth 2 +; CHECK-NEXT: sub 5, 6, 4 +; CHECK-NEXT: cmplw 5, 6 ; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: bgt 0, .LBB6_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: mr 7, 5 -; CHECK-NEXT: .LBB6_4: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB6_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: bgt 0, .LBB6_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: .LBB6_3: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB6_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 5, 0, 3 -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB6_1 -; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB6_4 Depth=2 +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne- 0, .LBB6_6 +; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB6_4 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.7: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB6_3 +; CHECK-NEXT: # %bb.5: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB6_1 +; CHECK-NEXT: b .LBB6_7 +; CHECK-NEXT: .LBB6_6: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB6_1 +; CHECK-NEXT: .LBB6_7: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -337,33 +365,37 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: ld 6, 0(3) -; CHECK-NEXT: b .LBB7_2 -; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB7_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB7_4 Depth 2 -; CHECK-NEXT: subc 5, 6, 4 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB7_3 Depth 2 +; CHECK-NEXT: subc 5, 6, 4 ; CHECK-NEXT: li 7, 0 ; CHECK-NEXT: addze. 8, 7 -; CHECK-NEXT: beq 0, .LBB7_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: mr 7, 5 -; CHECK-NEXT: .LBB7_4: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB7_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: beq 0, .LBB7_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: .LBB7_3: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB7_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldarx 5, 0, 3 -; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: bne 0, .LBB7_1 -; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB7_4 Depth=2 +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: bne- 0, .LBB7_6 +; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB7_4 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.7: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB7_3 +; CHECK-NEXT: # %bb.5: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB7_1 +; CHECK-NEXT: b .LBB7_7 +; CHECK-NEXT: .LBB7_6: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB7_1 +; CHECK-NEXT: .LBB7_7: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll index 6ced47bd6bcba..4dc6d0ad3d5c7 100644 --- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll @@ -6,47 +6,51 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: li 6, 255 ; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 4, 4, 24 -; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: .LBB0_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 4, 4, 24 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_4 Depth 2 ; CHECK-NEXT: srw 8, 7, 3 -; CHECK-NEXT: clrlwi 9, 8, 24 -; CHECK-NEXT: cmplw 9, 4 +; CHECK-NEXT: clrlwi 9, 8, 24 +; CHECK-NEXT: cmplw 9, 4 ; CHECK-NEXT: li 9, 0 -; CHECK-NEXT: bge 0, .LBB0_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: bge 0, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 9, 8, 1 -; CHECK-NEXT: .LBB0_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: clrlwi 8, 9, 24 +; CHECK-NEXT: .LBB0_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 8, 9, 24 ; CHECK-NEXT: slw 8, 8, 3 ; CHECK-NEXT: and 9, 7, 6 ; CHECK-NEXT: or 9, 9, 8 -; CHECK-NEXT: .LBB0_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB0_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 8, 0, 5 -; CHECK-NEXT: cmplw 8, 7 -; CHECK-NEXT: bne 0, .LBB0_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2 +; CHECK-NEXT: cmplw 8, 7 +; CHECK-NEXT: bne- 0, .LBB0_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 9, 0, 5 -; CHECK-NEXT: bne 0, .LBB0_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB0_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: bc 4, 20, .LBB0_1 +; CHECK-NEXT: b .LBB0_8 +; CHECK-NEXT: .LBB0_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 8, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -59,49 +63,53 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: clrlwi 3, 3, 30 +; CHECK-NEXT: clrlwi 3, 3, 30 ; CHECK-NEXT: lis 6, 0 ; CHECK-NEXT: xori 3, 3, 2 ; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: ori 6, 6, 65535 ; CHECK-NEXT: slwi 3, 3, 3 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 4, 4, 16 -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: .LBB1_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 4, 4, 16 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB1_4 Depth 2 ; CHECK-NEXT: srw 8, 7, 3 -; CHECK-NEXT: clrlwi 9, 8, 16 -; CHECK-NEXT: cmplw 9, 4 +; CHECK-NEXT: clrlwi 9, 8, 16 +; CHECK-NEXT: cmplw 9, 4 ; CHECK-NEXT: li 9, 0 -; CHECK-NEXT: bge 0, .LBB1_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: bge 0, .LBB1_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 9, 8, 1 -; CHECK-NEXT: .LBB1_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: clrlwi 8, 9, 16 +; CHECK-NEXT: .LBB1_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 8, 9, 16 ; CHECK-NEXT: slw 8, 8, 3 ; CHECK-NEXT: and 9, 7, 6 ; CHECK-NEXT: or 9, 9, 8 -; CHECK-NEXT: .LBB1_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB1_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 8, 0, 5 -; CHECK-NEXT: cmplw 8, 7 -; CHECK-NEXT: bne 0, .LBB1_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2 +; CHECK-NEXT: cmplw 8, 7 +; CHECK-NEXT: bne- 0, .LBB1_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 9, 0, 5 -; CHECK-NEXT: bne 0, .LBB1_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 7, 8 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB1_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: bc 4, 20, .LBB1_1 +; CHECK-NEXT: b .LBB1_8 +; CHECK-NEXT: .LBB1_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 7, 8 +; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: .LBB1_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 8, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -114,32 +122,36 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: lwz 6, 0(3) -; CHECK-NEXT: b .LBB2_2 -; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB2_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB2_4 Depth 2 -; CHECK-NEXT: cmplw 6, 4 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB2_3 Depth 2 +; CHECK-NEXT: cmplw 6, 4 ; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: bge 0, .LBB2_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: bge 0, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 7, 6, 1 -; CHECK-NEXT: .LBB2_4: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB2_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB2_3: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB2_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 5, 0, 3 -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB2_1 -; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB2_4 Depth=2 +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne- 0, .LBB2_6 +; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB2_4 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.7: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB2_3 +; CHECK-NEXT: # %bb.5: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB2_1 +; CHECK-NEXT: b .LBB2_7 +; CHECK-NEXT: .LBB2_6: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB2_1 +; CHECK-NEXT: .LBB2_7: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -152,32 +164,36 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: ld 6, 0(3) -; CHECK-NEXT: b .LBB3_2 -; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB3_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB3_4 Depth 2 -; CHECK-NEXT: cmpld 6, 4 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB3_3 Depth 2 +; CHECK-NEXT: cmpld 6, 4 ; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: bge 0, .LBB3_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: bge 0, .LBB3_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 7, 6, 1 -; CHECK-NEXT: .LBB3_4: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB3_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB3_3: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB3_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldarx 5, 0, 3 -; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: bne 0, .LBB3_1 -; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB3_4 Depth=2 +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: bne- 0, .LBB3_6 +; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB3_4 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.7: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB3_3 +; CHECK-NEXT: # %bb.5: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB3_1 +; CHECK-NEXT: b .LBB3_7 +; CHECK-NEXT: .LBB3_6: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB3_1 +; CHECK-NEXT: .LBB3_7: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -190,48 +206,52 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: li 6, 255 ; CHECK-NEXT: lwz 8, 0(5) ; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 7, 4, 24 -; CHECK-NEXT: b .LBB4_2 -; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: .LBB4_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB4_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 7, 4, 24 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB4_4 Depth 2 ; CHECK-NEXT: srw 9, 8, 3 ; CHECK-NEXT: andi. 10, 9, 255 ; CHECK-NEXT: cmplw 1, 10, 7 ; CHECK-NEXT: cror 20, 2, 5 -; CHECK-NEXT: mr 10, 4 -; CHECK-NEXT: bc 12, 20, .LBB4_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: mr 10, 4 +; CHECK-NEXT: bc 12, 20, .LBB4_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 10, 9, -1 -; CHECK-NEXT: .LBB4_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: clrlwi 9, 10, 24 +; CHECK-NEXT: .LBB4_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 9, 10, 24 ; CHECK-NEXT: slw 9, 9, 3 ; CHECK-NEXT: and 10, 8, 6 ; CHECK-NEXT: or 10, 10, 9 -; CHECK-NEXT: .LBB4_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB4_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB4_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB4_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 9, 0, 5 -; CHECK-NEXT: cmplw 9, 8 -; CHECK-NEXT: bne 0, .LBB4_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2 +; CHECK-NEXT: cmplw 9, 8 +; CHECK-NEXT: bne- 0, .LBB4_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 10, 0, 5 -; CHECK-NEXT: bne 0, .LBB4_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB4_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: bc 4, 20, .LBB4_1 +; CHECK-NEXT: b .LBB4_8 +; CHECK-NEXT: .LBB4_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 9, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -244,50 +264,54 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: rldicr 5, 3, 0, 61 -; CHECK-NEXT: clrlwi 3, 3, 30 +; CHECK-NEXT: clrlwi 3, 3, 30 ; CHECK-NEXT: lis 6, 0 ; CHECK-NEXT: xori 3, 3, 2 ; CHECK-NEXT: lwz 8, 0(5) ; CHECK-NEXT: ori 6, 6, 65535 ; CHECK-NEXT: slwi 3, 3, 3 ; CHECK-NEXT: slw 6, 6, 3 -; CHECK-NEXT: not 6, 6 -; CHECK-NEXT: clrlwi 7, 4, 16 -; CHECK-NEXT: b .LBB5_2 -; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: .LBB5_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB5_5 Depth 2 +; CHECK-NEXT: not 6, 6 +; CHECK-NEXT: clrlwi 7, 4, 16 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB5_4 Depth 2 ; CHECK-NEXT: srw 9, 8, 3 ; CHECK-NEXT: andi. 10, 9, 65535 ; CHECK-NEXT: cmplw 1, 10, 7 ; CHECK-NEXT: cror 20, 2, 5 -; CHECK-NEXT: mr 10, 4 -; CHECK-NEXT: bc 12, 20, .LBB5_4 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: mr 10, 4 +; CHECK-NEXT: bc 12, 20, .LBB5_3 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 10, 9, -1 -; CHECK-NEXT: .LBB5_4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 -; CHECK-NEXT: clrlwi 9, 10, 16 +; CHECK-NEXT: .LBB5_3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: clrlwi 9, 10, 16 ; CHECK-NEXT: slw 9, 9, 3 ; CHECK-NEXT: and 10, 8, 6 ; CHECK-NEXT: or 10, 10, 9 -; CHECK-NEXT: .LBB5_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB5_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB5_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB5_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 9, 0, 5 -; CHECK-NEXT: cmplw 9, 8 -; CHECK-NEXT: bne 0, .LBB5_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2 +; CHECK-NEXT: cmplw 9, 8 +; CHECK-NEXT: bne- 0, .LBB5_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 10, 0, 5 -; CHECK-NEXT: bne 0, .LBB5_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 8, 9 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB5_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: bc 4, 20, .LBB5_1 +; CHECK-NEXT: b .LBB5_8 +; CHECK-NEXT: .LBB5_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 8, 9 +; CHECK-NEXT: b .LBB5_1 +; CHECK-NEXT: .LBB5_8: # %atomicrmw.end ; CHECK-NEXT: srw 3, 9, 3 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -300,37 +324,41 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: lwz 6, 0(3) -; CHECK-NEXT: b .LBB6_2 -; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB6_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB6_5 Depth 2 -; CHECK-NEXT: cmpwi 6, 0 -; CHECK-NEXT: mr 7, 4 -; CHECK-NEXT: bc 12, 2, .LBB6_5 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 -; CHECK-NEXT: cmplw 6, 4 -; CHECK-NEXT: mr 7, 4 -; CHECK-NEXT: bc 12, 1, .LBB6_5 -; CHECK-NEXT: # %bb.4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB6_4 Depth 2 +; CHECK-NEXT: cmpwi 6, 0 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: bc 12, 2, .LBB6_4 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmplw 6, 4 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: bc 12, 1, .LBB6_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 7, 6, -1 -; CHECK-NEXT: .LBB6_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB6_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB6_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB6_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lwarx 5, 0, 3 -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB6_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB6_5 Depth=2 +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne- 0, .LBB6_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB6_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB6_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB6_1 +; CHECK-NEXT: b .LBB6_8 +; CHECK-NEXT: .LBB6_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB6_1 +; CHECK-NEXT: .LBB6_8: # %atomicrmw.end ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr @@ -343,38 +371,42 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: ld 6, 0(3) -; CHECK-NEXT: b .LBB7_2 -; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore -; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: .LBB7_2: # %atomicrmw.start -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB7_5 Depth 2 -; CHECK-NEXT: cmpdi 6, 0 -; CHECK-NEXT: mr 7, 4 -; CHECK-NEXT: bc 12, 2, .LBB7_5 -; CHECK-NEXT: # %bb.3: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: cmpld 6, 4 -; CHECK-NEXT: mr 7, 4 -; CHECK-NEXT: bc 12, 1, .LBB7_5 -; CHECK-NEXT: # %bb.4: # %atomicrmw.start -; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB7_4 Depth 2 +; CHECK-NEXT: cmpdi 6, 0 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: bc 12, 2, .LBB7_4 +; CHECK-NEXT: # %bb.2: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmpld 6, 4 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: bc 12, 1, .LBB7_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # ; CHECK-NEXT: addi 7, 6, -1 -; CHECK-NEXT: .LBB7_5: # %cmpxchg.start -; CHECK-NEXT: # Parent Loop BB7_2 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: .LBB7_4: # %cmpxchg.start +; CHECK-NEXT: # Parent Loop BB7_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldarx 5, 0, 3 -; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: bne 0, .LBB7_1 -; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore -; CHECK-NEXT: # in Loop: Header=BB7_5 Depth=2 +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: bne- 0, .LBB7_7 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore +; CHECK-NEXT: # ; CHECK-NEXT: stdcx. 7, 0, 3 -; CHECK-NEXT: bne 0, .LBB7_5 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: mr 6, 5 -; CHECK-NEXT: # %bb.8: # %atomicrmw.end -; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: bne- 0, .LBB7_4 +; CHECK-NEXT: # %bb.6: # %cmpxchg.end +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: bc 4, 20, .LBB7_1 +; CHECK-NEXT: b .LBB7_8 +; CHECK-NEXT: .LBB7_7: # %cmpxchg.nostore +; CHECK-NEXT: # +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: b .LBB7_1 +; CHECK-NEXT: .LBB7_8: # %atomicrmw.end +; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll index 0474a479a1fef..90990bbb4124d 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll @@ -402,16 +402,15 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB40_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB40_1 +; PPC64LE-NEXT: bne- 0, .LBB40_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic @@ -423,16 +422,15 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB41_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB41_1 +; PPC64LE-NEXT: bne- 0, .LBB41_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -445,16 +443,15 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB42_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB42_3 +; PPC64LE-NEXT: bne- 0, .LBB42_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB42_1 +; PPC64LE-NEXT: bne- 0, .LBB42_1 ; PPC64LE-NEXT: .LBB42_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -468,7 +465,7 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 @@ -476,12 +473,12 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB43_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB43_2 +; PPC64LE-NEXT: beq+ 0, .LBB43_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic ret void @@ -493,7 +490,7 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB44_4 +; PPC64LE-NEXT: bne- 0, .LBB44_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 @@ -501,12 +498,12 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB44_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB44_2 +; PPC64LE-NEXT: beq+ 0, .LBB44_2 ; PPC64LE-NEXT: .LBB44_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -520,23 +517,21 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB45_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB45_5 +; PPC64LE-NEXT: beq+ 0, .LBB45_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB45_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB45_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB45_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB45_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic @@ -549,20 +544,19 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB46_4 +; PPC64LE-NEXT: bne- 0, .LBB46_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB46_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB46_4 +; PPC64LE-NEXT: beq+ 0, .LBB46_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB46_2 +; PPC64LE-NEXT: beq+ 0, .LBB46_2 ; PPC64LE-NEXT: .LBB46_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -576,23 +570,21 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB47_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB47_5 +; PPC64LE-NEXT: beq+ 0, .LBB47_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB47_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB47_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB47_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB47_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic @@ -605,20 +597,19 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB48_4 +; PPC64LE-NEXT: bne- 0, .LBB48_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB48_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB48_4 +; PPC64LE-NEXT: beq+ 0, .LBB48_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB48_2 +; PPC64LE-NEXT: beq+ 0, .LBB48_2 ; PPC64LE-NEXT: .LBB48_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -632,20 +623,19 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB49_4 +; PPC64LE-NEXT: bne- 0, .LBB49_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB49_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB49_4 +; PPC64LE-NEXT: beq+ 0, .LBB49_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB49_2 +; PPC64LE-NEXT: beq+ 0, .LBB49_2 ; PPC64LE-NEXT: .LBB49_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -658,16 +648,15 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB50_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB50_1 +; PPC64LE-NEXT: bne- 0, .LBB50_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic @@ -679,16 +668,15 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB51_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB51_1 +; PPC64LE-NEXT: bne- 0, .LBB51_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -701,16 +689,15 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB52_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB52_3 +; PPC64LE-NEXT: bne- 0, .LBB52_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB52_1 +; PPC64LE-NEXT: bne- 0, .LBB52_1 ; PPC64LE-NEXT: .LBB52_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -724,7 +711,7 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 @@ -732,12 +719,12 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB53_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB53_2 +; PPC64LE-NEXT: beq+ 0, .LBB53_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic ret void @@ -749,7 +736,7 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB54_4 +; PPC64LE-NEXT: bne- 0, .LBB54_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 @@ -757,12 +744,12 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB54_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB54_2 +; PPC64LE-NEXT: beq+ 0, .LBB54_2 ; PPC64LE-NEXT: .LBB54_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -776,23 +763,21 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB55_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB55_5 +; PPC64LE-NEXT: beq+ 0, .LBB55_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB55_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB55_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB55_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB55_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic @@ -805,20 +790,19 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB56_4 +; PPC64LE-NEXT: bne- 0, .LBB56_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB56_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB56_4 +; PPC64LE-NEXT: beq+ 0, .LBB56_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB56_2 +; PPC64LE-NEXT: beq+ 0, .LBB56_2 ; PPC64LE-NEXT: .LBB56_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -832,23 +816,21 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB57_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB57_5 +; PPC64LE-NEXT: beq+ 0, .LBB57_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB57_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB57_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB57_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB57_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic @@ -861,20 +843,19 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB58_4 +; PPC64LE-NEXT: bne- 0, .LBB58_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB58_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB58_4 +; PPC64LE-NEXT: beq+ 0, .LBB58_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB58_2 +; PPC64LE-NEXT: beq+ 0, .LBB58_2 ; PPC64LE-NEXT: .LBB58_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -888,20 +869,19 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB59_4 +; PPC64LE-NEXT: bne- 0, .LBB59_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB59_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB59_4 +; PPC64LE-NEXT: beq+ 0, .LBB59_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB59_2 +; PPC64LE-NEXT: beq+ 0, .LBB59_2 ; PPC64LE-NEXT: .LBB59_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -912,16 +892,15 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) { define void @test60(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test60: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB60_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB60_1 +; PPC64LE-NEXT: bne- 0, .LBB60_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic @@ -931,16 +910,15 @@ define void @test60(ptr %ptr, i32 %cmp, i32 %val) { define void @test61(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test61: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB61_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB61_1 +; PPC64LE-NEXT: bne- 0, .LBB61_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -951,16 +929,15 @@ define void @test61(ptr %ptr, i32 %cmp, i32 %val) { define void @test62(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test62: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB62_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB62_3 +; PPC64LE-NEXT: bne- 0, .LBB62_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB62_1 +; PPC64LE-NEXT: bne- 0, .LBB62_1 ; PPC64LE-NEXT: .LBB62_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -973,19 +950,19 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB63_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB63_2 +; PPC64LE-NEXT: beq+ 0, .LBB63_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic ret void @@ -996,19 +973,19 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB64_4 +; PPC64LE-NEXT: bne- 0, .LBB64_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB64_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB64_2 +; PPC64LE-NEXT: beq+ 0, .LBB64_2 ; PPC64LE-NEXT: .LBB64_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1021,22 +998,20 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB65_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB65_5 +; PPC64LE-NEXT: beq+ 0, .LBB65_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB65_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB65_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB65_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB65_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic @@ -1048,19 +1023,18 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB66_4 +; PPC64LE-NEXT: bne- 0, .LBB66_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB66_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB66_4 +; PPC64LE-NEXT: beq+ 0, .LBB66_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB66_2 +; PPC64LE-NEXT: beq+ 0, .LBB66_2 ; PPC64LE-NEXT: .LBB66_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1073,22 +1047,20 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB67_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB67_5 +; PPC64LE-NEXT: beq+ 0, .LBB67_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB67_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB67_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB67_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB67_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic @@ -1100,19 +1072,18 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB68_4 +; PPC64LE-NEXT: bne- 0, .LBB68_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB68_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB68_4 +; PPC64LE-NEXT: beq+ 0, .LBB68_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB68_2 +; PPC64LE-NEXT: beq+ 0, .LBB68_2 ; PPC64LE-NEXT: .LBB68_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1125,19 +1096,18 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB69_4 +; PPC64LE-NEXT: bne- 0, .LBB69_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB69_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB69_4 +; PPC64LE-NEXT: beq+ 0, .LBB69_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB69_2 +; PPC64LE-NEXT: beq+ 0, .LBB69_2 ; PPC64LE-NEXT: .LBB69_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1148,16 +1118,15 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) { define void @test70(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test70: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB70_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB70_1 +; PPC64LE-NEXT: bne- 0, .LBB70_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic @@ -1167,16 +1136,15 @@ define void @test70(ptr %ptr, i64 %cmp, i64 %val) { define void @test71(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test71: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB71_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB71_1 +; PPC64LE-NEXT: bne- 0, .LBB71_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1187,16 +1155,15 @@ define void @test71(ptr %ptr, i64 %cmp, i64 %val) { define void @test72(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test72: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB72_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB72_3 +; PPC64LE-NEXT: bne- 0, .LBB72_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB72_1 +; PPC64LE-NEXT: bne- 0, .LBB72_1 ; PPC64LE-NEXT: .LBB72_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1209,19 +1176,19 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB73_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB73_2 +; PPC64LE-NEXT: beq+ 0, .LBB73_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic ret void @@ -1232,19 +1199,19 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB74_4 +; PPC64LE-NEXT: bne- 0, .LBB74_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB74_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB74_2 +; PPC64LE-NEXT: beq+ 0, .LBB74_2 ; PPC64LE-NEXT: .LBB74_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1257,22 +1224,20 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB75_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB75_5 +; PPC64LE-NEXT: beq+ 0, .LBB75_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB75_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB75_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB75_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB75_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic @@ -1284,19 +1249,18 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB76_4 +; PPC64LE-NEXT: bne- 0, .LBB76_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB76_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB76_4 +; PPC64LE-NEXT: beq+ 0, .LBB76_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB76_2 +; PPC64LE-NEXT: beq+ 0, .LBB76_2 ; PPC64LE-NEXT: .LBB76_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1309,22 +1273,20 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB77_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB77_5 +; PPC64LE-NEXT: beq+ 0, .LBB77_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB77_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB77_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB77_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB77_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic @@ -1336,19 +1298,18 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB78_4 +; PPC64LE-NEXT: bne- 0, .LBB78_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB78_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB78_4 +; PPC64LE-NEXT: beq+ 0, .LBB78_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB78_2 +; PPC64LE-NEXT: beq+ 0, .LBB78_2 ; PPC64LE-NEXT: .LBB78_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1361,19 +1322,18 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB79_4 +; PPC64LE-NEXT: bne- 0, .LBB79_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB79_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB79_4 +; PPC64LE-NEXT: beq+ 0, .LBB79_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB79_2 +; PPC64LE-NEXT: beq+ 0, .LBB79_2 ; PPC64LE-NEXT: .LBB79_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1386,16 +1346,15 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB80_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB80_1 +; PPC64LE-NEXT: bne- 0, .LBB80_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic @@ -1407,16 +1366,15 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB81_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB81_1 +; PPC64LE-NEXT: bne- 0, .LBB81_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1429,16 +1387,15 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB82_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB82_3 +; PPC64LE-NEXT: bne- 0, .LBB82_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB82_1 +; PPC64LE-NEXT: bne- 0, .LBB82_1 ; PPC64LE-NEXT: .LBB82_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1452,7 +1409,7 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 @@ -1460,12 +1417,12 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB83_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB83_2 +; PPC64LE-NEXT: beq+ 0, .LBB83_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic ret void @@ -1477,7 +1434,7 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB84_4 +; PPC64LE-NEXT: bne- 0, .LBB84_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 @@ -1485,12 +1442,12 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB84_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB84_2 +; PPC64LE-NEXT: beq+ 0, .LBB84_2 ; PPC64LE-NEXT: .LBB84_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1504,23 +1461,21 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB85_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB85_5 +; PPC64LE-NEXT: beq+ 0, .LBB85_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB85_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB85_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB85_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB85_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel monotonic @@ -1533,20 +1488,19 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB86_4 +; PPC64LE-NEXT: bne- 0, .LBB86_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB86_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB86_4 +; PPC64LE-NEXT: beq+ 0, .LBB86_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB86_2 +; PPC64LE-NEXT: beq+ 0, .LBB86_2 ; PPC64LE-NEXT: .LBB86_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1560,23 +1514,21 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB87_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB87_5 +; PPC64LE-NEXT: beq+ 0, .LBB87_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB87_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB87_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB87_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB87_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst monotonic @@ -1589,20 +1541,19 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB88_4 +; PPC64LE-NEXT: bne- 0, .LBB88_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB88_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB88_4 +; PPC64LE-NEXT: beq+ 0, .LBB88_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB88_2 +; PPC64LE-NEXT: beq+ 0, .LBB88_2 ; PPC64LE-NEXT: .LBB88_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1616,20 +1567,19 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB89_4 +; PPC64LE-NEXT: bne- 0, .LBB89_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB89_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB89_4 +; PPC64LE-NEXT: beq+ 0, .LBB89_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB89_2 +; PPC64LE-NEXT: beq+ 0, .LBB89_2 ; PPC64LE-NEXT: .LBB89_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1642,16 +1592,15 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB90_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB90_1 +; PPC64LE-NEXT: bne- 0, .LBB90_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic @@ -1663,16 +1612,15 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB91_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB91_1 +; PPC64LE-NEXT: bne- 0, .LBB91_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1685,16 +1633,15 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 16 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB92_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB92_3 +; PPC64LE-NEXT: bne- 0, .LBB92_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB92_1 +; PPC64LE-NEXT: bne- 0, .LBB92_1 ; PPC64LE-NEXT: .LBB92_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1708,7 +1655,7 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 @@ -1716,12 +1663,12 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB93_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB93_2 +; PPC64LE-NEXT: beq+ 0, .LBB93_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic ret void @@ -1733,7 +1680,7 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB94_4 +; PPC64LE-NEXT: bne- 0, .LBB94_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 @@ -1741,12 +1688,12 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB94_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB94_2 +; PPC64LE-NEXT: beq+ 0, .LBB94_2 ; PPC64LE-NEXT: .LBB94_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1760,23 +1707,21 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB95_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB95_5 +; PPC64LE-NEXT: beq+ 0, .LBB95_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB95_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB95_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB95_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB95_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel monotonic @@ -1789,20 +1734,19 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB96_4 +; PPC64LE-NEXT: bne- 0, .LBB96_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB96_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB96_4 +; PPC64LE-NEXT: beq+ 0, .LBB96_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB96_2 +; PPC64LE-NEXT: beq+ 0, .LBB96_2 ; PPC64LE-NEXT: .LBB96_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1816,23 +1760,21 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB97_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB97_5 +; PPC64LE-NEXT: beq+ 0, .LBB97_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB97_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB97_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB97_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB97_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst monotonic @@ -1845,20 +1787,19 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB98_4 +; PPC64LE-NEXT: bne- 0, .LBB98_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB98_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB98_4 +; PPC64LE-NEXT: beq+ 0, .LBB98_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB98_2 +; PPC64LE-NEXT: beq+ 0, .LBB98_2 ; PPC64LE-NEXT: .LBB98_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1872,20 +1813,19 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB99_4 +; PPC64LE-NEXT: bne- 0, .LBB99_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: clrlwi 5, 5, 16 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB99_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB99_4 +; PPC64LE-NEXT: beq+ 0, .LBB99_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB99_2 +; PPC64LE-NEXT: beq+ 0, .LBB99_2 ; PPC64LE-NEXT: .LBB99_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1896,16 +1836,15 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) { define void @test100(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test100: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB100_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB100_1 +; PPC64LE-NEXT: bne- 0, .LBB100_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic @@ -1915,16 +1854,15 @@ define void @test100(ptr %ptr, i32 %cmp, i32 %val) { define void @test101(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test101: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB101_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB101_1 +; PPC64LE-NEXT: bne- 0, .LBB101_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1935,16 +1873,15 @@ define void @test101(ptr %ptr, i32 %cmp, i32 %val) { define void @test102(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test102: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB102_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB102_3 +; PPC64LE-NEXT: bne- 0, .LBB102_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB102_1 +; PPC64LE-NEXT: bne- 0, .LBB102_1 ; PPC64LE-NEXT: .LBB102_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -1957,19 +1894,19 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB103_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB103_2 +; PPC64LE-NEXT: beq+ 0, .LBB103_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic ret void @@ -1980,19 +1917,19 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB104_4 +; PPC64LE-NEXT: bne- 0, .LBB104_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB104_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB104_2 +; PPC64LE-NEXT: beq+ 0, .LBB104_2 ; PPC64LE-NEXT: .LBB104_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2005,22 +1942,20 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB105_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB105_5 +; PPC64LE-NEXT: beq+ 0, .LBB105_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB105_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB105_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB105_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB105_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel monotonic @@ -2032,19 +1967,18 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB106_4 +; PPC64LE-NEXT: bne- 0, .LBB106_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB106_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB106_4 +; PPC64LE-NEXT: beq+ 0, .LBB106_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB106_2 +; PPC64LE-NEXT: beq+ 0, .LBB106_2 ; PPC64LE-NEXT: .LBB106_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2057,22 +1991,20 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB107_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB107_5 +; PPC64LE-NEXT: beq+ 0, .LBB107_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB107_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB107_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB107_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB107_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst monotonic @@ -2084,19 +2016,18 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB108_4 +; PPC64LE-NEXT: bne- 0, .LBB108_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB108_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB108_4 +; PPC64LE-NEXT: beq+ 0, .LBB108_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB108_2 +; PPC64LE-NEXT: beq+ 0, .LBB108_2 ; PPC64LE-NEXT: .LBB108_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2109,19 +2040,18 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bne 0, .LBB109_4 +; PPC64LE-NEXT: bne- 0, .LBB109_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB109_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB109_4 +; PPC64LE-NEXT: beq+ 0, .LBB109_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: beq 0, .LBB109_2 +; PPC64LE-NEXT: beq+ 0, .LBB109_2 ; PPC64LE-NEXT: .LBB109_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2132,16 +2062,15 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) { define void @test110(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test110: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB110_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB110_1 +; PPC64LE-NEXT: bne- 0, .LBB110_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic @@ -2151,16 +2080,15 @@ define void @test110(ptr %ptr, i64 %cmp, i64 %val) { define void @test111(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test111: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB111_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB111_1 +; PPC64LE-NEXT: bne- 0, .LBB111_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2171,16 +2099,15 @@ define void @test111(ptr %ptr, i64 %cmp, i64 %val) { define void @test112(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test112: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB112_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB112_3 +; PPC64LE-NEXT: bne- 0, .LBB112_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB112_1 +; PPC64LE-NEXT: bne- 0, .LBB112_1 ; PPC64LE-NEXT: .LBB112_3: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2193,19 +2120,19 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB113_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB113_2 +; PPC64LE-NEXT: beq+ 0, .LBB113_2 ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic ret void @@ -2216,19 +2143,19 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB114_4 +; PPC64LE-NEXT: bne- 0, .LBB114_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB114_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: beqlr+ 0 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB114_2 +; PPC64LE-NEXT: beq+ 0, .LBB114_2 ; PPC64LE-NEXT: .LBB114_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2241,22 +2168,20 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB115_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB115_5 +; PPC64LE-NEXT: beq+ 0, .LBB115_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB115_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB115_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB115_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB115_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel monotonic @@ -2268,19 +2193,18 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB116_4 +; PPC64LE-NEXT: bne- 0, .LBB116_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB116_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB116_4 +; PPC64LE-NEXT: beq+ 0, .LBB116_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB116_2 +; PPC64LE-NEXT: beq+ 0, .LBB116_2 ; PPC64LE-NEXT: .LBB116_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2293,22 +2217,20 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB117_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB117_5 +; PPC64LE-NEXT: beq+ 0, .LBB117_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB117_2 -; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end +; PPC64LE-NEXT: beq+ 0, .LBB117_2 ; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB117_5: # %cmpxchg.success +; PPC64LE-NEXT: .LBB117_4: # %cmpxchg.success ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst monotonic @@ -2320,19 +2242,18 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB118_4 +; PPC64LE-NEXT: bne- 0, .LBB118_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB118_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB118_4 +; PPC64LE-NEXT: beq+ 0, .LBB118_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB118_2 +; PPC64LE-NEXT: beq+ 0, .LBB118_2 ; PPC64LE-NEXT: .LBB118_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr @@ -2345,19 +2266,18 @@ define void @test119(ptr %ptr, i64 %cmp, i64 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: bne 0, .LBB119_4 +; PPC64LE-NEXT: bne- 0, .LBB119_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64LE-NEXT: sync -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB119_2: # %cmpxchg.trystore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beq 0, .LBB119_4 +; PPC64LE-NEXT: beq+ 0, .LBB119_4 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpld 6, 4 -; PPC64LE-NEXT: beq 0, .LBB119_2 +; PPC64LE-NEXT: beq+ 0, .LBB119_2 ; PPC64LE-NEXT: .LBB119_4: # %cmpxchg.nostore ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index 40786057ead5f..183c8e1323f2e 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) { ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(ptr %mem) { ; PPC32-LABEL: cas_strong_i8_sc_sc: -; PPC32: # %bb.0: +; PPC32: # %bb.0: # %cmpxchg.start ; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 ; PPC32-NEXT: lwarx r4, 0, r5 -; PPC32-NEXT: not r3, r3 +; PPC32-NEXT: not r3, r3 ; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28 ; PPC32-NEXT: srw r6, r4, r3 ; PPC32-NEXT: andi. r6, r6, 255 -; PPC32-NEXT: bne cr0, .LBB8_4 -; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC32-NEXT: bne- cr0, .LBB8_4 +; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC32-NEXT: li r6, 255 ; PPC32-NEXT: li r7, 1 ; PPC32-NEXT: slw r6, r6, r3 -; PPC32-NEXT: not r6, r6 +; PPC32-NEXT: not r6, r6 ; PPC32-NEXT: slw r7, r7, r3 ; PPC32-NEXT: sync -; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore -; PPC32-NEXT: # =>This Inner Loop Header: Depth=1 +; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore +; PPC32-NEXT: # ; PPC32-NEXT: and r8, r4, r6 ; PPC32-NEXT: or r8, r8, r7 ; PPC32-NEXT: stwcx. r8, 0, r5 -; PPC32-NEXT: beq cr0, .LBB8_4 -; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload -; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PPC32-NEXT: beq+ cr0, .LBB8_4 +; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload +; PPC32-NEXT: # ; PPC32-NEXT: lwarx r4, 0, r5 ; PPC32-NEXT: srw r8, r4, r3 ; PPC32-NEXT: andi. r8, r8, 255 -; PPC32-NEXT: beq cr0, .LBB8_2 -; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore +; PPC32-NEXT: beq+ cr0, .LBB8_2 +; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore ; PPC32-NEXT: srw r3, r4, r3 ; PPC32-NEXT: lwsync ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_strong_i8_sc_sc: -; PPC64: # %bb.0: +; PPC64: # %bb.0: # %cmpxchg.start ; PPC64-NEXT: rldicr r5, r3, 0, 61 -; PPC64-NEXT: not r3, r3 +; PPC64-NEXT: not r3, r3 ; PPC64-NEXT: lwarx r4, 0, r5 ; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28 ; PPC64-NEXT: srw r6, r4, r3 ; PPC64-NEXT: andi. r6, r6, 255 -; PPC64-NEXT: bne cr0, .LBB8_4 -; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC64-NEXT: bne- cr0, .LBB8_4 +; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64-NEXT: li r6, 255 ; PPC64-NEXT: li r7, 1 ; PPC64-NEXT: slw r6, r6, r3 -; PPC64-NEXT: not r6, r6 +; PPC64-NEXT: not r6, r6 ; PPC64-NEXT: slw r7, r7, r3 ; PPC64-NEXT: sync -; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore -; PPC64-NEXT: # =>This Inner Loop Header: Depth=1 +; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore +; PPC64-NEXT: # ; PPC64-NEXT: and r8, r4, r6 ; PPC64-NEXT: or r8, r8, r7 ; PPC64-NEXT: stwcx. r8, 0, r5 -; PPC64-NEXT: beq cr0, .LBB8_4 -; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload -; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PPC64-NEXT: beq+ cr0, .LBB8_4 +; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload +; PPC64-NEXT: # ; PPC64-NEXT: lwarx r4, 0, r5 ; PPC64-NEXT: srw r8, r4, r3 ; PPC64-NEXT: andi. r8, r8, 255 -; PPC64-NEXT: beq cr0, .LBB8_2 -; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore +; PPC64-NEXT: beq+ cr0, .LBB8_2 +; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore ; PPC64-NEXT: srw r3, r4, r3 ; PPC64-NEXT: lwsync ; PPC64-NEXT: blr @@ -208,54 +208,50 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) { } define i16 @cas_weak_i16_acquire_acquire(ptr %mem) { ; PPC32-LABEL: cas_weak_i16_acquire_acquire: -; PPC32: # %bb.0: +; PPC32: # %bb.0: # %cmpxchg.start ; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 ; PPC32-NEXT: lwarx r5, 0, r4 -; PPC32-NEXT: clrlwi r3, r3, 30 +; PPC32-NEXT: clrlwi r3, r3, 30 ; PPC32-NEXT: xori r3, r3, 2 ; PPC32-NEXT: slwi r6, r3, 3 ; PPC32-NEXT: srw r3, r5, r6 ; PPC32-NEXT: andi. r7, r3, 65535 -; PPC32-NEXT: beq cr0, .LBB9_2 -; PPC32-NEXT: # %bb.1: # %cmpxchg.failure -; PPC32-NEXT: lwsync -; PPC32-NEXT: blr -; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore +; PPC32-NEXT: bne- cr0, .LBB9_2 +; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC32-NEXT: lis r7, 0 ; PPC32-NEXT: ori r7, r7, 65535 ; PPC32-NEXT: slw r7, r7, r6 ; PPC32-NEXT: li r8, 1 -; PPC32-NEXT: not r7, r7 +; PPC32-NEXT: not r7, r7 ; PPC32-NEXT: slw r6, r8, r6 ; PPC32-NEXT: and r5, r5, r7 ; PPC32-NEXT: or r5, r5, r6 ; PPC32-NEXT: stwcx. r5, 0, r4 +; PPC32-NEXT: .LBB9_2: # %cmpxchg.failure ; PPC32-NEXT: lwsync ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_weak_i16_acquire_acquire: -; PPC64: # %bb.0: -; PPC64-NEXT: rldicr r4, r3, 0, 61 -; PPC64-NEXT: clrlwi r3, r3, 30 +; PPC64: # %bb.0: # %cmpxchg.start +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: clrlwi r3, r3, 30 ; PPC64-NEXT: lwarx r5, 0, r4 ; PPC64-NEXT: xori r3, r3, 2 ; PPC64-NEXT: slwi r6, r3, 3 ; PPC64-NEXT: srw r3, r5, r6 ; PPC64-NEXT: andi. r7, r3, 65535 -; PPC64-NEXT: beq cr0, .LBB9_2 -; PPC64-NEXT: # %bb.1: # %cmpxchg.failure -; PPC64-NEXT: lwsync -; PPC64-NEXT: blr -; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore +; PPC64-NEXT: bne- cr0, .LBB9_2 +; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64-NEXT: lis r7, 0 ; PPC64-NEXT: ori r7, r7, 65535 ; PPC64-NEXT: slw r7, r7, r6 ; PPC64-NEXT: li r8, 1 -; PPC64-NEXT: not r7, r7 +; PPC64-NEXT: not r7, r7 ; PPC64-NEXT: slw r6, r8, r6 ; PPC64-NEXT: and r5, r5, r7 ; PPC64-NEXT: or r5, r5, r6 ; PPC64-NEXT: stwcx. r5, 0, r4 +; PPC64-NEXT: .LBB9_2: # %cmpxchg.failure ; PPC64-NEXT: lwsync ; PPC64-NEXT: blr %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire @@ -264,24 +260,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) { } define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) { ; CHECK-LABEL: cas_strong_i32_acqrel_acquire: -; CHECK: # %bb.0: -; CHECK-NEXT: mr r4, r3 +; CHECK: # %bb.0: # %cmpxchg.start +; CHECK-NEXT: mr r4, r3 ; CHECK-NEXT: lwarx r3, 0, r3 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: bne cr0, .LBB10_4 -; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne- cr0, .LBB10_4 +; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: lwsync -; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore +; CHECK-NEXT: # ; CHECK-NEXT: stwcx. r5, 0, r4 -; CHECK-NEXT: beq cr0, .LBB10_4 -; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload -; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1 +; CHECK-NEXT: beq+ cr0, .LBB10_4 +; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload +; CHECK-NEXT: # ; CHECK-NEXT: lwarx r3, 0, r4 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: beq cr0, .LBB10_2 -; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: beq+ cr0, .LBB10_2 +; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore ; CHECK-NEXT: lwsync ; CHECK-NEXT: blr %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire @@ -313,12 +309,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) { ; PPC32-NEXT: blr ; ; PPC64-LABEL: cas_weak_i64_release_monotonic: -; PPC64: # %bb.0: -; PPC64-NEXT: mr r4, r3 +; PPC64: # %bb.0: # %cmpxchg.start +; PPC64-NEXT: mr r4, r3 ; PPC64-NEXT: ldarx r3, 0, r3 -; PPC64-NEXT: cmpldi r3, 0 -; PPC64-NEXT: bnelr cr0 -; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore +; PPC64-NEXT: cmpldi r3, 0 +; PPC64-NEXT: bnelr- cr0 +; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore ; PPC64-NEXT: li r5, 1 ; PPC64-NEXT: lwsync ; PPC64-NEXT: stdcx. r5, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll index 530e67b4804fb..b4ceb36768904 100644 --- a/llvm/test/CodeGen/PowerPC/loop-comment.ll +++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll @@ -6,16 +6,15 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmplw 6, 4 -; PPC64LE-NEXT: bnelr 0 +; PPC64LE-NEXT: bnelr- 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore ; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: bne 0, .LBB0_1 +; PPC64LE-NEXT: bne- 0, .LBB0_1 ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll index 2e72d26ed4566..585b4c7538246 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -passes=atomic-expand -codegen-opt-level=1 %s | FileCheck %s define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) { @@ -221,49 +222,40 @@ define i8 @test_atomic_umin_i8(ptr %ptr, i8 %uminend) { define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) { ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst ; CHECK: br label %[[START:.*]] - ; CHECK: [[START]]: ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired ; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - ; CHECK: [[FENCED_STORE]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]] - ; CHECK: [[RELEASED_LOAD]]: ; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr) ; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8 ; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired -; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]] - +; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]] ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i8 [[LOADED]] - %pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst %old = extractvalue { i8, i1 } %pairold, 0 ret i8 %old @@ -272,49 +264,40 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) { define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newval) { ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired ; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - ; CHECK: [[FENCED_STORE]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]] - +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:cmpxchg\.releasedload]] ; CHECK: [[RELEASED_LOAD]]: ; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr) ; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16 ; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired -; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]] - +; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]] ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i16 [[LOADED]] - %pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic %old = extractvalue { i16, i1 } %pairold, 0 ret i16 %old @@ -324,40 +307,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire ; CHECK-NOT: dmb ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i32 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 ret i32 %old @@ -367,7 +342,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic ; CHECK-NOT: dmb ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr) ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 @@ -377,11 +351,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 @@ -390,26 +362,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK-NOT: dmb ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i64 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 ret i64 %old @@ -419,40 +386,32 @@ define i32 @test_cmpxchg_minsize(ptr %addr, i32 %desired, i32 %new) minsize { ; CHECK-LABEL: @test_cmpxchg_minsize ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[START:.*]] - ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0(i32 %new, ptr elementtype(i32) %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] - ; CHECK: [[FAILURE_BB]]: ; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] - ; CHECK: [[END]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i32 [[LOADED_EXIT]] - %pair = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 ret i32 %oldval diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll index 10073e23f5d46..98539ffcde32a 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -passes=atomic-expand %s -codegen-opt-level=1 | FileCheck %s define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) { @@ -84,42 +85,34 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) { ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst ; CHECK-NOT: fence ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i8) %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i8 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst %old = extractvalue { i8, i1 } %pairold, 0 ret i8 %old @@ -129,43 +122,35 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newva ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic ; CHECK-NOT: fence ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i16) %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK-NOT: fence ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; The PHI is not required. ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i16 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic %old = extractvalue { i16, i1 } %pairold, 0 ret i16 %old @@ -175,40 +160,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire ; CHECK-NOT: fence ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i32) %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i32 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 ret i32 %old @@ -218,7 +195,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic ; CHECK-NOT: fence ; CHECK: br label %[[LOOP:.*]] - ; CHECK: [[LOOP]]: ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr) ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0 @@ -228,11 +204,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] - +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]] ; CHECK: [[FENCED_STORE]]: ; CHECK-NEXT: br label %[[TRY_STORE:.*]] - ; CHECK: [[TRY_STORE]]: ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 @@ -241,26 +215,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] - ; CHECK: [[SUCCESS_BB]]: ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE:.*]] - ; CHECK: [[NO_STORE_BB]]: ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] - ; CHECK: [[FAILURE_BB]]: ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] - ; CHECK: [[DONE]]: ; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i64 [[LOADED_EXIT]] - %pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 ret i64 %old diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll index 8195a5b6145e3..aff4196815e21 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll @@ -9,7 +9,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) { ; CHECK: [[CMPXCHG_START]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]] -; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]] +; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]] ; CHECK: [[CMPXCHG_FENCEDSTORE]]: ; CHECK-NEXT: call void @llvm.arm.dmb(i32 10) ; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]] @@ -17,7 +17,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) { ; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]] +; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]] ; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]] ; CHECK-NEXT: unreachable ; CHECK: [[CMPXCHG_SUCCESS]]: @@ -48,7 +48,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) { ; CHECK: [[CMPXCHG_START]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]] -; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]] +; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]] ; CHECK: [[CMPXCHG_FENCEDSTORE]]: ; CHECK-NEXT: call void @llvm.arm.dmb(i32 10) ; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]] @@ -56,7 +56,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) { ; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]] +; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]] ; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]] ; CHECK-NEXT: unreachable ; CHECK: [[CMPXCHG_SUCCESS]]: @@ -86,14 +86,14 @@ define i32 @test_cmpxchg_monotonic(ptr %addr, i32 %desired, i32 %new) { ; CHECK: [[CMPXCHG_START]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]] -; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]] +; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]] ; CHECK: [[CMPXCHG_FENCEDSTORE]]: ; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]] ; CHECK: [[CMPXCHG_TRYSTORE]]: ; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]] +; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]] ; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]] ; CHECK-NEXT: unreachable ; CHECK: [[CMPXCHG_SUCCESS]]: @@ -122,7 +122,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins ; CHECK: [[CMPXCHG_START]]: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]] -; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]] +; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]] ; CHECK: [[CMPXCHG_FENCEDSTORE]]: ; CHECK-NEXT: call void @llvm.arm.dmb(i32 10) ; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]] @@ -130,7 +130,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins ; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]]) ; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]] +; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]] ; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]] ; CHECK-NEXT: unreachable ; CHECK: [[CMPXCHG_SUCCESS]]: