diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp old mode 100644 new mode 100755 index e71ba5ea5521e..11d22c75831d0 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_add: + case Intrinsic::amdgcn_wave_reduce_sub: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: + case Intrinsic::amdgcn_wave_reduce_xor: case Intrinsic::amdgcn_s_wqm: case Intrinsic::amdgcn_s_quadmask: case Intrinsic::amdgcn_s_bitreplicate: @@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, return ConstantInt::get(Ty, C0->abs()); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_add: + case Intrinsic::amdgcn_wave_reduce_sub: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: + case Intrinsic::amdgcn_wave_reduce_xor: return dyn_cast(Operands[0]); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll index b6af8b4bb798d..f5e4060e6e623 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll @@ -187,1644 +187,931 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value: +; GFX9DAGISEL-LABEL: divergent_cfg: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: poison_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: poison_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: poison_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: poison_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: poison_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: poison_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: poison_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4 -; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value_i64: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value_i64: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value_i64: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value_i64: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value_i64: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value_i64: +; GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value_i64: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value_i64: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value_i64: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4 +; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -1836,7 +1123,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1844,7 +1131,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1858,7 +1145,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1866,7 +1153,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1880,7 +1167,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1888,7 +1175,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1902,7 +1189,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1910,7 +1197,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1924,7 +1211,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1932,7 +1219,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s9 ; GFX1064DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1945,7 +1232,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1953,7 +1240,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s9 ; GFX1064GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1966,7 +1253,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -1974,7 +1261,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s8 ; GFX1032DAGISEL-NEXT: s_addc_u32 s5, s5, s9 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1987,7 +1274,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -1995,7 +1282,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s8 ; GFX1032GISEL-NEXT: s_addc_u32 s5, s5, s9 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2008,7 +1295,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2017,7 +1304,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_add_u32 s0, s0, s5 ; GFX1164DAGISEL-NEXT: s_addc_u32 s1, s1, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2030,7 +1317,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2039,7 +1326,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_add_u32 s0, s0, s5 ; GFX1164GISEL-NEXT: s_addc_u32 s1, s1, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2052,7 +1339,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2061,7 +1348,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_add_u32 s0, s0, s4 ; GFX1132DAGISEL-NEXT: s_addc_u32 s1, s1, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2073,7 +1360,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2082,7 +1369,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_add_u32 s0, s0, s4 ; GFX1132GISEL-NEXT: s_addc_u32 s1, s1, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2102,7 +1389,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2111,7 +1398,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2140,7 +1427,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2149,10 +1436,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8GISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2162,7 +1449,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s4, s7 ; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7 ; GFX8GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2179,7 +1466,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] @@ -2188,7 +1475,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 ; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2216,7 +1503,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2225,10 +1512,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX9GISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2238,7 +1525,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s8, s4 ; GFX9GISEL-NEXT: s_mul_i32 s4, s9, s4 ; GFX9GISEL-NEXT: s_add_u32 s7, s5, s4 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2255,7 +1542,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9] @@ -2264,7 +1551,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s8 ; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1064DAGISEL-NEXT: s_add_u32 s9, s9, s3 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2292,7 +1579,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2301,10 +1588,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2314,7 +1601,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_add_u32 s7, s5, s7 -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2331,7 +1618,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2340,7 +1627,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032DAGISEL-NEXT: s_add_u32 s5, s5, s3 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2368,7 +1655,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2377,10 +1664,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1032GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2390,7 +1677,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_mul_i32 s5, s7, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2409,7 +1696,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2419,7 +1706,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s8 ; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1164DAGISEL-NEXT: s_add_u32 s9, s9, s3 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2451,7 +1738,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2461,10 +1748,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2475,7 +1762,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_mul_i32 s5, s5, s6 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2494,7 +1781,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2504,7 +1791,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132DAGISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -2534,7 +1821,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2544,10 +1831,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2558,7 +1845,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_mul_i32 s5, s5, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll index 55e6189f65675..87642a1f0b957 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, -1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, -1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_and_b32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_and_b32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_and_b32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_and_b32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,14 +926,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1293,14 +947,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1314,14 +968,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1335,14 +989,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1356,14 +1010,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1376,14 +1030,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,14 +1050,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1416,14 +1070,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1436,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1457,7 +1111,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1465,7 +1119,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1478,7 +1132,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1498,7 +1152,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1506,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1547,19 +1201,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1596,19 +1250,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1645,19 +1299,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1694,19 +1348,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1747,19 +1401,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1798,19 +1452,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1834,3 +1488,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll index 96b67e71fcd28..50da8b34f8555 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s4, 1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s4, 1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s4, 1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s2, 1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s2, 1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s4, 1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s2, 1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s2, 1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s6, 1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s6, 1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s6, 1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s1, 1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_max_i32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s0, 1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_max_i32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s6, 1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_max_i32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s0, 1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_max_i32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_brev_b32 s5, 1 ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_brev_b32 s5, 1 ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1880,3 +1534,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll index 4e0c9ec111cbe..42c8e996fa720 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s4, -2 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s4, -2 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s4, -2 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s2, -2 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s2, -2 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s4, -2 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s2, -2 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s2, -2 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s6, -2 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s6, -2 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s6, -2 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_min_i32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s0, -2 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_min_i32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s6, -2 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_min_i32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s0, -2 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_min_i32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_brev_b32 s5, -2 ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_brev_b32 s5, -2 ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1880,3 +1534,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll index 1849eaecbe143..92f9fca285bb6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_or_b32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_or_b32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_or_b32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_or_b32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,14 +926,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1293,14 +947,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1314,14 +968,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1335,14 +989,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1356,14 +1010,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1376,14 +1030,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,14 +1050,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1416,14 +1070,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1436,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1457,7 +1111,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1465,7 +1119,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1478,7 +1132,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1498,7 +1152,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1506,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1548,19 +1202,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1597,19 +1251,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1646,19 +1300,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1695,19 +1349,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1748,19 +1402,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1799,19 +1453,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1835,3 +1489,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll index 9a0917133fc59..552dd6f859c7a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll @@ -200,1834 +200,995 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, 0x7b -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2 +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX8GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value: +; GFX9DAGISEL-LABEL: divergent_cfg: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s0 +; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX9GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s0 +; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1 +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0 +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value: +; GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1 +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX8GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX9GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1 -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0 -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1 -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0 -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX8DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: const_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1064GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1032GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1164GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value_i64: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1132GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0 +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1032DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -2039,7 +1200,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2047,7 +1208,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2061,7 +1222,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2069,7 +1230,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2083,7 +1244,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2091,7 +1252,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2105,7 +1266,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2113,7 +1274,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2127,7 +1288,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2135,7 +1296,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_sub_u32 s4, s4, s9 ; GFX1064DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2148,7 +1309,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2156,7 +1317,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_sub_u32 s4, s4, s9 ; GFX1064GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2169,7 +1330,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2177,7 +1338,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_sub_u32 s4, s4, s8 ; GFX1032DAGISEL-NEXT: s_subb_u32 s5, s5, s9 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2190,7 +1351,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2198,7 +1359,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_sub_u32 s4, s4, s8 ; GFX1032GISEL-NEXT: s_subb_u32 s5, s5, s9 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2211,7 +1372,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2220,7 +1381,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_sub_u32 s0, s0, s5 ; GFX1164DAGISEL-NEXT: s_subb_u32 s1, s1, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2233,7 +1394,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2242,7 +1403,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_sub_u32 s0, s0, s5 ; GFX1164GISEL-NEXT: s_subb_u32 s1, s1, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2255,7 +1416,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2264,7 +1425,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_sub_u32 s0, s0, s4 ; GFX1132DAGISEL-NEXT: s_subb_u32 s1, s1, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2276,7 +1437,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2285,7 +1446,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_sub_u32 s0, s0, s4 ; GFX1132GISEL-NEXT: s_subb_u32 s1, s1, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2305,7 +1466,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2318,13 +1479,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2338,7 +1499,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s4, s8 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7 -; GFX8DAGISEL-NEXT: .LBB9_4: ; %endif +; GFX8DAGISEL-NEXT: .LBB5_4: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2352,7 +1513,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2365,10 +1526,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX8GISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2382,7 +1543,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7 ; GFX8GISEL-NEXT: s_add_u32 s4, s4, s5 ; GFX8GISEL-NEXT: s_add_u32 s7, s4, s8 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2399,7 +1560,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2412,13 +1573,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 ; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s10 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2432,7 +1593,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s5, s8 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: .LBB9_4: ; %endif +; GFX9DAGISEL-NEXT: .LBB5_4: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] @@ -2445,7 +1606,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2458,10 +1619,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX9GISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2475,7 +1636,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_i32 s5, s8, s5 ; GFX9GISEL-NEXT: s_add_u32 s4, s7, s4 ; GFX9GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2492,7 +1653,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9] @@ -2505,7 +1666,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1064DAGISEL-NEXT: s_add_u32 s9, s3, s9 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2537,7 +1698,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2550,10 +1711,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1064GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2567,7 +1728,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_add_u32 s7, s8, s7 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2584,7 +1745,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2597,7 +1758,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032DAGISEL-NEXT: s_add_u32 s5, s3, s5 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2629,7 +1790,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2642,10 +1803,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1032GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2659,7 +1820,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_add_u32 s5, s5, s7 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_add_u32 s7, s5, s4 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2678,7 +1839,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2693,7 +1854,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1164DAGISEL-NEXT: s_add_u32 s9, s3, s9 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2729,7 +1890,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2744,10 +1905,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2763,7 +1924,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_add_u32 s5, s8, s5 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s5, s7 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2782,7 +1943,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2797,7 +1958,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132DAGISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -2832,7 +1993,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2847,10 +2008,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2866,7 +2027,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_add_u32 s5, s7, s5 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_add_u32 s7, s5, s8 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll index a4a5b01a873b7..dc2c2dad16f55 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll @@ -125,153 +125,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_max_u32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_max_u32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_max_u32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_max_u32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1054,146 +920,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1330,19 +1056,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1379,19 +1105,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1428,19 +1154,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1477,19 +1203,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1530,19 +1256,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1581,19 +1307,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1617,3 +1343,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll index 29a78855d6629..d170fbc957e28 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll @@ -125,153 +125,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, -1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, -1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_min_u32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_min_u32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_min_u32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_min_u32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1054,146 +920,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1201,7 +927,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1212,7 +938,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1226,7 +952,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1237,7 +963,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1251,7 +977,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1262,7 +988,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1276,7 +1002,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1287,7 +1013,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1301,7 +1027,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1312,7 +1038,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1325,7 +1051,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1336,7 +1062,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1349,7 +1075,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1360,7 +1086,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1373,7 +1099,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1384,7 +1110,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1397,7 +1123,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1409,7 +1135,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1422,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 @@ -1434,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1447,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1458,7 +1184,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1470,7 +1196,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1481,7 +1207,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1522,19 +1248,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1571,19 +1297,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1620,19 +1346,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1669,19 +1395,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1722,19 +1448,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1773,19 +1499,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1809,3 +1535,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll index 9c523b2404121..c64ad81fdcf6d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll @@ -200,1494 +200,773 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value: +; GFX9DAGISEL-LABEL: divergent_cfg: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value: +; GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: poison_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: poison_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: const_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value_i64: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value_i64: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value_i64: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, 0 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 @@ -1695,159 +974,159 @@ define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -1859,14 +1138,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1880,14 +1159,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_mov_b32 s5, s4 -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1901,14 +1180,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1922,14 +1201,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_mov_b32 s5, s4 -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1943,14 +1222,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1963,14 +1242,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1983,14 +1262,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2003,14 +1282,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2023,7 +1302,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -2031,7 +1310,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2044,7 +1323,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -2052,7 +1331,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2065,7 +1344,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2073,7 +1352,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2085,7 +1364,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s1, s0 -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2093,7 +1372,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2113,7 +1392,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2121,7 +1400,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX8DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2149,7 +1428,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2157,10 +1436,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX8GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2169,7 +1448,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mul_i32 s6, s4, s7 ; GFX8GISEL-NEXT: s_mul_i32 s7, s5, s7 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2186,7 +1465,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2194,7 +1473,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX9DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2221,7 +1500,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2229,10 +1508,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX9GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2241,7 +1520,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX9GISEL-NEXT: s_mul_i32 s7, s7, s4 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2258,7 +1537,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2266,7 +1545,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2293,7 +1572,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2301,10 +1580,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2313,7 +1592,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4 -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2330,7 +1609,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2338,7 +1617,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2365,7 +1644,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2373,10 +1652,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1032GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2385,7 +1664,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s7, s7, s3 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2404,7 +1683,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2413,7 +1692,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1164DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2444,7 +1723,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2453,10 +1732,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1164GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2466,7 +1745,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s7 ; GFX1164GISEL-NEXT: s_mul_i32 s7, s5, s7 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2485,7 +1764,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2494,7 +1773,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1132DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -2524,7 +1803,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2533,10 +1812,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1132GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2546,7 +1825,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s7, s5, s3 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll index 863598578ea77..a27a121a3af61 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll @@ -2,13 +2,14 @@ ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s ; -------------------------------------------------------------------- -; llvm.amdgcn.wave.reduce.umin.i32 +; llvm.amdgcn.wave.reduce.umin ; -------------------------------------------------------------------- declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.umin.i64(i64, i32 immarg) -define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -19,8 +20,8 @@ entry: ret void } -define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -31,14 +32,94 @@ entry: ret void } +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.min +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.min.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.min.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + ; -------------------------------------------------------------------- -; llvm.amdgcn.wave.reduce.umin.i32 +; llvm.amdgcn.wave.reduce.umax ; -------------------------------------------------------------------- declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.umax.i64(i64, i32 immarg) -define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -49,8 +130,8 @@ entry: ret void } -define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -61,6 +142,30 @@ entry: ret void } +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + @gv = constant i32 0 define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) { ; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv( @@ -74,3 +179,333 @@ entry: store i32 %result, ptr addrspace(1) %out ret void } + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.max +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.max.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.max.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.add +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.add.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.add.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.sub +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.sub.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.sub.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.and +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.and.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.and.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.or +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.or.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.or.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.xor +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.xor.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.xor.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +}