-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[AMDGPU] Propagate Constants for Wave Reduction Intrinsics #150395
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/easyonaadit/amdgpu/wave-reduce-builtins
Are you sure you want to change the base?
[AMDGPU] Propagate Constants for Wave Reduction Intrinsics #150395
Conversation
Warning: This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-analysis Author: Aaditya (easyonaadit). Changes: Patch is 169.48 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/150395.diff (9 files affected):
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index e71ba5ea5521e..11d22c75831d0
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
case Intrinsic::amdgcn_s_wqm:
case Intrinsic::amdgcn_s_quadmask:
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantInt::get(Ty, C0->abs());
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
return dyn_cast<Constant>(Operands[0]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index b6af8b4bb798d..d3d9275a566e2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -191,10 +191,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -204,10 +201,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8GISEL-LABEL: const_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -217,11 +211,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -229,72 +220,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9GISEL-LABEL: const_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT: s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: const_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -302,12 +256,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1164GISEL-LABEL: const_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
@@ -315,12 +265,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: const_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -328,12 +273,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132GISEL-LABEL: const_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
@@ -346,152 +286,35 @@ entry:
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL: ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL: ; %bb.0: ; %entry
+; GFX11GISEL-NEXT: s_endpgm
entry:
%result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
store i32 %result, ptr addrspace(1) %out
@@ -1431,33 +1254,21 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: const_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
@@ -1465,129 +1276,56 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: const_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Aaditya (easyonaadit). Changes: Patch is 169.48 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/150395.diff (9 files affected):
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index e71ba5ea5521e..11d22c75831d0
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
case Intrinsic::amdgcn_s_wqm:
case Intrinsic::amdgcn_s_quadmask:
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantInt::get(Ty, C0->abs());
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
return dyn_cast<Constant>(Operands[0]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index b6af8b4bb798d..d3d9275a566e2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -191,10 +191,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -204,10 +201,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8GISEL-LABEL: const_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -217,11 +211,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -229,72 +220,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9GISEL-LABEL: const_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT: s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: const_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -302,12 +256,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1164GISEL-LABEL: const_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
@@ -315,12 +265,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: const_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -328,12 +273,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132GISEL-LABEL: const_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
@@ -346,152 +286,35 @@ entry:
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL: ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL: ; %bb.0: ; %entry
+; GFX11GISEL-NEXT: s_endpgm
entry:
%result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
store i32 %result, ptr addrspace(1) %out
@@ -1431,33 +1254,21 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: const_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
@@ -1465,129 +1276,56 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: const_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s...
[truncated]
|
a5486f1
to
9b605ec
Compare
54c7cab
to
7c28c1f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's probably no point in keeping all the constant codegen test cases, unless you have a way to make the test skip the simplification.
9b605ec
to
b45b107
Compare
7c28c1f
to
65e79ec
Compare
b45b107
to
01432a0
Compare
cac00b2
to
8407435
Compare
dcaea72
to
45ff803
Compare
94d47a9
to
8a97bc0
Compare
508d60c
to
8d9130b
Compare
8a97bc0
to
7b8db2f
Compare
8d9130b
to
e9752fc
Compare
e9752fc
to
bc06c18
Compare
7b8db2f
to
c3c2812
Compare
c3c2812
to
5a8fdf8
Compare
bc06c18
to
2d22d22
Compare
No description provided.