Skip to content

Commit 5fe9b52

Browse files
authored
Add FABS to canCreateUndefOrPoison (#149440)
FABS never creates undef or poison, so add it to the opcodes for which canCreateUndefOrPoison returns false.
1 parent f761d73 commit 5fe9b52

File tree

11 files changed

+537
-396
lines changed

11 files changed

+537
-396
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5569,6 +5569,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
55695569
case ISD::BUILD_VECTOR:
55705570
case ISD::BUILD_PAIR:
55715571
case ISD::SPLAT_VECTOR:
5572+
case ISD::FABS:
55725573
return false;
55735574

55745575
case ISD::ABS:

llvm/test/CodeGen/AMDGPU/fmaximum3.ll

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3272,10 +3272,9 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
32723272
; GFX9-LABEL: v_fmaximum3_f64_fabs0:
32733273
; GFX9: ; %bb.0:
32743274
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3276-
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
3275+
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3]
32773276
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3278-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3277+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
32793278
; GFX9-NEXT: s_nop 1
32803279
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
32813280
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3307,10 +3306,9 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
33073306
; GFX9-LABEL: v_fmaximum3_f64_fabs1:
33083307
; GFX9: ; %bb.0:
33093308
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3310-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3311-
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
3309+
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]|
33123310
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3313-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3311+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
33143312
; GFX9-NEXT: s_nop 1
33153313
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33163314
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3345,12 +3343,11 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
33453343
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
33463344
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
33473345
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3348-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3349-
; GFX9-NEXT: s_nop 0
3346+
; GFX9-NEXT: s_nop 1
33503347
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33513348
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3352-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
3353-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3349+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
3350+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
33543351
; GFX9-NEXT: s_nop 1
33553352
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33563353
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3377,17 +3374,14 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
33773374
; GFX9-LABEL: v_fmaximum3_f64_fabs_all:
33783375
; GFX9: ; %bb.0:
33793376
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3380-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3381-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3382-
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
3377+
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
33833378
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3384-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3385-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3386-
; GFX9-NEXT: s_nop 0
3379+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
3380+
; GFX9-NEXT: s_nop 1
33873381
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33883382
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3389-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
3390-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3383+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
3384+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
33913385
; GFX9-NEXT: s_nop 1
33923386
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33933387
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3452,17 +3446,14 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
34523446
; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all:
34533447
; GFX9: ; %bb.0:
34543448
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3455-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3456-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3457-
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3]
3449+
; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
34583450
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3459-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
3460-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3461-
; GFX9-NEXT: s_nop 0
3451+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
3452+
; GFX9-NEXT: s_nop 1
34623453
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
34633454
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3464-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
3465-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
3455+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]|
3456+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
34663457
; GFX9-NEXT: s_nop 1
34673458
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
34683459
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc

llvm/test/CodeGen/AMDGPU/fminimum3.ll

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3272,10 +3272,9 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
32723272
; GFX9-LABEL: v_fminimum3_f64_fabs0:
32733273
; GFX9: ; %bb.0:
32743274
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3276-
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
3275+
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, v[2:3]
32773276
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3278-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3277+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
32793278
; GFX9-NEXT: s_nop 1
32803279
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
32813280
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3307,10 +3306,9 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
33073306
; GFX9-LABEL: v_fminimum3_f64_fabs1:
33083307
; GFX9: ; %bb.0:
33093308
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3310-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3311-
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
3309+
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], |v[2:3]|
33123310
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3313-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3311+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
33143312
; GFX9-NEXT: s_nop 1
33153313
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33163314
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3345,12 +3343,11 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
33453343
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
33463344
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
33473345
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3348-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3349-
; GFX9-NEXT: s_nop 0
3346+
; GFX9-NEXT: s_nop 1
33503347
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33513348
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3352-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
3353-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3349+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
3350+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
33543351
; GFX9-NEXT: s_nop 1
33553352
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33563353
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3377,17 +3374,14 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
33773374
; GFX9-LABEL: v_fminimum3_f64_fabs_all:
33783375
; GFX9: ; %bb.0:
33793376
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3380-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3381-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3382-
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
3377+
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
33833378
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3384-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3385-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3386-
; GFX9-NEXT: s_nop 0
3379+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
3380+
; GFX9-NEXT: s_nop 1
33873381
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33883382
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3389-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
3390-
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3383+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
3384+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
33913385
; GFX9-NEXT: s_nop 1
33923386
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33933387
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3452,17 +3446,14 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
34523446
; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all:
34533447
; GFX9: ; %bb.0:
34543448
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3455-
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3456-
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3457-
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], -v[2:3]
3449+
; GFX9-NEXT: v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
34583450
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3459-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
3460-
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3461-
; GFX9-NEXT: s_nop 0
3451+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
3452+
; GFX9-NEXT: s_nop 1
34623453
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
34633454
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3464-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
3465-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
3455+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -|v[4:5]|
3456+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
34663457
; GFX9-NEXT: s_nop 1
34673458
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
34683459
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc

llvm/test/CodeGen/AMDGPU/fnearbyint.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,8 @@ define amdgpu_kernel void @nearbyint_f64(ptr addrspace(1) %out, double %in) {
223223
; SI-NEXT: v_bfi_b32 v1, s8, v1, v6
224224
; SI-NEXT: v_mov_b32_e32 v7, s2
225225
; SI-NEXT: v_add_f64 v[4:5], s[2:3], v[0:1]
226-
; SI-NEXT: s_bitset0_b32 s3, 31
227226
; SI-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
228-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[2:3]
227+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[2:3]|, v[2:3]
229228
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
230229
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
231230
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -285,16 +284,14 @@ define amdgpu_kernel void @nearbyint_v2f64(ptr addrspace(1) %out, <2 x double> %
285284
; SI-NEXT: v_mov_b32_e32 v9, s5
286285
; SI-NEXT: v_mov_b32_e32 v10, s4
287286
; SI-NEXT: v_add_f64 v[2:3], s[6:7], v[0:1]
288-
; SI-NEXT: s_bitset0_b32 s7, 31
289287
; SI-NEXT: v_add_f64 v[2:3], v[2:3], -v[0:1]
290288
; SI-NEXT: v_bfi_b32 v1, s10, v6, v9
291-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[4:5]
289+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[6:7]|, v[4:5]
292290
; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
293291
; SI-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
294292
; SI-NEXT: v_add_f64 v[6:7], s[4:5], v[0:1]
295-
; SI-NEXT: s_bitset0_b32 s5, 31
296293
; SI-NEXT: v_add_f64 v[0:1], v[6:7], -v[0:1]
297-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[4:5]
294+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[4:5]|, v[4:5]
298295
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
299296
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
300297
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
@@ -368,30 +365,26 @@ define amdgpu_kernel void @nearbyint_v4f64(ptr addrspace(1) %out, <4 x double> %
368365
; SI-NEXT: v_mov_b32_e32 v14, s5
369366
; SI-NEXT: v_mov_b32_e32 v15, s4
370367
; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[4:5]
371-
; SI-NEXT: s_bitset0_b32 s3, 31
372368
; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
373369
; SI-NEXT: v_bfi_b32 v5, s14, v10, v7
374-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[8:9]
370+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[2:3]|, v[8:9]
375371
; SI-NEXT: v_cndmask_b32_e32 v3, v1, v2, vcc
376372
; SI-NEXT: v_cndmask_b32_e32 v2, v0, v6, vcc
377373
; SI-NEXT: v_add_f64 v[0:1], s[0:1], v[4:5]
378-
; SI-NEXT: s_bitset0_b32 s1, 31
379374
; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
380375
; SI-NEXT: v_bfi_b32 v5, s14, v10, v12
381-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[8:9]
376+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[0:1]|, v[8:9]
382377
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
383378
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
384379
; SI-NEXT: v_add_f64 v[6:7], s[6:7], v[4:5]
385-
; SI-NEXT: s_bitset0_b32 s7, 31
386380
; SI-NEXT: v_add_f64 v[6:7], v[6:7], -v[4:5]
387381
; SI-NEXT: v_bfi_b32 v5, s14, v10, v14
388-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[8:9]
382+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[6:7]|, v[8:9]
389383
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc
390384
; SI-NEXT: v_cndmask_b32_e32 v6, v6, v13, vcc
391385
; SI-NEXT: v_add_f64 v[10:11], s[4:5], v[4:5]
392-
; SI-NEXT: s_bitset0_b32 s5, 31
393386
; SI-NEXT: v_add_f64 v[4:5], v[10:11], -v[4:5]
394-
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[8:9]
387+
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[4:5]|, v[8:9]
395388
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc
396389
; SI-NEXT: v_cndmask_b32_e32 v4, v4, v15, vcc
397390
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16

llvm/test/CodeGen/AMDGPU/fract-match.ll

Lines changed: 26 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2356,11 +2356,10 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23562356
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
23572357
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
23582358
; GFX6-NEXT: s_mov_b32 s8, 0
2359-
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
2360-
; GFX6-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23612359
; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
2360+
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
23622361
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
2363-
; GFX6-NEXT: v_cmp_neq_f64_e32 vcc, s[8:9], v[0:1]
2362+
; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
23642363
; GFX6-NEXT: s_mov_b32 s6, 0
23652364
; GFX6-NEXT: s_mov_b32 s7, 0xf000
23662365
; GFX6-NEXT: s_mov_b32 s4, s6
@@ -2375,46 +2374,43 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23752374
; GFX7: ; %bb.0: ; %entry
23762375
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23772376
; GFX7-NEXT: s_mov_b32 s4, 0
2378-
; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2379-
; GFX7-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2380-
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23812377
; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
2382-
; GFX7-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
2378+
; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2379+
; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2380+
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
23832381
; GFX7-NEXT: s_mov_b32 s6, 0
23842382
; GFX7-NEXT: s_mov_b32 s7, 0xf000
23852383
; GFX7-NEXT: s_mov_b32 s4, s6
23862384
; GFX7-NEXT: s_mov_b32 s5, s6
2387-
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2388-
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2389-
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
2385+
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2386+
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2387+
; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
23902388
; GFX7-NEXT: s_waitcnt vmcnt(0)
23912389
; GFX7-NEXT: s_setpc_b64 s[30:31]
23922390
;
23932391
; GFX8-LABEL: safe_math_fract_f64:
23942392
; GFX8: ; %bb.0: ; %entry
23952393
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23962394
; GFX8-NEXT: s_mov_b32 s4, 0
2397-
; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2398-
; GFX8-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2399-
; GFX8-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
24002395
; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
2401-
; GFX8-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
2402-
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
2403-
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2404-
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
2396+
; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2397+
; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2398+
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2399+
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2400+
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2401+
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
24052402
; GFX8-NEXT: s_waitcnt vmcnt(0)
24062403
; GFX8-NEXT: s_setpc_b64 s[30:31]
24072404
;
24082405
; GFX11-LABEL: safe_math_fract_f64:
24092406
; GFX11: ; %bb.0: ; %entry
24102407
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411-
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2412-
; GFX11-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2413-
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2414-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2415-
; GFX11-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2416-
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
2417-
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
2408+
; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2409+
; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2410+
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2411+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
2412+
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2413+
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
24182414
; GFX11-NEXT: s_setpc_b64 s[30:31]
24192415
;
24202416
; GFX12-LABEL: safe_math_fract_f64:
@@ -2424,14 +2420,13 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
24242420
; GFX12-NEXT: s_wait_samplecnt 0x0
24252421
; GFX12-NEXT: s_wait_bvhcnt 0x0
24262422
; GFX12-NEXT: s_wait_kmcnt 0x0
2427-
; GFX12-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2428-
; GFX12-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2429-
; GFX12-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2430-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2431-
; GFX12-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2432-
; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off
2423+
; GFX12-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2424+
; GFX12-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2425+
; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
24332426
; GFX12-NEXT: s_wait_alu 0xfffd
2434-
; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
2427+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
2428+
; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2429+
; GFX12-NEXT: global_store_b64 v[2:3], v[6:7], off
24352430
; GFX12-NEXT: s_setpc_b64 s[30:31]
24362431
entry:
24372432
%floor = tail call double @llvm.floor.f64(double %x)

0 commit comments

Comments
 (0)