@@ -475,28 +475,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475
475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476
476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477
477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479
+ ; GFX9-O0-NEXT: s_nop 0
480
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478
481
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
479
482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480
483
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
481
484
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
482
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485
- ; GFX9-O0-NEXT: s_nop 0
486
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
485
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490
486
; GFX9-O0-NEXT: s_nop 0
491
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
487
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
494
489
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
495
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
490
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
497
491
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
498
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
492
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
500
493
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
501
494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
502
495
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -507,7 +500,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
507
500
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
508
501
; GFX9-O0-NEXT: s_mov_b32 s14, s13
509
502
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510
- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511
503
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
512
504
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
513
505
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1043,10 +1035,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1043
1035
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1044
1036
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1045
1037
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1046
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1047
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1048
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1049
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1038
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1039
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1040
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1041
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1050
1042
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1051
1043
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1052
1044
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2664,28 +2656,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2664
2656
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2665
2657
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2666
2658
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2659
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2660
+ ; GFX9-O0-NEXT: s_nop 0
2661
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2667
2662
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2668
2663
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2669
2664
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2670
2665
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2671
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2672
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2673
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2674
- ; GFX9-O0-NEXT: s_nop 0
2675
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2676
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2677
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2678
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2666
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2679
2667
; GFX9-O0-NEXT: s_nop 0
2680
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2681
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2682
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2668
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2669
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2683
2670
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2684
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2685
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2671
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2686
2672
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2687
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2688
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2673
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2689
2674
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2690
2675
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2691
2676
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2696,7 +2681,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2696
2681
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2697
2682
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2698
2683
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2699
- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2700
2684
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2701
2685
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2702
2686
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3232,10 +3216,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3232
3216
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3233
3217
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3234
3218
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3235
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3236
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3237
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3238
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3219
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3220
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3221
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3222
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3239
3223
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3240
3224
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3241
3225
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments