@@ -686,17 +686,33 @@ define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32_offset_8388607(ptr %vbase, i3
686686
687687; Cannot push the shift into 32 bits, and cannot match.
688688define amdgpu_ps float @flat_load_saddr_f32_natural_addressing (ptr inreg %sbase , ptr %voffset.ptr ) {
689- ; GFX1250-LABEL: flat_load_saddr_f32_natural_addressing:
690- ; GFX1250: ; %bb.0:
691- ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
692- ; GFX1250-NEXT: s_wait_xcnt 0x0
693- ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
694- ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
695- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
696- ; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
697- ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
698- ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
699- ; GFX1250-NEXT: ; return to shader part epilog
689+ ; GFX1250-SDAG-LABEL: flat_load_saddr_f32_natural_addressing:
690+ ; GFX1250-SDAG: ; %bb.0:
691+ ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
692+ ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
693+ ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
694+ ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
695+ ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
696+ ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
697+ ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
698+ ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
699+ ; GFX1250-SDAG-NEXT: ; return to shader part epilog
700+ ;
701+ ; GFX1250-GISEL-LABEL: flat_load_saddr_f32_natural_addressing:
702+ ; GFX1250-GISEL: ; %bb.0:
703+ ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
704+ ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
705+ ; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
706+ ; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
707+ ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
708+ ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
709+ ; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
710+ ; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
711+ ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
712+ ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
713+ ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
714+ ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
715+ ; GFX1250-GISEL-NEXT: ; return to shader part epilog
700716 %voffset = load i32 , ptr %voffset.ptr
701717 %zext.offset = zext i32 %voffset to i64
702718 %gep = getelementptr inbounds float , ptr %sbase , i64 %zext.offset
@@ -758,17 +774,33 @@ define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_imm_offset(ptr inreg
758774
759775; Range is 1 beyond the limit where we can move the shift into 32 bits.
760776define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_too_large (ptr inreg %sbase , ptr %voffset.ptr ) {
761- ; GFX1250-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
762- ; GFX1250: ; %bb.0:
763- ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
764- ; GFX1250-NEXT: s_wait_xcnt 0x0
765- ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
766- ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
767- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
768- ; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
769- ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
770- ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
771- ; GFX1250-NEXT: ; return to shader part epilog
777+ ; GFX1250-SDAG-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
778+ ; GFX1250-SDAG: ; %bb.0:
779+ ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
780+ ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
781+ ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
782+ ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
783+ ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
784+ ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
785+ ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
786+ ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
787+ ; GFX1250-SDAG-NEXT: ; return to shader part epilog
788+ ;
789+ ; GFX1250-GISEL-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
790+ ; GFX1250-GISEL: ; %bb.0:
791+ ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
792+ ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
793+ ; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
794+ ; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
795+ ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
796+ ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
797+ ; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
798+ ; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
799+ ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
800+ ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
801+ ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
802+ ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
803+ ; GFX1250-GISEL-NEXT: ; return to shader part epilog
772804 %voffset = load i32 , ptr %voffset.ptr , !range !1 , !noundef !{}
773805 %zext.offset = zext i32 %voffset to i64
774806 %gep = getelementptr inbounds float , ptr %sbase , i64 %zext.offset
0 commit comments