@@ -686,33 +686,17 @@ define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32_offset_8388607(ptr %vbase, i3
686686
687687; Cannot push the shift into 32 bits, and cannot match.
688688define amdgpu_ps float @flat_load_saddr_f32_natural_addressing (ptr inreg %sbase , ptr %voffset.ptr ) {
689- ; GFX1250-SDAG-LABEL: flat_load_saddr_f32_natural_addressing:
690- ; GFX1250-SDAG: ; %bb.0:
691- ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
692- ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
693- ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
694- ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
695- ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
696- ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
697- ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
698- ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
699- ; GFX1250-SDAG-NEXT: ; return to shader part epilog
700- ;
701- ; GFX1250-GISEL-LABEL: flat_load_saddr_f32_natural_addressing:
702- ; GFX1250-GISEL: ; %bb.0:
703- ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
704- ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
705- ; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
706- ; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
707- ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
708- ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
709- ; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
710- ; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
711- ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
712- ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
713- ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
714- ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
715- ; GFX1250-GISEL-NEXT: ; return to shader part epilog
689+ ; GFX1250-LABEL: flat_load_saddr_f32_natural_addressing:
690+ ; GFX1250: ; %bb.0:
691+ ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
692+ ; GFX1250-NEXT: s_wait_xcnt 0x0
693+ ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
694+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
695+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
696+ ; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
697+ ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
698+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
699+ ; GFX1250-NEXT: ; return to shader part epilog
716700 %voffset = load i32 , ptr %voffset.ptr
717701 %zext.offset = zext i32 %voffset to i64
718702 %gep = getelementptr inbounds float , ptr %sbase , i64 %zext.offset
@@ -774,33 +758,17 @@ define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_imm_offset(ptr inreg
774758
775759; Range is 1 beyond the limit where we can move the shift into 32 bits.
776760define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_too_large (ptr inreg %sbase , ptr %voffset.ptr ) {
777- ; GFX1250-SDAG-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
778- ; GFX1250-SDAG: ; %bb.0:
779- ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
780- ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
781- ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
782- ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
783- ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
784- ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
785- ; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1]
786- ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
787- ; GFX1250-SDAG-NEXT: ; return to shader part epilog
788- ;
789- ; GFX1250-GISEL-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
790- ; GFX1250-GISEL: ; %bb.0:
791- ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
792- ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
793- ; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
794- ; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
795- ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
796- ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
797- ; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
798- ; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
799- ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
800- ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
801- ; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1]
802- ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
803- ; GFX1250-GISEL-NEXT: ; return to shader part epilog
761+ ; GFX1250-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large:
762+ ; GFX1250: ; %bb.0:
763+ ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
764+ ; GFX1250-NEXT: s_wait_xcnt 0x0
765+ ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
766+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
767+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
768+ ; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3]
769+ ; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
770+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
771+ ; GFX1250-NEXT: ; return to shader part epilog
804772 %voffset = load i32 , ptr %voffset.ptr , !range !1 , !noundef !{}
805773 %zext.offset = zext i32 %voffset to i64
806774 %gep = getelementptr inbounds float , ptr %sbase , i64 %zext.offset
0 commit comments