Skip to content

Commit 7c53c61

Browse files
authored
[AMDGPU][True16][CodeGen] use vgpr16 for zext patterns (#153894)
Update true16 mode so that zext patterns use vgpr16 for 16-bit data types. This stops isel from inserting an invalid "vgpr32 = copy vgpr16".
1 parent 03912a1 commit 7c53c61

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

45 files changed

+12480
-14018
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3056,6 +3056,8 @@ def : GCNPat<
30563056
}
30573057
} // AddedComplexity = 1
30583058

3059+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3060+
let True16Predicate = p in {
30593061
def : GCNPat<
30603062
(i32 (DivergentUnaryFrag<zext> i16:$src)),
30613063
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3071,6 +3073,26 @@ def : GCNPat<
30713073
def : GCNPat<
30723074
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
30733075
(COPY VSrc_b16:$src)>;
3076+
}
3077+
3078+
let True16Predicate = UseRealTrue16Insts in {
3079+
def : GCNPat<
3080+
(i32 (DivergentUnaryFrag<zext> i16:$src)),
3081+
(REG_SEQUENCE VGPR_32, $src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3082+
>;
3083+
3084+
def : GCNPat<
3085+
(i64 (DivergentUnaryFrag<zext> i16:$src)),
3086+
(REG_SEQUENCE VReg_64,
3087+
(REG_SEQUENCE VGPR_32, $src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16), sub0,
3088+
(S_MOV_B32 (i32 0)), sub1)
3089+
>;
3090+
3091+
def : GCNPat<
3092+
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
3093+
(REG_SEQUENCE VGPR_32, $src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3094+
>;
3095+
}
30743096

30753097
def : GCNPat <
30763098
(i32 (trunc i64:$a)),

llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
164164
; GFX11-TRUE16: ; %bb.0:
165165
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166166
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
167-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
167+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0
168168
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
169169
;
170170
; GFX11-FAKE16-LABEL: v_mul_i16_zeroext:

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 5636 additions & 6265 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 546 additions & 602 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll

Lines changed: 620 additions & 700 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 1352 additions & 1534 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll

Lines changed: 108 additions & 132 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 2537 additions & 2877 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll

Lines changed: 287 additions & 350 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll

Lines changed: 283 additions & 311 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)