Skip to content

Commit aecd448

Browse files
shiltian and rampitec authored
[AMDGPU] Add support for v_tanh_f16 on gfx1250 (#149439)
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent 4c85bf2 commit aecd448

23 files changed

+876 -2 lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -670,6 +670,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
670670
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
671671

672672
TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts")
673+
TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts")
673674
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
674675
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
675676
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -504,6 +504,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
504504
return Builder.CreateCall(F, { Src });
505505
}
506506
case AMDGPU::BI__builtin_amdgcn_tanhf:
507+
case AMDGPU::BI__builtin_amdgcn_tanhh:
507508
case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
508509
return emitBuiltinWithOneOverloadedType<1>(*this, E,
509510
Intrinsic::amdgcn_tanh);

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,26 @@ void test_tanh_f32(global float* out, float a)
6161
*out = __builtin_amdgcn_tanhf(a);
6262
}
6363

64+
// CHECK-LABEL: @test_tanh_f16(
65+
// CHECK-NEXT: entry:
66+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
67+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
68+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
69+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
70+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
71+
// CHECK-NEXT: store ptr addrspace(1) [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
72+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
73+
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr addrspace(1) [[TMP0]], align 2
74+
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.tanh.f16(half [[TMP1]])
75+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
76+
// CHECK-NEXT: store half [[TMP2]], ptr addrspace(1) [[TMP3]], align 2
77+
// CHECK-NEXT: ret void
78+
//
79+
void test_tanh_f16(global half* out, global half* a)
80+
{
81+
*out = __builtin_amdgcn_tanhh(*a);
82+
}
83+
6484
// CHECK-LABEL: @test_tanh_bf16(
6585
// CHECK-NEXT: entry:
6686
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -530,6 +530,10 @@ defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
530530
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
531531
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
532532

533+
let SubtargetPredicate = HasTanhInsts in {
534+
defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
535+
}
536+
533537
let SubtargetPredicate = HasBF16TransInsts in {
534538
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
535539
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
@@ -1142,6 +1146,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
11421146
defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
11431147

11441148
defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
1149+
defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
11451150
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
11461151
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
11471152
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
; FIXME: GlobalISel does not work with bf16
88

99
declare float @llvm.amdgcn.tanh.f32(float) #0
10+
declare half @llvm.amdgcn.tanh.f16(half) #0
1011
declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0
1112

1213
define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 {
@@ -92,6 +93,88 @@ define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 {
9293
ret void
9394
}
9495

96+
define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 {
97+
; SDAG-REAL16-LABEL: tanh_f16:
98+
; SDAG-REAL16: ; %bb.0:
99+
; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
100+
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
101+
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
102+
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, s2
103+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
104+
; SDAG-REAL16-NEXT: s_endpgm
105+
;
106+
; SDAG-FAKE16-LABEL: tanh_f16:
107+
; SDAG-FAKE16: ; %bb.0:
108+
; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
109+
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
110+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
111+
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, s2
112+
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
113+
; SDAG-FAKE16-NEXT: s_endpgm
114+
%tanh = call half @llvm.amdgcn.tanh.f16(half %src) #0
115+
store half %tanh, ptr addrspace(1) %out, align 2
116+
ret void
117+
}
118+
119+
define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 {
120+
; SDAG-REAL16-LABEL: tanh_f16_constant_4.0:
121+
; SDAG-REAL16: ; %bb.0:
122+
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
123+
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 4.0
124+
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
125+
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
126+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
127+
; SDAG-REAL16-NEXT: s_endpgm
128+
;
129+
; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0:
130+
; SDAG-FAKE16: ; %bb.0:
131+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
132+
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 4.0
133+
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
134+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
135+
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
136+
; SDAG-FAKE16-NEXT: s_endpgm
137+
%tanh = call half @llvm.amdgcn.tanh.f16(half 4.0) #0
138+
store half %tanh, ptr addrspace(1) %out, align 2
139+
ret void
140+
}
141+
142+
define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 {
143+
; SDAG-REAL16-LABEL: tanh_f16_constant_100.0:
144+
; SDAG-REAL16: ; %bb.0:
145+
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
146+
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 0x5640
147+
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
148+
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
149+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
150+
; SDAG-REAL16-NEXT: s_endpgm
151+
;
152+
; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0:
153+
; SDAG-FAKE16: ; %bb.0:
154+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
155+
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 0x5640
156+
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
157+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
158+
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
159+
; SDAG-FAKE16-NEXT: s_endpgm
160+
%tanh = call half @llvm.amdgcn.tanh.f16(half 100.0) #0
161+
store half %tanh, ptr addrspace(1) %out, align 2
162+
ret void
163+
}
164+
165+
define amdgpu_kernel void @tanh_undef_f16(ptr addrspace(1) %out) #1 {
166+
; SDAG-REAL16-LABEL: tanh_undef_f16:
167+
; SDAG-REAL16: ; %bb.0:
168+
; SDAG-REAL16-NEXT: s_endpgm
169+
;
170+
; SDAG-FAKE16-LABEL: tanh_undef_f16:
171+
; SDAG-FAKE16: ; %bb.0:
172+
; SDAG-FAKE16-NEXT: s_endpgm
173+
%tanh = call half @llvm.amdgcn.tanh.f16(half undef)
174+
store half %tanh, ptr addrspace(1) %out, align 2
175+
ret void
176+
}
177+
95178
define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
96179
; SDAG-REAL16-LABEL: tanh_bf16:
97180
; SDAG-REAL16: ; %bb.0:

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,51 @@ v_tanh_f32 v5, src_scc
7373
v_tanh_f32 v255, 0xaf123456
7474
// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]
7575

76+
v_tanh_f16 v5, v1
77+
// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e]
78+
79+
v_tanh_f16 v5, v127
80+
// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e]
81+
82+
v_tanh_f16 v5, s1
83+
// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e]
84+
85+
v_tanh_f16 v5, s105
86+
// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e]
87+
88+
v_tanh_f16 v5, vcc_lo
89+
// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e]
90+
91+
v_tanh_f16 v5, vcc_hi
92+
// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e]
93+
94+
v_tanh_f16 v5, ttmp15
95+
// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e]
96+
97+
v_tanh_f16 v5, m0
98+
// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e]
99+
100+
v_tanh_f16 v5, exec_lo
101+
// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e]
102+
103+
v_tanh_f16 v5, exec_hi
104+
// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e]
105+
106+
v_tanh_f16 v5, null
107+
// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e]
108+
109+
v_tanh_f16 v5, -1
110+
// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e]
111+
112+
v_tanh_f16 v5, 0.5
113+
// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e]
114+
115+
v_tanh_f16 v5, src_scc
116+
// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e]
117+
118+
v_tanh_f16 v127, 0x8000
119+
// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00]
120+
76121
v_tanh_bf16 v5, v1
77122
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]
78123

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,54 @@ v_tanh_f32 v5, src_scc
7373
v_tanh_f32 v255, 0xaf123456
7474
// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]
7575

76+
v_tanh_f16 v5, v1
77+
// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e]
78+
79+
v_tanh_f16 v5, v127
80+
// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e]
81+
82+
v_tanh_f16 v5, s1
83+
// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e]
84+
85+
v_tanh_f16 v5, s105
86+
// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e]
87+
88+
v_tanh_f16 v5, vcc_lo
89+
// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e]
90+
91+
v_tanh_f16 v5, vcc_hi
92+
// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e]
93+
94+
v_tanh_f16 v5, ttmp15
95+
// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e]
96+
97+
v_tanh_f16 v5, m0
98+
// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e]
99+
100+
v_tanh_f16 v5, exec_lo
101+
// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e]
102+
103+
v_tanh_f16 v5, exec_hi
104+
// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e]
105+
106+
v_tanh_f16 v5, null
107+
// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e]
108+
109+
v_tanh_f16 v5, -1
110+
// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e]
111+
112+
v_tanh_f16 v5, 0.5
113+
// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e]
114+
115+
v_tanh_f16 v5, src_scc
116+
// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e]
117+
118+
v_tanh_f16 v127, 0x8000
119+
// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00]
120+
121+
v_tanh_f16 v5.h, v1.h
122+
// GFX1250: v_tanh_f16_e32 v5.h, v1.h ; encoding: [0x81,0x3f,0x0a,0x7f]
123+
76124
v_tanh_bf16 v5, v1
77125
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]
78126

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,62 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
5858
// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
5959
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6060

61+
v_tanh_f16 v5, v1 quad_perm:[3,2,1,0]
62+
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_tanh_f16 v5, v1 quad_perm:[0,1,2,3]
66+
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_tanh_f16 v5, v1 row_mirror
70+
// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
73+
v_tanh_f16 v5, v1 row_half_mirror
74+
// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff]
75+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
76+
77+
v_tanh_f16 v5, v1 row_shl:1
78+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff]
79+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
80+
81+
v_tanh_f16 v5, v1 row_shl:15
82+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff]
83+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
84+
85+
v_tanh_f16 v5, v1 row_shr:1
86+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff]
87+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
88+
89+
v_tanh_f16 v5, v1 row_shr:15
90+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff]
91+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
92+
93+
v_tanh_f16 v5, v1 row_ror:1
94+
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff]
95+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
96+
97+
v_tanh_f16 v5, v1 row_ror:15
98+
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff]
99+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
100+
101+
v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
102+
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff]
103+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
104+
105+
v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
106+
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01]
107+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
108+
109+
v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
110+
// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13]
111+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
112+
113+
v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
114+
// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
115+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
116+
61117
v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
62118
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63119
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,66 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
5858
// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
5959
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6060

61+
v_tanh_f16 v5, v1 quad_perm:[3,2,1,0]
62+
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_tanh_f16 v5, v1 quad_perm:[0,1,2,3]
66+
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_tanh_f16 v5, v1 row_mirror
70+
// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
73+
v_tanh_f16 v5, v1 row_half_mirror
74+
// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff]
75+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
76+
77+
v_tanh_f16 v5, v1 row_shl:1
78+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff]
79+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
80+
81+
v_tanh_f16 v5, v1 row_shl:15
82+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff]
83+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
84+
85+
v_tanh_f16 v5, v1 row_shr:1
86+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff]
87+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
88+
89+
v_tanh_f16 v5, v1 row_shr:15
90+
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff]
91+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
92+
93+
v_tanh_f16 v5, v1 row_ror:1
94+
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff]
95+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
96+
97+
v_tanh_f16 v5, v1 row_ror:15
98+
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff]
99+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
100+
101+
v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
102+
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff]
103+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
104+
105+
v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
106+
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01]
107+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
108+
109+
v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
110+
// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13]
111+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
112+
113+
v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
114+
// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
115+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
116+
117+
v_tanh_f16 v5.h, v1.h quad_perm:[3,2,1,0]
118+
// GFX1250: v_tanh_f16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7f,0x81,0x1b,0x00,0xff]
119+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
120+
61121
v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
62122
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
63123
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)