Skip to content

[AMDGPU] Add support for v_tanh_f16 on gfx1250 #149439

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")

TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts")
TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts")
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, { Src });
}
case AMDGPU::BI__builtin_amdgcn_tanhf:
case AMDGPU::BI__builtin_amdgcn_tanhh:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we just use llvm.tanh since it exists now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a TODO list that will go over all newly added VOP1 instructions again and add missing tests and support accordingly.

case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
return emitBuiltinWithOneOverloadedType<1>(*this, E,
Intrinsic::amdgcn_tanh);
Expand Down
20 changes: 20 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ void test_tanh_f32(global float* out, float a)
*out = __builtin_amdgcn_tanhf(a);
}

// CHECK-LABEL: @test_tanh_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(1) [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr addrspace(1) [[TMP0]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.tanh.f16(half [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store half [[TMP2]], ptr addrspace(1) [[TMP3]], align 2
// CHECK-NEXT: ret void
//
void test_tanh_f16(global half* out, global half* a)
{
*out = __builtin_amdgcn_tanhh(*a);
}

// CHECK-LABEL: @test_tanh_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,10 @@ defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;

let SubtargetPredicate = HasTanhInsts in {
defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
}

let SubtargetPredicate = HasBF16TransInsts in {
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
Expand Down Expand Up @@ -1142,6 +1146,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;

defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
Expand Down
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
; FIXME: GlobalISel does not work with bf16

declare float @llvm.amdgcn.tanh.f32(float) #0
declare half @llvm.amdgcn.tanh.f16(half) #0
declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0

define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 {
Expand Down Expand Up @@ -92,6 +93,88 @@ define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 {
ret void
}

define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 {
; SDAG-REAL16-LABEL: tanh_f16:
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, s2
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, s2
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
%tanh = call half @llvm.amdgcn.tanh.f16(half %src) #0
store half %tanh, ptr addrspace(1) %out, align 2
ret void
}

define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-LABEL: tanh_f16_constant_4.0:
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 4.0
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
%tanh = call half @llvm.amdgcn.tanh.f16(half 4.0) #0
store half %tanh, ptr addrspace(1) %out, align 2
ret void
}

define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-LABEL: tanh_f16_constant_100.0:
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 0x5640
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 0x5640
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
%tanh = call half @llvm.amdgcn.tanh.f16(half 100.0) #0
store half %tanh, ptr addrspace(1) %out, align 2
ret void
}

define amdgpu_kernel void @tanh_undef_f16(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-LABEL: tanh_undef_f16:
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_undef_f16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_endpgm
%tanh = call half @llvm.amdgcn.tanh.f16(half undef)
store half %tanh, ptr addrspace(1) %out, align 2
ret void
}

define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-LABEL: tanh_bf16:
; SDAG-REAL16: ; %bb.0:
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,51 @@ v_tanh_f32 v5, src_scc
v_tanh_f32 v255, 0xaf123456
// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]

v_tanh_f16 v5, v1
// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e]

v_tanh_f16 v5, v127
// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e]

v_tanh_f16 v5, s1
// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e]

v_tanh_f16 v5, s105
// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e]

v_tanh_f16 v5, vcc_lo
// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e]

v_tanh_f16 v5, vcc_hi
// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e]

v_tanh_f16 v5, ttmp15
// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e]

v_tanh_f16 v5, m0
// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e]

v_tanh_f16 v5, exec_lo
// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e]

v_tanh_f16 v5, exec_hi
// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e]

v_tanh_f16 v5, null
// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e]

v_tanh_f16 v5, -1
// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e]

v_tanh_f16 v5, 0.5
// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e]

v_tanh_f16 v5, src_scc
// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e]

v_tanh_f16 v127, 0x8000
// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00]

v_tanh_bf16 v5, v1
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]

Expand Down
48 changes: 48 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,54 @@ v_tanh_f32 v5, src_scc
v_tanh_f32 v255, 0xaf123456
// GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf]

v_tanh_f16 v5, v1
// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e]

v_tanh_f16 v5, v127
// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e]

v_tanh_f16 v5, s1
// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e]

v_tanh_f16 v5, s105
// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e]

v_tanh_f16 v5, vcc_lo
// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e]

v_tanh_f16 v5, vcc_hi
// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e]

v_tanh_f16 v5, ttmp15
// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e]

v_tanh_f16 v5, m0
// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e]

v_tanh_f16 v5, exec_lo
// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e]

v_tanh_f16 v5, exec_hi
// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e]

v_tanh_f16 v5, null
// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e]

v_tanh_f16 v5, -1
// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e]

v_tanh_f16 v5, 0.5
// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e]

v_tanh_f16 v5, src_scc
// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e]

v_tanh_f16 v127, 0x8000
// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00]

v_tanh_f16 v5.h, v1.h
// GFX1250: v_tanh_f16_e32 v5.h, v1.h ; encoding: [0x81,0x3f,0x0a,0x7f]

v_tanh_bf16 v5, v1
// GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e]

Expand Down
56 changes: 56 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,62 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 quad_perm:[0,1,2,3]
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_mirror
// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_half_mirror
// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shl:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shl:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shr:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shr:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_ror:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_ror:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,66 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
// GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 quad_perm:[0,1,2,3]
// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_mirror
// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_half_mirror
// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shl:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shl:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shr:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_shr:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_ror:1
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_ror:15
// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_f16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_tanh_f16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
Loading
Loading