diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 4111837d962b5..ed51f1d5de447 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -670,6 +670,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts") +TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts") TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index bcdb488f11639..a7d796ecccc61 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -504,6 +504,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, { Src }); } case AMDGPU::BI__builtin_amdgcn_tanhf: + case AMDGPU::BI__builtin_amdgcn_tanhh: case AMDGPU::BI__builtin_amdgcn_tanh_bf16: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_tanh); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index e120a46c6327b..738b7ab7f2b75 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -61,6 +61,26 @@ void test_tanh_f32(global float* out, float a) *out = __builtin_amdgcn_tanhf(a); } +// CHECK-LABEL: @test_tanh_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(1) [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr addrspace(1) [[TMP0]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.tanh.f16(half [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP2]], ptr addrspace(1) [[TMP3]], align 2 +// CHECK-NEXT: ret void +// +void test_tanh_f16(global half* out, global half* a) +{ + *out = __builtin_amdgcn_tanhh(*a); +} + // CHECK-LABEL: @test_tanh_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 8c35fea8259f4..1bbbb610305e9 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -530,6 +530,10 @@ defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>; defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; +let SubtargetPredicate = HasTanhInsts in { +defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>; +} + let SubtargetPredicate = HasBF16TransInsts in { defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; @@ -1142,6 +1146,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>; defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>; defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>; +defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>; 
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>; defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">; defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll index 81db7354757d9..dd89f80a54949 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll @@ -7,6 +7,7 @@ ; FIXME: GlobalISel does not work with bf16 declare float @llvm.amdgcn.tanh.f32(float) #0 +declare half @llvm.amdgcn.tanh.f16(half) #0 declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0 define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 { @@ -92,6 +93,88 @@ define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 { ret void } +define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 { +; SDAG-REAL16-LABEL: tanh_f16: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, s2 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f16: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, s2 +; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call half @llvm.amdgcn.tanh.f16(half %src) #0 + store half %tanh, ptr addrspace(1) %out, align 2 + ret void +} + +define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_f16_constant_4.0: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 4.0 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; 
SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 4.0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call half @llvm.amdgcn.tanh.f16(half 4.0) #0 + store half %tanh, ptr addrspace(1) %out, align 2 + ret void +} + +define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_f16_constant_100.0: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 0x5640 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_tanh_f16_e32 v0, 0x5640 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call half @llvm.amdgcn.tanh.f16(half 100.0) #0 + store half %tanh, ptr addrspace(1) %out, align 2 + ret void +} + +define amdgpu_kernel void @tanh_undef_f16(ptr addrspace(1) %out) #1 { +; SDAG-REAL16-LABEL: tanh_undef_f16: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: tanh_undef_f16: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_endpgm + %tanh = call half @llvm.amdgcn.tanh.f16(half undef) + store half %tanh, ptr addrspace(1) %out, align 2 + ret void +} + define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 { ; SDAG-REAL16-LABEL: tanh_bf16: ; SDAG-REAL16: ; %bb.0: diff --git 
a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index f9e217d1f0361..279bb262bff04 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -73,6 +73,51 @@ v_tanh_f32 v5, src_scc v_tanh_f32 v255, 0xaf123456 // GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf] +v_tanh_f16 v5, v1 +// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e] + +v_tanh_f16 v5, v127 +// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e] + +v_tanh_f16 v5, s1 +// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, s105 +// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, vcc_lo +// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, vcc_hi +// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, ttmp15 +// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, m0 +// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, exec_lo +// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, exec_hi +// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, null +// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, -1 +// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, 0.5 +// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, src_scc +// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e] + +v_tanh_f16 v127, 0x8000 +// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00] + v_tanh_bf16 v5, v1 // GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index d51ef68bf1e19..76272d25d92d4 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -73,6 +73,54 @@ v_tanh_f32 v5, src_scc v_tanh_f32 v255, 0xaf123456 // GFX1250: v_tanh_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x3c,0xfe,0x7f,0x56,0x34,0x12,0xaf] +v_tanh_f16 v5, v1 +// GFX1250: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e] + +v_tanh_f16 v5, v127 +// GFX1250: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e] + +v_tanh_f16 v5, s1 +// GFX1250: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, s105 +// GFX1250: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, vcc_lo +// GFX1250: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, vcc_hi +// GFX1250: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, ttmp15 +// GFX1250: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, m0 +// GFX1250: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, exec_lo +// GFX1250: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, exec_hi +// GFX1250: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, null +// GFX1250: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, -1 +// GFX1250: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, 0.5 +// GFX1250: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e] + +v_tanh_f16 v5, src_scc +// GFX1250: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e] + +v_tanh_f16 v127, 0x8000 +// GFX1250: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_tanh_f16 v5.h, v1.h +// GFX1250: v_tanh_f16_e32 v5.h, v1.h ; encoding: [0x81,0x3f,0x0a,0x7f] + v_tanh_bf16 v5, v1 // GFX1250: v_tanh_bf16_e32 v5, v1 ; encoding: [0x01,0x95,0x0a,0x7e] diff --git 
a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index ae22f68e54835..0a8ee84561d33 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -58,6 +58,62 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_mirror +// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_half_mirror +// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shl:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shl:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: 
instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shr:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shr:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_ror:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_ror:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 37ecb66bfe809..d4afb9d9b2d9a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -58,6 +58,66 @@ v_tanh_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_tanh_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3c,0xfe,0x7f,0xff,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_mirror +// GFX1250: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_half_mirror +// GFX1250: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shl:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shl:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shr:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_shr:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_ror:1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_ror:15 +// GFX1250: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5.h, v1.h quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7f,0x81,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index f24122e24b70e..a7cb6bf8de69c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -14,6 +14,18 @@ v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v127, 
v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3e,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index 34abc829d4eb1..6acab7edc0d49 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -14,6 +14,22 @@ v_tanh_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_tanh_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3c,0xfe,0x7f,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3e,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 340a7857419c4..7486d849253e8 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -172,6 +172,51 @@ v_tanh_f32_e64 v5, src_scc mul:4 v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 // GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_tanh_f16_e64 v5, v1 +// GFX1250: v_tanh_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x01,0x00,0x00] + +v_tanh_f16_e64 v5, v255 +// GFX1250: v_tanh_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x9f,0xd5,0xff,0x01,0x00,0x00] + +v_tanh_f16_e64 v5, s1 +// GFX1250: v_tanh_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, s105 +// GFX1250: v_tanh_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x9f,0xd5,0x69,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, vcc_lo +// GFX1250: v_tanh_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x6a,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, vcc_hi +// GFX1250: v_tanh_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x6b,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, ttmp15 +// GFX1250: v_tanh_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9f,0xd5,0x7b,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, m0 +// GFX1250: v_tanh_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x9f,0xd5,0x7d,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, exec_lo +// GFX1250: v_tanh_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x7e,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, exec_hi +// GFX1250: v_tanh_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x7f,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, null +// GFX1250: v_tanh_f16_e64 v5, null ; encoding: [0x05,0x00,0x9f,0xd5,0x7c,0x00,0x00,0x00] + 
+v_tanh_f16_e64 v5, -1 +// GFX1250: v_tanh_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x9f,0xd5,0xc1,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, 0.5 mul:2 +// GFX1250: v_tanh_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9f,0xd5,0xf0,0x00,0x00,0x08] + +v_tanh_f16_e64 v5, src_scc mul:4 +// GFX1250: v_tanh_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9f,0xd5,0xfd,0x00,0x00,0x10] + +v_tanh_f16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_tanh_f16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0x9f,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + v_rcp_bf16_e64 v5, v1 // GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index 579a467b41052..b59b8b31e2d5f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -175,6 +175,54 @@ v_tanh_f32_e64 v5, src_scc mul:4 v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 // GFX1250: v_tanh_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x9e,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_tanh_f16_e64 v5, v1 +// GFX1250: v_tanh_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x01,0x00,0x00] + +v_tanh_f16_e64 v5, v255 +// GFX1250: v_tanh_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x9f,0xd5,0xff,0x01,0x00,0x00] + +v_tanh_f16_e64 v5, s1 +// GFX1250: v_tanh_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, s105 +// GFX1250: v_tanh_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x9f,0xd5,0x69,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, vcc_lo +// GFX1250: v_tanh_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x6a,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, vcc_hi +// GFX1250: v_tanh_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x6b,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, ttmp15 +// GFX1250: v_tanh_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9f,0xd5,0x7b,0x00,0x00,0x00] + 
+v_tanh_f16_e64 v5, m0 +// GFX1250: v_tanh_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x9f,0xd5,0x7d,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, exec_lo +// GFX1250: v_tanh_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x7e,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, exec_hi +// GFX1250: v_tanh_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x7f,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, null +// GFX1250: v_tanh_f16_e64 v5, null ; encoding: [0x05,0x00,0x9f,0xd5,0x7c,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, -1 +// GFX1250: v_tanh_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x9f,0xd5,0xc1,0x00,0x00,0x00] + +v_tanh_f16_e64 v5, 0.5 mul:2 +// GFX1250: v_tanh_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9f,0xd5,0xf0,0x00,0x00,0x08] + +v_tanh_f16_e64 v5, src_scc mul:4 +// GFX1250: v_tanh_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9f,0xd5,0xfd,0x00,0x00,0x10] + +v_tanh_f16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_tanh_f16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0x9f,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +v_tanh_f16 v5.l, v128.h +// GFX1250: v_tanh_f16_e64 v5.l, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0x9f,0xd5,0x80,0x01,0x00,0x00] + v_rcp_bf16_e64 v5, v1 // GFX1250: v_rcp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index 423340cc90b30..f7f20f46161ce 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -58,6 +58,62 @@ v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask // GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// 
GFX1250: v_tanh_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_mirror +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9f,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, 
v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 7968b39839a78..e1241b01ccae1 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -58,6 +58,66 @@ v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask // GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_mirror +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported 
on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_tanh_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: 
v_tanh_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9f,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xca,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index dd469c2eef850..0106175301d20 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -18,6 +18,22 @@ v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] 
; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9f,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index 9fce77916b66e..93b86f3ffb841 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -18,6 +18,26 @@ v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] 
; encoding: [0xff,0x81,0x9e,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_tanh_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_tanh_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9f,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_tanh_f16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_tanh_f16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_tanh_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xca,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index 0a6fc391e63a5..5f37ba91e071b 100644 
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -74,6 +74,69 @@ 0x6a,0x3c,0x0a,0x7e # GFX1250: v_tanh_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x3c,0x0a,0x7e] +0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e32 v127.l, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e32 v127, 0x8000 ; encoding: [0xff,0x3e,0xfe,0x7e,0x00,0x80,0x00,0x00] + +0xc1,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, -1 ; encoding: [0xc1,0x3e,0x0a,0x7e] + +0xf0,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, 0.5 ; encoding: [0xf0,0x3e,0x0a,0x7e] + +0x7f,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, exec_hi ; encoding: [0x7f,0x3e,0x0a,0x7e] + +0x7e,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, exec_lo ; encoding: [0x7e,0x3e,0x0a,0x7e] + +0x7d,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, m0 ; encoding: [0x7d,0x3e,0x0a,0x7e] + +0x7c,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, null ; encoding: [0x7c,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, null ; encoding: [0x7c,0x3e,0x0a,0x7e] + +0x01,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, s1 ; encoding: [0x01,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, s1 ; encoding: [0x01,0x3e,0x0a,0x7e] + +0x69,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, s105 ; encoding: [0x69,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, s105 ; encoding: [0x69,0x3e,0x0a,0x7e] + +0xfd,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, src_scc ; encoding: 
[0xfd,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, src_scc ; encoding: [0xfd,0x3e,0x0a,0x7e] + +0x7b,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, ttmp15 ; encoding: [0x7b,0x3e,0x0a,0x7e] + +0x01,0x3f,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, v1.l ; encoding: [0x01,0x3f,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, v1 ; encoding: [0x01,0x3f,0x0a,0x7e] + +0x7f,0x3f,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, v127.l ; encoding: [0x7f,0x3f,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, v127 ; encoding: [0x7f,0x3f,0x0a,0x7e] + +0x6b,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x3e,0x0a,0x7e] + +0x6a,0x3e,0x0a,0x7e +# GFX1250-REAL16: v_tanh_f16_e32 v5.l, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e] +# GFX1250-FAKE16: v_tanh_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x3e,0x0a,0x7e] + +0x81,0x3f,0x0a,0x7f +# GFX1250-REAL16: v_tanh_f16_e32 v5.h, v1.h ; encoding: [0x81,0x3f,0x0a,0x7f] + 0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index f099ffcba36e4..57bee2766ce44 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -44,6 +44,65 @@ 0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13 # GFX1250: v_tanh_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3c,0x0a,0x7e,0x01,0x60,0x09,0x13] +0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30 +# GFX1250-REAL16: v_tanh_f16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +# GFX1250-FAKE16: v_tanh_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3e,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x41,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x40,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x21,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x50,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x01,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x11,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x3e,0x0a,0x7e,0x01,0x60,0x09,0x13] + +0xfa,0x3e,0x0a,0x7f,0x81,0x1b,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3e,0x0a,0x7f,0x81,0x1b,0x00,0xff] + 0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30] # GFX1250-FAKE16: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index d86d4630c48ea..28ec6b11b4de3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -11,6 +11,22 @@ 0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX1250: v_tanh_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3c,0x0a,0x7e,0x01,0x77,0x39,0x05] +0xe9,0x3e,0xfe,0x7e,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3e,0xfe,0x7e,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x3e,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +0xe9,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] + 
+0xea,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x3e,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] + 0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index 4dc7ed4237f53..5004762729701 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -47,6 +47,70 @@ 0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00 # GFX1250: v_tanh_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9e,0xd5,0x6a,0x00,0x00,0x00] +0xff,0x81,0x9f,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0x9f,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0x9f,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0xc1,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, -1 ; encoding: [0x05,0x00,0x9f,0xd5,0xc1,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x9f,0xd5,0xc1,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0xf0,0x00,0x00,0x08 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, 0.5 mul:2 ; 
encoding: [0x05,0x00,0x9f,0xd5,0xf0,0x00,0x00,0x08] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x9f,0xd5,0xf0,0x00,0x00,0x08] + +0x05,0x00,0x9f,0xd5,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x7f,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x7e,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x7e,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x7e,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x7d,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, m0 ; encoding: [0x05,0x00,0x9f,0xd5,0x7d,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x9f,0xd5,0x7d,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x7c,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, null ; encoding: [0x05,0x00,0x9f,0xd5,0x7c,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, null ; encoding: [0x05,0x00,0x9f,0xd5,0x7c,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, s1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x69,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, s105 ; encoding: [0x05,0x00,0x9f,0xd5,0x69,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x9f,0xd5,0x69,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0xfd,0x00,0x00,0x10 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0x9f,0xd5,0xfd,0x00,0x00,0x10] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x9f,0xd5,0xfd,0x00,0x00,0x10] + +0x05,0x00,0x9f,0xd5,0x7b,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0x9f,0xd5,0x7b,0x00,0x00,0x00] +# 
GFX1250-FAKE16: v_tanh_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x9f,0xd5,0x7b,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x01,0x01,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x01,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x9f,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0xff,0x01,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0x9f,0xd5,0xff,0x01,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x9f,0xd5,0xff,0x01,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x6b,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x6b,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x9f,0xd5,0x6b,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0x6a,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x6a,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x9f,0xd5,0x6a,0x00,0x00,0x00] + +0x05,0x08,0x9f,0xd5,0x80,0x01,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64 v5.l, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0x9f,0xd5,0x80,0x01,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64 v5, v128 ; encoding: [0x05,0x00,0x9f,0xd5,0x80,0x01,0x00,0x00] + 0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] # GFX1250-FAKE16: v_tanh_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xca,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index 1f03a43cd8bd4..de908b95d94f9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s 0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 # GFX1250: v_tanh_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9e,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] @@ -44,6 +44,66 @@ 0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff # GFX1250: v_tanh_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0xff,0x81,0x9f,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9f,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x9f,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x48,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x9f,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] + 0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 # GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index e673f9fdfc7bb..cfe7173c383b3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -14,6 +14,26 @@ 
0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05 # GFX1250: v_tanh_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0xff,0x81,0x9f,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9f,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x9f,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0x9f,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x9f,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +0x05,0x48,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_tanh_f16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_tanh_f16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x9f,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + 0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX1250-REAL16: v_rsq_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]