From 3a8b4087233b698dba968d16bc99f2ae894eed16 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86@yahoo.com>
Date: Mon, 14 Jul 2025 17:08:07 -0700
Subject: [PATCH 1/3] [AMDGPU][MC] In GFX11+ v_pk_fmac_f16 should not allow DPP

In GFX11+ the instruction v_pk_fmac_f16 should not allow DPP.
---
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |  2 +-
 .../MC/AMDGPU/gfx11_asm_vop2_fake16_err.s     | 15 ++++++++++++++
 llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s      | 20 +++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 550ec9d3f55ab..cd7e399f65c55 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1904,7 +1904,7 @@ multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
   VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
 
 multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
-  VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;
+  VOP2Only_Real_e32<GFX11Gen, op>, VOP2Only_Real_e32<GFX12Gen, op>;
 
 multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
   VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
index bc0f586e1d411..f586e4a33c520 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
@@ -173,6 +173,21 @@ v_mul_f16_e32 v5, v1, v255
 v_mul_f16_e32 v5, v255, v2
 // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX11: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
 v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s
new file mode 100644
index 0000000000000..b7d93e1a3b3c8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s
@@ -0,0 +1,20 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error: %s
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported

From f57fcbbee679126961f8be7a3a8e347815ee9e9c Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86@yahoo.com>
Date: Tue, 29 Jul 2025 13:29:57 -0700
Subject: [PATCH 2/3] Add comments.

---
 llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cd7e399f65c55..330f556d9290e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1966,6 +1966,8 @@ defm V_SUBREV_CO_CI_U32 :
 
 defm V_CVT_PK_RTZ_F16_F32  : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
   "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
+// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
+// If this changes, ensure the DPP variant is not used for GFX11+.
 defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx11_gfx12<0x03c>;
 
 defm V_ADD_F16             : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">;

From 7eab2264557e7165b7dc3bccee159cad81d1d19e Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86@yahoo.com>
Date: Wed, 30 Jul 2025 12:02:16 -0700
Subject: [PATCH 3/3] Move comments to pseudo instruction

---
 llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 330f556d9290e..9de7d6d009fe1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1344,6 +1344,8 @@ def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
 } // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
 
 let SubtargetPredicate = HasPkFmacF16Inst in {
+// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
+// If this changes, ensure the DPP variant is not used for GFX11+.
 defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
 } // End SubtargetPredicate = HasPkFmacF16Inst
 
@@ -1966,8 +1968,6 @@ defm V_SUBREV_CO_CI_U32 :
 
 defm V_CVT_PK_RTZ_F16_F32  : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
   "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
-// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
-// If this changes, ensure the DPP variant is not used for GFX11+.
 defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx11_gfx12<0x03c>;
 
 defm V_ADD_F16             : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">;