Skip to content

Commit d04b6da

Browse files
authored
[llvm-mca][x86] Ensure avxvnni tests actually test the avxvnni instructions (#157892)
Noticed while checking #97271 - discovered we weren't actually testing the vex variants of the vnni instructions in the avxvnni mca tests. Fixing this causes the znver4 results to break, because it turns out we didn't have consistent instruction naming for the avx and avx512 variants, breaking the regex matching. So add the missing reg operand to the avx512 vnni instruction signatures to match avx vnni.
1 parent c62ea65 commit d04b6da

File tree

10 files changed

+689
-689
lines changed

10 files changed

+689
-689
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12404,22 +12404,22 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
1240412404
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
1240512405
bit IsCommutable> {
1240612406
let ExeDomain = VTI.ExeDomain in {
12407-
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12407+
defm rr : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
1240812408
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
1240912409
"$src3, $src2", "$src2, $src3",
1241012410
(VTI.VT (OpNode VTI.RC:$src1,
1241112411
VTI.RC:$src2, VTI.RC:$src3)),
1241212412
IsCommutable, IsCommutable>,
1241312413
EVEX, VVVV, T8, Sched<[sched]>;
12414-
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12414+
defm rm : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1241512415
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
1241612416
"$src3, $src2", "$src2, $src3",
1241712417
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
1241812418
(VTI.VT (VTI.LdFrag addr:$src3))))>,
1241912419
EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
1242012420
Sched<[sched.Folded, sched.ReadAfterFold,
1242112421
sched.ReadAfterFold]>;
12422-
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12422+
defm rmb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1242312423
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
1242412424
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
1242512425
"$src2, ${src3}"#VTI.BroadcastStr,
@@ -12459,24 +12459,24 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul
1245912459
let Predicates = [HasVNNI] in {
1246012460
def : Pat<(v16i32 (add VR512:$src1,
1246112461
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12462-
(VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12462+
(VPDPWSSDZrr VR512:$src1, VR512:$src2, VR512:$src3)>;
1246312463
def : Pat<(v16i32 (add VR512:$src1,
1246412464
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12465-
(VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12465+
(VPDPWSSDZrm VR512:$src1, VR512:$src2, addr:$src3)>;
1246612466
}
1246712467
let Predicates = [HasVNNI,HasVLX] in {
1246812468
def : Pat<(v8i32 (add VR256X:$src1,
1246912469
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12470-
(VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12470+
(VPDPWSSDZ256rr VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
1247112471
def : Pat<(v8i32 (add VR256X:$src1,
1247212472
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12473-
(VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12473+
(VPDPWSSDZ256rm VR256X:$src1, VR256X:$src2, addr:$src3)>;
1247412474
def : Pat<(v4i32 (add VR128X:$src1,
1247512475
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12476-
(VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12476+
(VPDPWSSDZ128rr VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
1247712477
def : Pat<(v4i32 (add VR128X:$src1,
1247812478
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12479-
(VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12479+
(VPDPWSSDZ128rm VR128X:$src1, VR128X:$src2, addr:$src3)>;
1248012480
}
1248112481

1248212482
//===----------------------------------------------------------------------===//

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 87 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,78 +2939,78 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
29392939
case X86::VPDPBUUDSYrr:
29402940
case X86::VPDPBUUDrr:
29412941
case X86::VPDPBUUDYrr:
2942-
case X86::VPDPBSSDSZ128r:
2943-
case X86::VPDPBSSDSZ128rk:
2944-
case X86::VPDPBSSDSZ128rkz:
2945-
case X86::VPDPBSSDSZ256r:
2946-
case X86::VPDPBSSDSZ256rk:
2947-
case X86::VPDPBSSDSZ256rkz:
2948-
case X86::VPDPBSSDSZr:
2949-
case X86::VPDPBSSDSZrk:
2950-
case X86::VPDPBSSDSZrkz:
2951-
case X86::VPDPBSSDZ128r:
2952-
case X86::VPDPBSSDZ128rk:
2953-
case X86::VPDPBSSDZ128rkz:
2954-
case X86::VPDPBSSDZ256r:
2955-
case X86::VPDPBSSDZ256rk:
2956-
case X86::VPDPBSSDZ256rkz:
2957-
case X86::VPDPBSSDZr:
2958-
case X86::VPDPBSSDZrk:
2959-
case X86::VPDPBSSDZrkz:
2960-
case X86::VPDPBUUDSZ128r:
2961-
case X86::VPDPBUUDSZ128rk:
2962-
case X86::VPDPBUUDSZ128rkz:
2963-
case X86::VPDPBUUDSZ256r:
2964-
case X86::VPDPBUUDSZ256rk:
2965-
case X86::VPDPBUUDSZ256rkz:
2966-
case X86::VPDPBUUDSZr:
2967-
case X86::VPDPBUUDSZrk:
2968-
case X86::VPDPBUUDSZrkz:
2969-
case X86::VPDPBUUDZ128r:
2970-
case X86::VPDPBUUDZ128rk:
2971-
case X86::VPDPBUUDZ128rkz:
2972-
case X86::VPDPBUUDZ256r:
2973-
case X86::VPDPBUUDZ256rk:
2974-
case X86::VPDPBUUDZ256rkz:
2975-
case X86::VPDPBUUDZr:
2976-
case X86::VPDPBUUDZrk:
2977-
case X86::VPDPBUUDZrkz:
2978-
case X86::VPDPWSSDZ128r:
2979-
case X86::VPDPWSSDZ128rk:
2980-
case X86::VPDPWSSDZ128rkz:
2981-
case X86::VPDPWSSDZ256r:
2982-
case X86::VPDPWSSDZ256rk:
2983-
case X86::VPDPWSSDZ256rkz:
2984-
case X86::VPDPWSSDZr:
2985-
case X86::VPDPWSSDZrk:
2986-
case X86::VPDPWSSDZrkz:
2987-
case X86::VPDPWSSDSZ128r:
2988-
case X86::VPDPWSSDSZ128rk:
2989-
case X86::VPDPWSSDSZ128rkz:
2990-
case X86::VPDPWSSDSZ256r:
2991-
case X86::VPDPWSSDSZ256rk:
2992-
case X86::VPDPWSSDSZ256rkz:
2993-
case X86::VPDPWSSDSZr:
2994-
case X86::VPDPWSSDSZrk:
2995-
case X86::VPDPWSSDSZrkz:
2996-
case X86::VPDPWUUDZ128r:
2997-
case X86::VPDPWUUDZ128rk:
2998-
case X86::VPDPWUUDZ128rkz:
2999-
case X86::VPDPWUUDZ256r:
3000-
case X86::VPDPWUUDZ256rk:
3001-
case X86::VPDPWUUDZ256rkz:
3002-
case X86::VPDPWUUDZr:
3003-
case X86::VPDPWUUDZrk:
3004-
case X86::VPDPWUUDZrkz:
3005-
case X86::VPDPWUUDSZ128r:
3006-
case X86::VPDPWUUDSZ128rk:
3007-
case X86::VPDPWUUDSZ128rkz:
3008-
case X86::VPDPWUUDSZ256r:
3009-
case X86::VPDPWUUDSZ256rk:
3010-
case X86::VPDPWUUDSZ256rkz:
3011-
case X86::VPDPWUUDSZr:
3012-
case X86::VPDPWUUDSZrk:
3013-
case X86::VPDPWUUDSZrkz:
2942+
case X86::VPDPBSSDSZ128rr:
2943+
case X86::VPDPBSSDSZ128rrk:
2944+
case X86::VPDPBSSDSZ128rrkz:
2945+
case X86::VPDPBSSDSZ256rr:
2946+
case X86::VPDPBSSDSZ256rrk:
2947+
case X86::VPDPBSSDSZ256rrkz:
2948+
case X86::VPDPBSSDSZrr:
2949+
case X86::VPDPBSSDSZrrk:
2950+
case X86::VPDPBSSDSZrrkz:
2951+
case X86::VPDPBSSDZ128rr:
2952+
case X86::VPDPBSSDZ128rrk:
2953+
case X86::VPDPBSSDZ128rrkz:
2954+
case X86::VPDPBSSDZ256rr:
2955+
case X86::VPDPBSSDZ256rrk:
2956+
case X86::VPDPBSSDZ256rrkz:
2957+
case X86::VPDPBSSDZrr:
2958+
case X86::VPDPBSSDZrrk:
2959+
case X86::VPDPBSSDZrrkz:
2960+
case X86::VPDPBUUDSZ128rr:
2961+
case X86::VPDPBUUDSZ128rrk:
2962+
case X86::VPDPBUUDSZ128rrkz:
2963+
case X86::VPDPBUUDSZ256rr:
2964+
case X86::VPDPBUUDSZ256rrk:
2965+
case X86::VPDPBUUDSZ256rrkz:
2966+
case X86::VPDPBUUDSZrr:
2967+
case X86::VPDPBUUDSZrrk:
2968+
case X86::VPDPBUUDSZrrkz:
2969+
case X86::VPDPBUUDZ128rr:
2970+
case X86::VPDPBUUDZ128rrk:
2971+
case X86::VPDPBUUDZ128rrkz:
2972+
case X86::VPDPBUUDZ256rr:
2973+
case X86::VPDPBUUDZ256rrk:
2974+
case X86::VPDPBUUDZ256rrkz:
2975+
case X86::VPDPBUUDZrr:
2976+
case X86::VPDPBUUDZrrk:
2977+
case X86::VPDPBUUDZrrkz:
2978+
case X86::VPDPWSSDZ128rr:
2979+
case X86::VPDPWSSDZ128rrk:
2980+
case X86::VPDPWSSDZ128rrkz:
2981+
case X86::VPDPWSSDZ256rr:
2982+
case X86::VPDPWSSDZ256rrk:
2983+
case X86::VPDPWSSDZ256rrkz:
2984+
case X86::VPDPWSSDZrr:
2985+
case X86::VPDPWSSDZrrk:
2986+
case X86::VPDPWSSDZrrkz:
2987+
case X86::VPDPWSSDSZ128rr:
2988+
case X86::VPDPWSSDSZ128rrk:
2989+
case X86::VPDPWSSDSZ128rrkz:
2990+
case X86::VPDPWSSDSZ256rr:
2991+
case X86::VPDPWSSDSZ256rrk:
2992+
case X86::VPDPWSSDSZ256rrkz:
2993+
case X86::VPDPWSSDSZrr:
2994+
case X86::VPDPWSSDSZrrk:
2995+
case X86::VPDPWSSDSZrrkz:
2996+
case X86::VPDPWUUDZ128rr:
2997+
case X86::VPDPWUUDZ128rrk:
2998+
case X86::VPDPWUUDZ128rrkz:
2999+
case X86::VPDPWUUDZ256rr:
3000+
case X86::VPDPWUUDZ256rrk:
3001+
case X86::VPDPWUUDZ256rrkz:
3002+
case X86::VPDPWUUDZrr:
3003+
case X86::VPDPWUUDZrrk:
3004+
case X86::VPDPWUUDZrrkz:
3005+
case X86::VPDPWUUDSZ128rr:
3006+
case X86::VPDPWUUDSZ128rrk:
3007+
case X86::VPDPWUUDSZ128rrkz:
3008+
case X86::VPDPWUUDSZ256rr:
3009+
case X86::VPDPWUUDSZ256rrk:
3010+
case X86::VPDPWUUDSZ256rrkz:
3011+
case X86::VPDPWUUDSZrr:
3012+
case X86::VPDPWUUDSZrrk:
3013+
case X86::VPDPWUUDSZrrkz:
30143014
case X86::VPMADD52HUQrr:
30153015
case X86::VPMADD52HUQYrr:
30163016
case X86::VPMADD52HUQZ128r:
@@ -10822,15 +10822,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
1082210822
}
1082310823
break;
1082410824
}
10825-
case X86::VPDPWSSDZ128r:
10826-
case X86::VPDPWSSDZ128m:
10827-
case X86::VPDPWSSDZ256r:
10828-
case X86::VPDPWSSDZ256m:
10829-
case X86::VPDPWSSDZr:
10830-
case X86::VPDPWSSDZm: {
10831-
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10832-
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10833-
return true;
10825+
case X86::VPDPWSSDZ128rr:
10826+
case X86::VPDPWSSDZ128rm:
10827+
case X86::VPDPWSSDZ256rr:
10828+
case X86::VPDPWSSDZ256rm:
10829+
case X86::VPDPWSSDZrr:
10830+
case X86::VPDPWSSDZrm: {
10831+
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10832+
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10833+
return true;
1083410834
}
1083510835
break;
1083610836
}
@@ -10866,11 +10866,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
1086610866
MaddOpc = X86::VPMADDWDrm;
1086710867
AddOpc = X86::VPADDDrr;
1086810868
break;
10869-
case X86::VPDPWSSDZ128r:
10869+
case X86::VPDPWSSDZ128rr:
1087010870
MaddOpc = X86::VPMADDWDZ128rr;
1087110871
AddOpc = X86::VPADDDZ128rr;
1087210872
break;
10873-
case X86::VPDPWSSDZ128m:
10873+
case X86::VPDPWSSDZ128rm:
1087410874
MaddOpc = X86::VPMADDWDZ128rm;
1087510875
AddOpc = X86::VPADDDZ128rr;
1087610876
break;
@@ -10886,23 +10886,23 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
1088610886
MaddOpc = X86::VPMADDWDYrm;
1088710887
AddOpc = X86::VPADDDYrr;
1088810888
break;
10889-
case X86::VPDPWSSDZ256r:
10889+
case X86::VPDPWSSDZ256rr:
1089010890
MaddOpc = X86::VPMADDWDZ256rr;
1089110891
AddOpc = X86::VPADDDZ256rr;
1089210892
break;
10893-
case X86::VPDPWSSDZ256m:
10893+
case X86::VPDPWSSDZ256rm:
1089410894
MaddOpc = X86::VPMADDWDZ256rm;
1089510895
AddOpc = X86::VPADDDZ256rr;
1089610896
break;
1089710897
// vpdpwssd zmm2,zmm3,zmm1
1089810898
// -->
1089910899
// vpmaddwd zmm3,zmm3,zmm1
1090010900
// vpaddd zmm2,zmm2,zmm3
10901-
case X86::VPDPWSSDZr:
10901+
case X86::VPDPWSSDZrr:
1090210902
MaddOpc = X86::VPMADDWDZrr;
1090310903
AddOpc = X86::VPADDDZrr;
1090410904
break;
10905-
case X86::VPDPWSSDZm:
10905+
case X86::VPDPWSSDZrm:
1090610906
MaddOpc = X86::VPMADDWDZrm;
1090710907
AddOpc = X86::VPADDDZrr;
1090810908
break;

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
15671567
let NumMicroOps = 1;
15681568
}
15691569
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
1570-
"VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
1570+
"VPDP(BU|WS)(S|P)(S|D|DS)(Z?|Z128?|Z256?|Y?)r(r|rk|rkz)",
15711571
"VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
15721572
)>;
15731573

0 commit comments

Comments
 (0)