Skip to content

Commit a5ed9c7

Browse files
jgu222 authored and igcbot committed
[Autobackout][FuncReg]Revert of change: afa858f
Add pseudoKill to condMod too. If a NoMask instruction without a predicate has a condMod, make sure to add a pseudoKill for the condMod when it is a full write. Resubmitted because the earlier backout appears to have been incorrect.
1 parent 2615174 commit a5ed9c7

File tree

1 file changed

+44
-85
lines changed

1 file changed

+44
-85
lines changed

visa/Optimizer.cpp

Lines changed: 44 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -12417,78 +12417,59 @@ void Optimizer::newDoNoMaskWA()
1241712417
return flagVar;
1241812418
};
1241912419

12420-
// Check if condMod or dst is full write. If so, add pseudo kill for it.
12421-
auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
12420+
auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
1242212421
{
1242312422
// Only NoMask Inst without predicate will call this function!
12424-
// isCondMod = true: check condMod
12425-
// = false: check dst
1242612423
G4_INST* I = *aII;
12427-
if (I->getImplAccSrc() != nullptr || I->isSend())
12424+
G4_DstRegRegion* aDst = I->getDst();
12425+
if (!aDst || aDst->isNullReg() ||
12426+
I->getImplAccSrc() != nullptr || I->isSend() ||
12427+
!aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
1242812428
{
1242912429
return;
1243012430
}
12431-
G4_CondMod* condMod = I->getCondMod();
12432-
G4_DstRegRegion* dst = I->getDst();
12433-
if (isCondMod)
12434-
{
12435-
if (!condMod || !condMod->getBase()->isRegVar() ||
12436-
condMod->getBase()->asRegVar()->getPhyReg())
12437-
{
12438-
return;
12439-
}
12440-
}
12441-
else
12442-
{
12443-
if (!dst || dst->isNullReg() ||
12444-
!dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445-
{
12446-
return;
12447-
}
12448-
}
1244912431

12450-
G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1245112432
// Make sure dst var is not used in this inst.
1245212433
{
1245312434
G4_Operand* src0_0 = I->getSrc(0);
1245412435
G4_Operand* src0_1 = I->getSrc(1);
1245512436
G4_Operand* src0_2 = I->getSrc(2);
1245612437
G4_Operand* src0_3 = I->getSrc(3);
1245712438

12458-
if ((src0_0 && src0_0->compareOperand(D) != Rel_disjoint) ||
12459-
(src0_1 && src0_1->compareOperand(D) != Rel_disjoint) ||
12460-
(src0_2 && src0_2->compareOperand(D) != Rel_disjoint) ||
12461-
(src0_3 && src0_3->compareOperand(D) != Rel_disjoint))
12439+
if ((src0_0 && src0_0->compareOperand(aDst) != Rel_disjoint) ||
12440+
(src0_1 && src0_1->compareOperand(aDst) != Rel_disjoint) ||
12441+
(src0_2 && src0_2->compareOperand(aDst) != Rel_disjoint) ||
12442+
(src0_3 && src0_3->compareOperand(aDst) != Rel_disjoint))
1246212443
{
1246312444
return;
1246412445
}
1246512446
}
1246612447

1246712448
bool needKill = false;
12468-
const G4_Declare* decl = ((const G4_RegVar*)D->getBase())->getDeclare();
12449+
const G4_Declare* decl = ((const G4_RegVar*)aDst->getBase())->getDeclare();
1246912450
const G4_Declare* primaryDcl = decl->getRootDeclare();
1247012451

12471-
if (D->isFlag() || isCondMod)
12452+
if (aDst->isFlag())
1247212453
{
1247312454
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1247412455
// For example, "mov (1|M0) P3:uw 0"
12475-
needKill = (D->getRightBound() - D->getLeftBound() + 1) >=
12476-
D->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12456+
needKill = (aDst->getRightBound() - aDst->getLeftBound() + 1) >=
12457+
aDst->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1247712458
}
1247812459
else
1247912460
{
1248012461
if (decl->getAliasOffset() != 0 ||
12481-
dst->getRegAccess() != Direct ||
12482-
dst->getRegOff() != 0 ||
12483-
dst->getSubRegOff() != 0 ||
12484-
dst->getHorzStride() != 1 ||
12462+
aDst->getRegAccess() != Direct ||
12463+
aDst->getRegOff() != 0 ||
12464+
aDst->getSubRegOff() != 0 ||
12465+
aDst->getHorzStride() != 1 ||
1248512466
I->isPartialWrite())
1248612467
{
1248712468
return;
1248812469
}
1248912470
if (fg.isPseudoDcl(primaryDcl) ||
1249012471
primaryDcl->getRegVar()->isRegVarTransient() ||
12491-
((dst->getTypeSize() * I->getExecSize()) ==
12472+
((aDst->getTypeSize() * I->getExecSize()) ==
1249212473
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1249312474
{
1249412475
needKill = true;
@@ -12611,7 +12592,7 @@ void Optimizer::newDoNoMaskWA()
1261112592
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1261212593

1261312594
// add pseudoKill
12614-
addPseudoKillIfFullWrite(aBB, aII, false); // dst
12595+
addPseudoKillIfFullDstWrite(aBB, aII);
1261512596

1261612597
// Create a temp that's big enough to hold data and possible gap
1261712598
// b/w data due to alignment/hw restriction.
@@ -12702,9 +12683,8 @@ void Optimizer::newDoNoMaskWA()
1270212683
return;
1270312684
}
1270412685

12705-
// Add pseudo kill for dst and condMod
12706-
addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707-
addPseudoKillIfFullWrite(aBB, aII, true); // condMod
12686+
// Add pseudo kill for dst
12687+
addPseudoKillIfFullDstWrite(aBB, aII);
1270812688

1270912689
const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1271012690
G4_Declare* modDcl = P->getTopDcl();
@@ -13033,7 +13013,7 @@ void Optimizer::newDoNoMaskWA()
1303313013
if (!condmod && !pred)
1303413014
{
1303513015
// Add pseudo Kill
13036-
addPseudoKillIfFullWrite(BB, II, false); // dst
13016+
addPseudoKillIfFullDstWrite(BB, II);
1303713017

1303813018
// case 1: no predicate, no flagModifier (common case)
1303913019
G4_Predicate* newPred = builder.createPredicate(
@@ -13233,78 +13213,58 @@ void Optimizer::doNoMaskWA()
1323313213
return flagVar;
1323413214
};
1323513215

13236-
// Check if condMod or dst is full write. If so, add pseudo kill for it.
13237-
auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
13216+
auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
1323813217
{
1323913218
// Only NoMask Inst without predicate will call this function!
13240-
// isCondMod = true: check condMod
13241-
// = false: check dst
1324213219
G4_INST* I = *aII;
13243-
if (I->getImplAccSrc() != nullptr || I->isSend())
13220+
G4_DstRegRegion* aDst = I->getDst();
13221+
if (!aDst || aDst->isNullReg() ||
13222+
I->getImplAccSrc() != nullptr || I->isSend() ||
13223+
aDst->getBase()->asRegVar()->getPhyReg())
1324413224
{
1324513225
return;
1324613226
}
13247-
G4_CondMod* condMod = I->getCondMod();
13248-
G4_DstRegRegion* dst = I->getDst();
13249-
if (isCondMod)
13250-
{
13251-
if (!condMod || !condMod->getBase()->isRegVar() ||
13252-
condMod->getBase()->asRegVar()->getPhyReg())
13253-
{
13254-
return;
13255-
}
13256-
}
13257-
else
13258-
{
13259-
if (!dst || dst->isNullReg() ||
13260-
!dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261-
{
13262-
return;
13263-
}
13264-
}
1326513227

13266-
G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1326713228
// Make sure dst var is not used in this inst.
1326813229
{
1326913230
G4_Operand* src0_0 = I->getSrc(0);
1327013231
G4_Operand* src0_1 = I->getSrc(1);
1327113232
G4_Operand* src0_2 = I->getSrc(2);
1327213233
G4_Operand* src0_3 = I->getSrc(3);
1327313234

13274-
if ((src0_0 && src0_0->compareOperand(D) != Rel_disjoint) ||
13275-
(src0_1 && src0_1->compareOperand(D) != Rel_disjoint) ||
13276-
(src0_2 && src0_2->compareOperand(D) != Rel_disjoint) ||
13277-
(src0_3 && src0_3->compareOperand(D) != Rel_disjoint))
13235+
if ((src0_0 && src0_0->compareOperand(aDst) != Rel_disjoint) ||
13236+
(src0_1 && src0_1->compareOperand(aDst) != Rel_disjoint) ||
13237+
(src0_2 && src0_2->compareOperand(aDst) != Rel_disjoint) ||
13238+
(src0_3 && src0_3->compareOperand(aDst) != Rel_disjoint))
1327813239
{
1327913240
return;
1328013241
}
1328113242
}
1328213243

1328313244
bool needKill = false;
13284-
const G4_Declare* decl = ((const G4_RegVar*)D->getBase())->getDeclare();
13245+
const G4_Declare* decl = ((const G4_RegVar*)aDst->getBase())->getDeclare();
1328513246
const G4_Declare* primaryDcl = decl->getRootDeclare();
13286-
13287-
if (D->isFlag() || isCondMod)
13247+
if (aDst->isFlag())
1328813248
{
1328913249
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1329013250
// For example, "mov (1|M0) P3:uw 0"
13291-
needKill = (D->getRightBound() - D->getLeftBound() + 1) >=
13292-
D->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13251+
needKill = (aDst->getRightBound() - aDst->getLeftBound() + 1) >=
13252+
aDst->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1329313253
}
1329413254
else
1329513255
{
1329613256
if (decl->getAliasOffset() != 0 ||
13297-
dst->getRegAccess() != Direct ||
13298-
dst->getRegOff() != 0 ||
13299-
dst->getSubRegOff() != 0 ||
13300-
dst->getHorzStride() != 1 ||
13257+
aDst->getRegAccess() != Direct ||
13258+
aDst->getRegOff() != 0 ||
13259+
aDst->getSubRegOff() != 0 ||
13260+
aDst->getHorzStride() != 1 ||
1330113261
I->isPartialWrite())
1330213262
{
1330313263
return;
1330413264
}
1330513265
if (fg.isPseudoDcl(primaryDcl) ||
1330613266
primaryDcl->getRegVar()->isRegVarTransient() ||
13307-
((dst->getTypeSize() * I->getExecSize()) ==
13267+
((aDst->getTypeSize() * I->getExecSize()) ==
1330813268
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1330913269
{
1331013270
needKill = true;
@@ -13407,7 +13367,7 @@ void Optimizer::doNoMaskWA()
1340713367
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1340813368

1340913369
// add pseudoKill
13410-
addPseudoKillIfFullWrite(currBB, currII, false); // dst
13370+
addPseudoKillIfFullDstWrite(currBB, currII);
1341113371

1341213372
// Create a temp that's big enough to hold data and possible gap
1341313373
// b/w data due to alignment/hw restriction.
@@ -13497,8 +13457,7 @@ void Optimizer::doNoMaskWA()
1349713457
}
1349813458

1349913459
// Add pseudo kill for dst
13500-
addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501-
addPseudoKillIfFullWrite(currBB, currII, true); // condMod
13460+
addPseudoKillIfFullDstWrite(currBB, currII);
1350213461

1350313462
bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1350413463
G4_Declare* modDcl = P->getTopDcl();
@@ -13769,7 +13728,7 @@ void Optimizer::doNoMaskWA()
1376913728
if (!condmod && !pred)
1377013729
{
1377113730
// Add pseudo Kill
13772-
addPseudoKillIfFullWrite(BB, II, false); // dst
13731+
addPseudoKillIfFullDstWrite(BB, II);
1377313732

1377413733
// case 1: no predicate, no flagModifier (common case)
1377513734
G4_Predicate* newPred = builder.createPredicate(
@@ -13867,7 +13826,7 @@ void Optimizer::doNoMaskWA()
1386713826
if (!condmod && !pred)
1386813827
{
1386913828
// Add pseudo Kill
13870-
addPseudoKillIfFullWrite(BB, II, false);
13829+
addPseudoKillIfFullDstWrite(BB, II);
1387113830

1387213831
// case 1: no predicate, no flagModifier (common case)
1387313832
G4_Predicate* newPred = builder.createPredicate(

0 commit comments

Comments
 (0)