Skip to content

Commit afa858f

Browse files
jgu222 authored and igcbot committed
Add pseudoKill to condMod too.
If a NoMask instruction without a predicate has a condMod, make sure to add a pseudo kill (pkill) for the condMod as well when it is a full write. Resubmitted because the earlier backout of this change appears to have been incorrect.
1 parent 88bb1e0 commit afa858f

File tree

1 file changed

+85
-44
lines changed

1 file changed

+85
-44
lines changed

visa/Optimizer.cpp

Lines changed: 85 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -12417,59 +12417,78 @@ void Optimizer::newDoNoMaskWA()
1241712417
return flagVar;
1241812418
};
1241912419

12420-
auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
12420+
// Check if condMod or dst is full write. If so, add pseudo kill for it.
12421+
auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
1242112422
{
1242212423
// Only NoMask Inst without predicate will call this function!
12424+
// isCondMod = true: check condMod
12425+
// = false: check dst
1242312426
G4_INST* I = *aII;
12424-
G4_DstRegRegion* aDst = I->getDst();
12425-
if (!aDst || aDst->isNullReg() ||
12426-
I->getImplAccSrc() != nullptr || I->isSend() ||
12427-
!aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
12427+
if (I->getImplAccSrc() != nullptr || I->isSend())
1242812428
{
1242912429
return;
1243012430
}
12431+
G4_CondMod* condMod = I->getCondMod();
12432+
G4_DstRegRegion* dst = I->getDst();
12433+
if (isCondMod)
12434+
{
12435+
if (!condMod || !condMod->getBase()->isRegVar() ||
12436+
condMod->getBase()->asRegVar()->getPhyReg())
12437+
{
12438+
return;
12439+
}
12440+
}
12441+
else
12442+
{
12443+
if (!dst || dst->isNullReg() ||
12444+
!dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445+
{
12446+
return;
12447+
}
12448+
}
1243112449

12450+
G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1243212451
// Make sure dst var is not used in this inst.
1243312452
{
1243412453
G4_Operand* src0_0 = I->getSrc(0);
1243512454
G4_Operand* src0_1 = I->getSrc(1);
1243612455
G4_Operand* src0_2 = I->getSrc(2);
1243712456
G4_Operand* src0_3 = I->getSrc(3);
1243812457

12439-
if ((src0_0 && src0_0->compareOperand(aDst) != Rel_disjoint) ||
12440-
(src0_1 && src0_1->compareOperand(aDst) != Rel_disjoint) ||
12441-
(src0_2 && src0_2->compareOperand(aDst) != Rel_disjoint) ||
12442-
(src0_3 && src0_3->compareOperand(aDst) != Rel_disjoint))
12458+
if ((src0_0 && src0_0->compareOperand(D) != Rel_disjoint) ||
12459+
(src0_1 && src0_1->compareOperand(D) != Rel_disjoint) ||
12460+
(src0_2 && src0_2->compareOperand(D) != Rel_disjoint) ||
12461+
(src0_3 && src0_3->compareOperand(D) != Rel_disjoint))
1244312462
{
1244412463
return;
1244512464
}
1244612465
}
1244712466

1244812467
bool needKill = false;
12449-
const G4_Declare* decl = ((const G4_RegVar*)aDst->getBase())->getDeclare();
12468+
const G4_Declare* decl = ((const G4_RegVar*)D->getBase())->getDeclare();
1245012469
const G4_Declare* primaryDcl = decl->getRootDeclare();
1245112470

12452-
if (aDst->isFlag())
12471+
if (D->isFlag() || isCondMod)
1245312472
{
1245412473
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1245512474
// For example, "mov (1|M0) P3:uw 0"
12456-
needKill = (aDst->getRightBound() - aDst->getLeftBound() + 1) >=
12457-
aDst->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12475+
needKill = (D->getRightBound() - D->getLeftBound() + 1) >=
12476+
D->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1245812477
}
1245912478
else
1246012479
{
1246112480
if (decl->getAliasOffset() != 0 ||
12462-
aDst->getRegAccess() != Direct ||
12463-
aDst->getRegOff() != 0 ||
12464-
aDst->getSubRegOff() != 0 ||
12465-
aDst->getHorzStride() != 1 ||
12481+
dst->getRegAccess() != Direct ||
12482+
dst->getRegOff() != 0 ||
12483+
dst->getSubRegOff() != 0 ||
12484+
dst->getHorzStride() != 1 ||
1246612485
I->isPartialWrite())
1246712486
{
1246812487
return;
1246912488
}
1247012489
if (fg.isPseudoDcl(primaryDcl) ||
1247112490
primaryDcl->getRegVar()->isRegVarTransient() ||
12472-
((aDst->getTypeSize() * I->getExecSize()) ==
12491+
((dst->getTypeSize() * I->getExecSize()) ==
1247312492
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1247412493
{
1247512494
needKill = true;
@@ -12592,7 +12611,7 @@ void Optimizer::newDoNoMaskWA()
1259212611
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1259312612

1259412613
// add pseudoKill
12595-
addPseudoKillIfFullDstWrite(aBB, aII);
12614+
addPseudoKillIfFullWrite(aBB, aII, false); // dst
1259612615

1259712616
// Create a temp that's big enough to hold data and possible gap
1259812617
// b/w data due to alignment/hw restriction.
@@ -12683,8 +12702,9 @@ void Optimizer::newDoNoMaskWA()
1268312702
return;
1268412703
}
1268512704

12686-
// Add pseudo kill for dst
12687-
addPseudoKillIfFullDstWrite(aBB, aII);
12705+
// Add pseudo kill for dst and condMod
12706+
addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707+
addPseudoKillIfFullWrite(aBB, aII, true); // condMod
1268812708

1268912709
const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1269012710
G4_Declare* modDcl = P->getTopDcl();
@@ -13013,7 +13033,7 @@ void Optimizer::newDoNoMaskWA()
1301313033
if (!condmod && !pred)
1301413034
{
1301513035
// Add pseudo Kill
13016-
addPseudoKillIfFullDstWrite(BB, II);
13036+
addPseudoKillIfFullWrite(BB, II, false); // dst
1301713037

1301813038
// case 1: no predicate, no flagModifier (common case)
1301913039
G4_Predicate* newPred = builder.createPredicate(
@@ -13213,58 +13233,78 @@ void Optimizer::doNoMaskWA()
1321313233
return flagVar;
1321413234
};
1321513235

13216-
auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
13236+
// Check if condMod or dst is full write. If so, add pseudo kill for it.
13237+
auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
1321713238
{
1321813239
// Only NoMask Inst without predicate will call this function!
13240+
// isCondMod = true: check condMod
13241+
// = false: check dst
1321913242
G4_INST* I = *aII;
13220-
G4_DstRegRegion* aDst = I->getDst();
13221-
if (!aDst || aDst->isNullReg() ||
13222-
I->getImplAccSrc() != nullptr || I->isSend() ||
13223-
aDst->getBase()->asRegVar()->getPhyReg())
13243+
if (I->getImplAccSrc() != nullptr || I->isSend())
1322413244
{
1322513245
return;
1322613246
}
13247+
G4_CondMod* condMod = I->getCondMod();
13248+
G4_DstRegRegion* dst = I->getDst();
13249+
if (isCondMod)
13250+
{
13251+
if (!condMod || !condMod->getBase()->isRegVar() ||
13252+
condMod->getBase()->asRegVar()->getPhyReg())
13253+
{
13254+
return;
13255+
}
13256+
}
13257+
else
13258+
{
13259+
if (!dst || dst->isNullReg() ||
13260+
!dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261+
{
13262+
return;
13263+
}
13264+
}
1322713265

13266+
G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1322813267
// Make sure dst var is not used in this inst.
1322913268
{
1323013269
G4_Operand* src0_0 = I->getSrc(0);
1323113270
G4_Operand* src0_1 = I->getSrc(1);
1323213271
G4_Operand* src0_2 = I->getSrc(2);
1323313272
G4_Operand* src0_3 = I->getSrc(3);
1323413273

13235-
if ((src0_0 && src0_0->compareOperand(aDst) != Rel_disjoint) ||
13236-
(src0_1 && src0_1->compareOperand(aDst) != Rel_disjoint) ||
13237-
(src0_2 && src0_2->compareOperand(aDst) != Rel_disjoint) ||
13238-
(src0_3 && src0_3->compareOperand(aDst) != Rel_disjoint))
13274+
if ((src0_0 && src0_0->compareOperand(D) != Rel_disjoint) ||
13275+
(src0_1 && src0_1->compareOperand(D) != Rel_disjoint) ||
13276+
(src0_2 && src0_2->compareOperand(D) != Rel_disjoint) ||
13277+
(src0_3 && src0_3->compareOperand(D) != Rel_disjoint))
1323913278
{
1324013279
return;
1324113280
}
1324213281
}
1324313282

1324413283
bool needKill = false;
13245-
const G4_Declare* decl = ((const G4_RegVar*)aDst->getBase())->getDeclare();
13284+
const G4_Declare* decl = ((const G4_RegVar*)D->getBase())->getDeclare();
1324613285
const G4_Declare* primaryDcl = decl->getRootDeclare();
13247-
if (aDst->isFlag())
13286+
13287+
if (D->isFlag() || isCondMod)
1324813288
{
1324913289
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1325013290
// For example, "mov (1|M0) P3:uw 0"
13251-
needKill = (aDst->getRightBound() - aDst->getLeftBound() + 1) >=
13252-
aDst->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13291+
needKill = (D->getRightBound() - D->getLeftBound() + 1) >=
13292+
D->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1325313293
}
1325413294
else
1325513295
{
1325613296
if (decl->getAliasOffset() != 0 ||
13257-
aDst->getRegAccess() != Direct ||
13258-
aDst->getRegOff() != 0 ||
13259-
aDst->getSubRegOff() != 0 ||
13260-
aDst->getHorzStride() != 1 ||
13297+
dst->getRegAccess() != Direct ||
13298+
dst->getRegOff() != 0 ||
13299+
dst->getSubRegOff() != 0 ||
13300+
dst->getHorzStride() != 1 ||
1326113301
I->isPartialWrite())
1326213302
{
1326313303
return;
1326413304
}
1326513305
if (fg.isPseudoDcl(primaryDcl) ||
1326613306
primaryDcl->getRegVar()->isRegVarTransient() ||
13267-
((aDst->getTypeSize() * I->getExecSize()) ==
13307+
((dst->getTypeSize() * I->getExecSize()) ==
1326813308
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1326913309
{
1327013310
needKill = true;
@@ -13367,7 +13407,7 @@ void Optimizer::doNoMaskWA()
1336713407
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1336813408

1336913409
// add pseudoKill
13370-
addPseudoKillIfFullDstWrite(currBB, currII);
13410+
addPseudoKillIfFullWrite(currBB, currII, false); // dst
1337113411

1337213412
// Create a temp that's big enough to hold data and possible gap
1337313413
// b/w data due to alignment/hw restriction.
@@ -13457,7 +13497,8 @@ void Optimizer::doNoMaskWA()
1345713497
}
1345813498

1345913499
// Add pseudo kill for dst
13460-
addPseudoKillIfFullDstWrite(currBB, currII);
13500+
addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501+
addPseudoKillIfFullWrite(currBB, currII, true); // condMod
1346113502

1346213503
bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1346313504
G4_Declare* modDcl = P->getTopDcl();
@@ -13728,7 +13769,7 @@ void Optimizer::doNoMaskWA()
1372813769
if (!condmod && !pred)
1372913770
{
1373013771
// Add pseudo Kill
13731-
addPseudoKillIfFullDstWrite(BB, II);
13772+
addPseudoKillIfFullWrite(BB, II, false); // dst
1373213773

1373313774
// case 1: no predicate, no flagModifier (common case)
1373413775
G4_Predicate* newPred = builder.createPredicate(
@@ -13826,7 +13867,7 @@ void Optimizer::doNoMaskWA()
1382613867
if (!condmod && !pred)
1382713868
{
1382813869
// Add pseudo Kill
13829-
addPseudoKillIfFullDstWrite(BB, II);
13870+
addPseudoKillIfFullWrite(BB, II, false);
1383013871

1383113872
// case 1: no predicate, no flagModifier (common case)
1383213873
G4_Predicate* newPred = builder.createPredicate(

0 commit comments

Comments
 (0)