@@ -12417,59 +12417,78 @@ void Optimizer::newDoNoMaskWA()
12417
12417
return flagVar;
12418
12418
};
12419
12419
12420
- auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
12420
+ // Check if condMod or dst is full write. If so, add pseudo kill for it.
12421
+ auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
12421
12422
{
12422
12423
// Only NoMask Inst without predicate will call this function!
12424
+ // isCondMod = true: check condMod
12425
+ // = false: check dst
12423
12426
G4_INST* I = *aII;
12424
- G4_DstRegRegion* aDst = I->getDst();
12425
- if (!aDst || aDst->isNullReg() ||
12426
- I->getImplAccSrc() != nullptr || I->isSend() ||
12427
- !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
12427
+ if (I->getImplAccSrc() != nullptr || I->isSend())
12428
12428
{
12429
12429
return;
12430
12430
}
12431
+ G4_CondMod* condMod = I->getCondMod();
12432
+ G4_DstRegRegion* dst = I->getDst();
12433
+ if (isCondMod)
12434
+ {
12435
+ if (!condMod || !condMod->getBase()->isRegVar() ||
12436
+ condMod->getBase()->asRegVar()->getPhyReg())
12437
+ {
12438
+ return;
12439
+ }
12440
+ }
12441
+ else
12442
+ {
12443
+ if (!dst || dst->isNullReg() ||
12444
+ !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445
+ {
12446
+ return;
12447
+ }
12448
+ }
12431
12449
12450
+ G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
12432
12451
// Make sure the written var (dst or condMod, per isCondMod) is not used as a source in this inst.
12433
12452
{
12434
12453
G4_Operand* src0_0 = I->getSrc(0);
12435
12454
G4_Operand* src0_1 = I->getSrc(1);
12436
12455
G4_Operand* src0_2 = I->getSrc(2);
12437
12456
G4_Operand* src0_3 = I->getSrc(3);
12438
12457
12439
- if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
12440
- (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
12441
- (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
12442
- (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
12458
+ if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
12459
+ (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
12460
+ (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
12461
+ (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
12443
12462
{
12444
12463
return;
12445
12464
}
12446
12465
}
12447
12466
12448
12467
bool needKill = false;
12449
- const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
12468
+ const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
12450
12469
const G4_Declare* primaryDcl = decl->getRootDeclare();
12451
12470
12452
- if (aDst ->isFlag())
12471
+ if (D ->isFlag() || isCondMod )
12453
12472
{
12454
12473
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
12455
12474
// For example, "mov (1|M0) P3:uw 0"
12456
- needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
12457
- aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12475
+ needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
12476
+ D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12458
12477
}
12459
12478
else
12460
12479
{
12461
12480
if (decl->getAliasOffset() != 0 ||
12462
- aDst ->getRegAccess() != Direct ||
12463
- aDst ->getRegOff() != 0 ||
12464
- aDst ->getSubRegOff() != 0 ||
12465
- aDst ->getHorzStride() != 1 ||
12481
+ dst ->getRegAccess() != Direct ||
12482
+ dst ->getRegOff() != 0 ||
12483
+ dst ->getSubRegOff() != 0 ||
12484
+ dst ->getHorzStride() != 1 ||
12466
12485
I->isPartialWrite())
12467
12486
{
12468
12487
return;
12469
12488
}
12470
12489
if (fg.isPseudoDcl(primaryDcl) ||
12471
12490
primaryDcl->getRegVar()->isRegVarTransient() ||
12472
- ((aDst ->getTypeSize() * I->getExecSize()) ==
12491
+ ((dst ->getTypeSize() * I->getExecSize()) ==
12473
12492
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
12474
12493
{
12475
12494
needKill = true;
@@ -12592,7 +12611,7 @@ void Optimizer::newDoNoMaskWA()
12592
12611
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
12593
12612
12594
12613
// add pseudoKill
12595
- addPseudoKillIfFullDstWrite (aBB, aII);
12614
+ addPseudoKillIfFullWrite (aBB, aII, false); // dst
12596
12615
12597
12616
// Create a temp that's big enough to hold data and possible gap
12598
12617
// b/w data due to alignment/hw restriction.
@@ -12683,8 +12702,9 @@ void Optimizer::newDoNoMaskWA()
12683
12702
return;
12684
12703
}
12685
12704
12686
- // Add pseudo kill for dst
12687
- addPseudoKillIfFullDstWrite(aBB, aII);
12705
+ // Add pseudo kill for dst and condMod
12706
+ addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707
+ addPseudoKillIfFullWrite(aBB, aII, true); // condMod
12688
12708
12689
12709
const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
12690
12710
G4_Declare* modDcl = P->getTopDcl();
@@ -13013,7 +13033,7 @@ void Optimizer::newDoNoMaskWA()
13013
13033
if (!condmod && !pred)
13014
13034
{
13015
13035
// Add pseudo Kill
13016
- addPseudoKillIfFullDstWrite (BB, II);
13036
+ addPseudoKillIfFullWrite (BB, II, false); // dst
13017
13037
13018
13038
// case 1: no predicate, no flagModifier (common case)
13019
13039
G4_Predicate* newPred = builder.createPredicate(
@@ -13213,58 +13233,78 @@ void Optimizer::doNoMaskWA()
13213
13233
return flagVar;
13214
13234
};
13215
13235
13216
- auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
13236
+ // Check if condMod or dst is full write. If so, add pseudo kill for it.
13237
+ auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
13217
13238
{
13218
13239
// Only NoMask Inst without predicate will call this function!
13240
+ // isCondMod = true: check condMod
13241
+ // = false: check dst
13219
13242
G4_INST* I = *aII;
13220
- G4_DstRegRegion* aDst = I->getDst();
13221
- if (!aDst || aDst->isNullReg() ||
13222
- I->getImplAccSrc() != nullptr || I->isSend() ||
13223
- aDst->getBase()->asRegVar()->getPhyReg())
13243
+ if (I->getImplAccSrc() != nullptr || I->isSend())
13224
13244
{
13225
13245
return;
13226
13246
}
13247
+ G4_CondMod* condMod = I->getCondMod();
13248
+ G4_DstRegRegion* dst = I->getDst();
13249
+ if (isCondMod)
13250
+ {
13251
+ if (!condMod || !condMod->getBase()->isRegVar() ||
13252
+ condMod->getBase()->asRegVar()->getPhyReg())
13253
+ {
13254
+ return;
13255
+ }
13256
+ }
13257
+ else
13258
+ {
13259
+ if (!dst || dst->isNullReg() ||
13260
+ !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261
+ {
13262
+ return;
13263
+ }
13264
+ }
13227
13265
13266
+ G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
13228
13267
// Make sure the written var (dst or condMod, per isCondMod) is not used as a source in this inst.
13229
13268
{
13230
13269
G4_Operand* src0_0 = I->getSrc(0);
13231
13270
G4_Operand* src0_1 = I->getSrc(1);
13232
13271
G4_Operand* src0_2 = I->getSrc(2);
13233
13272
G4_Operand* src0_3 = I->getSrc(3);
13234
13273
13235
- if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
13236
- (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
13237
- (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
13238
- (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
13274
+ if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
13275
+ (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
13276
+ (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
13277
+ (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
13239
13278
{
13240
13279
return;
13241
13280
}
13242
13281
}
13243
13282
13244
13283
bool needKill = false;
13245
- const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
13284
+ const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
13246
13285
const G4_Declare* primaryDcl = decl->getRootDeclare();
13247
- if (aDst->isFlag())
13286
+
13287
+ if (D->isFlag() || isCondMod)
13248
13288
{
13249
13289
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
13250
13290
// For example, "mov (1|M0) P3:uw 0"
13251
- needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
13252
- aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13291
+ needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
13292
+ D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13253
13293
}
13254
13294
else
13255
13295
{
13256
13296
if (decl->getAliasOffset() != 0 ||
13257
- aDst ->getRegAccess() != Direct ||
13258
- aDst ->getRegOff() != 0 ||
13259
- aDst ->getSubRegOff() != 0 ||
13260
- aDst ->getHorzStride() != 1 ||
13297
+ dst ->getRegAccess() != Direct ||
13298
+ dst ->getRegOff() != 0 ||
13299
+ dst ->getSubRegOff() != 0 ||
13300
+ dst ->getHorzStride() != 1 ||
13261
13301
I->isPartialWrite())
13262
13302
{
13263
13303
return;
13264
13304
}
13265
13305
if (fg.isPseudoDcl(primaryDcl) ||
13266
13306
primaryDcl->getRegVar()->isRegVarTransient() ||
13267
- ((aDst ->getTypeSize() * I->getExecSize()) ==
13307
+ ((dst ->getTypeSize() * I->getExecSize()) ==
13268
13308
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
13269
13309
{
13270
13310
needKill = true;
@@ -13367,7 +13407,7 @@ void Optimizer::doNoMaskWA()
13367
13407
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
13368
13408
13369
13409
// add pseudoKill
13370
- addPseudoKillIfFullDstWrite (currBB, currII);
13410
+ addPseudoKillIfFullWrite (currBB, currII, false); // dst
13371
13411
13372
13412
// Create a temp that's big enough to hold data and possible gap
13373
13413
// b/w data due to alignment/hw restriction.
@@ -13457,7 +13497,8 @@ void Optimizer::doNoMaskWA()
13457
13497
}
13458
13498
13459
13499
// Add pseudo kill for dst and condMod
13460
- addPseudoKillIfFullDstWrite(currBB, currII);
13500
+ addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501
+ addPseudoKillIfFullWrite(currBB, currII, true); // condMod
13461
13502
13462
13503
bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
13463
13504
G4_Declare* modDcl = P->getTopDcl();
@@ -13728,7 +13769,7 @@ void Optimizer::doNoMaskWA()
13728
13769
if (!condmod && !pred)
13729
13770
{
13730
13771
// Add pseudo Kill
13731
- addPseudoKillIfFullDstWrite (BB, II);
13772
+ addPseudoKillIfFullWrite (BB, II, false); // dst
13732
13773
13733
13774
// case 1: no predicate, no flagModifier (common case)
13734
13775
G4_Predicate* newPred = builder.createPredicate(
@@ -13826,7 +13867,7 @@ void Optimizer::doNoMaskWA()
13826
13867
if (!condmod && !pred)
13827
13868
{
13828
13869
// Add pseudo Kill
13829
- addPseudoKillIfFullDstWrite (BB, II);
13870
+ addPseudoKillIfFullWrite (BB, II, false );
13830
13871
13831
13872
// case 1: no predicate, no flagModifier (common case)
13832
13873
G4_Predicate* newPred = builder.createPredicate(
0 commit comments