@@ -12417,78 +12417,59 @@ void Optimizer::newDoNoMaskWA()
12417
12417
return flagVar;
12418
12418
};
12419
12419
12420
- // Check if condMod or dst is full write. If so, add pseudo kill for it.
12421
- auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
12420
+ auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
12422
12421
{
12423
12422
// Only NoMask Inst without predicate will call this function!
12424
- // isCondMod = true: check condMod
12425
- // = false: check dst
12426
12423
G4_INST* I = *aII;
12427
- if (I->getImplAccSrc() != nullptr || I->isSend())
12424
+ G4_DstRegRegion* aDst = I->getDst();
12425
+ if (!aDst || aDst->isNullReg() ||
12426
+ I->getImplAccSrc() != nullptr || I->isSend() ||
12427
+ !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
12428
12428
{
12429
12429
return;
12430
12430
}
12431
- G4_CondMod* condMod = I->getCondMod();
12432
- G4_DstRegRegion* dst = I->getDst();
12433
- if (isCondMod)
12434
- {
12435
- if (!condMod || !condMod->getBase()->isRegVar() ||
12436
- condMod->getBase()->asRegVar()->getPhyReg())
12437
- {
12438
- return;
12439
- }
12440
- }
12441
- else
12442
- {
12443
- if (!dst || dst->isNullReg() ||
12444
- !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445
- {
12446
- return;
12447
- }
12448
- }
12449
12431
12450
- G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
12451
12432
// Make sure dst var is not used in this inst.
12452
12433
{
12453
12434
G4_Operand* src0_0 = I->getSrc(0);
12454
12435
G4_Operand* src0_1 = I->getSrc(1);
12455
12436
G4_Operand* src0_2 = I->getSrc(2);
12456
12437
G4_Operand* src0_3 = I->getSrc(3);
12457
12438
12458
- if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
12459
- (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
12460
- (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
12461
- (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
12439
+ if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
12440
+ (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
12441
+ (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
12442
+ (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
12462
12443
{
12463
12444
return;
12464
12445
}
12465
12446
}
12466
12447
12467
12448
bool needKill = false;
12468
- const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
12449
+ const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
12469
12450
const G4_Declare* primaryDcl = decl->getRootDeclare();
12470
12451
12471
- if (D ->isFlag() || isCondMod )
12452
+ if (aDst ->isFlag())
12472
12453
{
12473
12454
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
12474
12455
// For example, "mov (1|M0) P3:uw 0"
12475
- needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
12476
- D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12456
+ needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
12457
+ aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12477
12458
}
12478
12459
else
12479
12460
{
12480
12461
if (decl->getAliasOffset() != 0 ||
12481
- dst ->getRegAccess() != Direct ||
12482
- dst ->getRegOff() != 0 ||
12483
- dst ->getSubRegOff() != 0 ||
12484
- dst ->getHorzStride() != 1 ||
12462
+ aDst ->getRegAccess() != Direct ||
12463
+ aDst ->getRegOff() != 0 ||
12464
+ aDst ->getSubRegOff() != 0 ||
12465
+ aDst ->getHorzStride() != 1 ||
12485
12466
I->isPartialWrite())
12486
12467
{
12487
12468
return;
12488
12469
}
12489
12470
if (fg.isPseudoDcl(primaryDcl) ||
12490
12471
primaryDcl->getRegVar()->isRegVarTransient() ||
12491
- ((dst ->getTypeSize() * I->getExecSize()) ==
12472
+ ((aDst ->getTypeSize() * I->getExecSize()) ==
12492
12473
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
12493
12474
{
12494
12475
needKill = true;
@@ -12611,7 +12592,7 @@ void Optimizer::newDoNoMaskWA()
12611
12592
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
12612
12593
12613
12594
// add pseudoKill
12614
- addPseudoKillIfFullWrite (aBB, aII, false); // dst
12595
+ addPseudoKillIfFullDstWrite (aBB, aII);
12615
12596
12616
12597
// Create a temp that's big enough to hold data and possible gap
12617
12598
// b/w data due to alignment/hw restriction.
@@ -12702,9 +12683,8 @@ void Optimizer::newDoNoMaskWA()
12702
12683
return;
12703
12684
}
12704
12685
12705
- // Add pseudo kill for dst and condMod
12706
- addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707
- addPseudoKillIfFullWrite(aBB, aII, true); // condMod
12686
+ // Add pseudo kill for dst
12687
+ addPseudoKillIfFullDstWrite(aBB, aII);
12708
12688
12709
12689
const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
12710
12690
G4_Declare* modDcl = P->getTopDcl();
@@ -13033,7 +13013,7 @@ void Optimizer::newDoNoMaskWA()
13033
13013
if (!condmod && !pred)
13034
13014
{
13035
13015
// Add pseudo Kill
13036
- addPseudoKillIfFullWrite (BB, II, false); // dst
13016
+ addPseudoKillIfFullDstWrite (BB, II);
13037
13017
13038
13018
// case 1: no predicate, no flagModifier (common case)
13039
13019
G4_Predicate* newPred = builder.createPredicate(
@@ -13233,78 +13213,58 @@ void Optimizer::doNoMaskWA()
13233
13213
return flagVar;
13234
13214
};
13235
13215
13236
- // Check if condMod or dst is full write. If so, add pseudo kill for it.
13237
- auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
13216
+ auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
13238
13217
{
13239
13218
// Only NoMask Inst without predicate will call this function!
13240
- // isCondMod = true: check condMod
13241
- // = false: check dst
13242
13219
G4_INST* I = *aII;
13243
- if (I->getImplAccSrc() != nullptr || I->isSend())
13220
+ G4_DstRegRegion* aDst = I->getDst();
13221
+ if (!aDst || aDst->isNullReg() ||
13222
+ I->getImplAccSrc() != nullptr || I->isSend() ||
13223
+ !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg()) // check isRegVar() before asRegVar(), as the removed dst check and newDoNoMaskWA's copy of this guard do
13244
13224
{
13245
13225
return;
13246
13226
}
13247
- G4_CondMod* condMod = I->getCondMod();
13248
- G4_DstRegRegion* dst = I->getDst();
13249
- if (isCondMod)
13250
- {
13251
- if (!condMod || !condMod->getBase()->isRegVar() ||
13252
- condMod->getBase()->asRegVar()->getPhyReg())
13253
- {
13254
- return;
13255
- }
13256
- }
13257
- else
13258
- {
13259
- if (!dst || dst->isNullReg() ||
13260
- !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261
- {
13262
- return;
13263
- }
13264
- }
13265
13227
13266
- G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
13267
13228
// Make sure dst var is not used in this inst.
13268
13229
{
13269
13230
G4_Operand* src0_0 = I->getSrc(0);
13270
13231
G4_Operand* src0_1 = I->getSrc(1);
13271
13232
G4_Operand* src0_2 = I->getSrc(2);
13272
13233
G4_Operand* src0_3 = I->getSrc(3);
13273
13234
13274
- if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
13275
- (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
13276
- (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
13277
- (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
13235
+ if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
13236
+ (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
13237
+ (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
13238
+ (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
13278
13239
{
13279
13240
return;
13280
13241
}
13281
13242
}
13282
13243
13283
13244
bool needKill = false;
13284
- const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
13245
+ const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
13285
13246
const G4_Declare* primaryDcl = decl->getRootDeclare();
13286
-
13287
- if (D->isFlag() || isCondMod)
13247
+ if (aDst->isFlag())
13288
13248
{
13289
13249
// Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
13290
13250
// For example, "mov (1|M0) P3:uw 0"
13291
- needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
13292
- D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13251
+ needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
13252
+ aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13293
13253
}
13294
13254
else
13295
13255
{
13296
13256
if (decl->getAliasOffset() != 0 ||
13297
- dst ->getRegAccess() != Direct ||
13298
- dst ->getRegOff() != 0 ||
13299
- dst ->getSubRegOff() != 0 ||
13300
- dst ->getHorzStride() != 1 ||
13257
+ aDst ->getRegAccess() != Direct ||
13258
+ aDst ->getRegOff() != 0 ||
13259
+ aDst ->getSubRegOff() != 0 ||
13260
+ aDst ->getHorzStride() != 1 ||
13301
13261
I->isPartialWrite())
13302
13262
{
13303
13263
return;
13304
13264
}
13305
13265
if (fg.isPseudoDcl(primaryDcl) ||
13306
13266
primaryDcl->getRegVar()->isRegVarTransient() ||
13307
- ((dst ->getTypeSize() * I->getExecSize()) ==
13267
+ ((aDst ->getTypeSize() * I->getExecSize()) ==
13308
13268
(primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
13309
13269
{
13310
13270
needKill = true;
@@ -13407,7 +13367,7 @@ void Optimizer::doNoMaskWA()
13407
13367
assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
13408
13368
13409
13369
// add pseudoKill
13410
- addPseudoKillIfFullWrite (currBB, currII, false); // dst
13370
+ addPseudoKillIfFullDstWrite (currBB, currII);
13411
13371
13412
13372
// Create a temp that's big enough to hold data and possible gap
13413
13373
// b/w data due to alignment/hw restriction.
@@ -13497,8 +13457,7 @@ void Optimizer::doNoMaskWA()
13497
13457
}
13498
13458
13499
13459
// Add pseudo kill for dst
13500
- addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501
- addPseudoKillIfFullWrite(currBB, currII, true); // condMod
13460
+ addPseudoKillIfFullDstWrite(currBB, currII);
13502
13461
13503
13462
bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
13504
13463
G4_Declare* modDcl = P->getTopDcl();
@@ -13769,7 +13728,7 @@ void Optimizer::doNoMaskWA()
13769
13728
if (!condmod && !pred)
13770
13729
{
13771
13730
// Add pseudo Kill
13772
- addPseudoKillIfFullWrite (BB, II, false); // dst
13731
+ addPseudoKillIfFullDstWrite (BB, II);
13773
13732
13774
13733
// case 1: no predicate, no flagModifier (common case)
13775
13734
G4_Predicate* newPred = builder.createPredicate(
@@ -13867,7 +13826,7 @@ void Optimizer::doNoMaskWA()
13867
13826
if (!condmod && !pred)
13868
13827
{
13869
13828
// Add pseudo Kill
13870
- addPseudoKillIfFullWrite (BB, II, false );
13829
+ addPseudoKillIfFullDstWrite (BB, II);
13871
13830
13872
13831
// case 1: no predicate, no flagModifier (common case)
13873
13832
G4_Predicate* newPred = builder.createPredicate(
0 commit comments