Skip to content

Commit bb37b85

Browse files
jgu222igcbot
authored and committed
UCF needs to track execMask in addition to execsize
Trying to replace the if-endif macro sequence with goto-join. For the following (skl): math.invm (4|M4) (eo)f1.1 r72.mme0:df r40.nomme:df r24.nomme:df (~f1.1) if (4|M4) ...... endif (4|M4) the current processGoto() generates: math.invm (4|M4) (eo)f1.1 r72.mme0:df r40.nomme:df r24.nomme:df (~f1.1) goto (4|M4) ...... join (4|M0) Note that the join's mask offset isn't correct; it should be (4|M4). This change improves processGoto() to handle the mask offset correctly.
1 parent 345a616 commit bb37b85

File tree

2 files changed

+110
-20
lines changed

2 files changed

+110
-20
lines changed

visa/FlowGraph.cpp

Lines changed: 107 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,79 @@ SPDX-License-Identifier: MIT
4646

4747
using namespace vISA;
4848

49+
//
50+
// Helper class for processGoto to merge join's execution masks.
51+
// For example,
52+
// (p1) goto (8|M8) label
53+
// ....
54+
// (p2) goto (4|M4) label
55+
// ....
56+
// label:
57+
// join (16|M0)
58+
// Merge( (8|M8) and (4|M4)) will be (16|M0)!
59+
//
60+
// Normally, we don't see this kind of code. But visa will generate macro sequence
61+
// like the following, and we have to match join's execMask to all of its gotos. We
62+
// do so by tracking excution mask (execSize + mask offset).
63+
//
64+
// (p) goto (8|M8) L
65+
// ......
66+
// L:
67+
// join (8|M8) // not join (8|M0)
68+
//
69+
class ExecMaskInfo
70+
{
71+
uint8_t ExecSize; // 1|2|4|8|16|32
72+
uint8_t MaskOffset; // 0|4|8|12|16|20|24|28
73+
74+
void mergeEM(ExecMaskInfo& aEM)
75+
{
76+
// The new execMask should cover at least [left, right)
77+
const uint32_t left = std::min(MaskOffset, aEM.getMaskOffset());
78+
const uint32_t right = std::max(MaskOffset + ExecSize, aEM.getMaskOffset() + aEM.getExecSize());
79+
// Divide 32 channels into 8 quarters
80+
uint32_t lowQuarter = left / 4;
81+
uint32_t highQuarter = (right - 1) / 4;
82+
if (lowQuarter < 4 && highQuarter >= 4)
83+
{
84+
// (32, M0)
85+
ExecSize = 32;
86+
MaskOffset = 0;
87+
}
88+
else if (lowQuarter < 2 && highQuarter >= 2)
89+
{
90+
// (16, M0|M16)
91+
ExecSize = 16;
92+
MaskOffset = 0;
93+
}
94+
else if (lowQuarter < 6 && highQuarter >= 6)
95+
{
96+
// (16, M16)
97+
ExecSize = 16;
98+
MaskOffset = 16;
99+
}
100+
// at this time, the range resides in one of [Q0,Q1], [Q2,Q3], [Q4,Q5], and [Q6,Q7].
101+
else
102+
{
103+
// (4|8, ...)
104+
ExecSize = (lowQuarter != highQuarter ? 8 : 4);
105+
MaskOffset = left;
106+
}
107+
}
108+
109+
public:
110+
ExecMaskInfo() : ExecSize(0), MaskOffset(0) {};
111+
ExecMaskInfo(uint8_t aE, uint8_t aM) : ExecSize(aE), MaskOffset(aM) {}
112+
113+
uint8_t getExecSize() const { return ExecSize; }
114+
uint8_t getMaskOffset() const { return MaskOffset; }
115+
116+
void mergeExecMask(G4_ExecSize aExSize, uint8_t aMaskOffset)
117+
{
118+
ExecMaskInfo anotherEM{ aExSize, aMaskOffset };
119+
mergeEM(anotherEM);
120+
}
121+
};
49122

50123
void GlobalOpndHashTable::HashNode::insert(uint16_t newLB, uint16_t newRB)
51124
{
@@ -3039,9 +3112,10 @@ G4_BB* FlowGraph::getUniqueReturnBlock()
30393112

30403113
/*
30413114
* Insert a join at the beginning of this basic block, immediately after the label
3042-
* If a join is already present, nothing will be done
3115+
* If a join is already present, make sure the join will cover the given 'execSize' and
3116+
* 'maskOffset'.
30433117
*/
3044-
void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
3118+
void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip, uint8_t maskOffset)
30453119
{
30463120
MUST_BE_TRUE(bb->size() > 0, "empty block");
30473121
INST_LIST_ITER iter = bb->begin();
@@ -3055,7 +3129,8 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
30553129
if (iter == bb->end())
30563130
{
30573131
// insert join at the end
3058-
G4_INST* jInst = builder->createInternalCFInst(NULL, G4_join, execSize, jip, NULL, InstOpt_NoOpt);
3132+
G4_InstOption instMask = G4_INST::offsetToMask(execSize, maskOffset, builder->hasNibCtrl());
3133+
G4_INST* jInst = builder->createInternalCFInst(NULL, G4_join, execSize, jip, NULL, instMask);
30593134
bb->push_back(jInst, false);
30603135
}
30613136
else
@@ -3064,22 +3139,34 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
30643139

30653140
if (secondInst->opcode() == G4_join)
30663141
{
3067-
if (execSize > secondInst->getExecSize())
3142+
G4_ExecSize origExSize = secondInst->getExecSize();
3143+
uint8_t origMaskOffset = (uint8_t)secondInst->getMaskOffset();
3144+
ExecMaskInfo joinEM{ origExSize, origMaskOffset };
3145+
joinEM.mergeExecMask(execSize, maskOffset);
3146+
if (joinEM.getExecSize() > origExSize)
3147+
{
3148+
secondInst->setExecSize(G4_ExecSize{ joinEM.getExecSize() });
3149+
}
3150+
if (joinEM.getMaskOffset() != origMaskOffset)
30683151
{
3069-
secondInst->setExecSize(execSize);
3152+
G4_InstOption nMask =
3153+
G4_INST::offsetToMask(joinEM.getExecSize(), joinEM.getMaskOffset(), builder->hasNibCtrl());
3154+
secondInst->setMaskOption(nMask);
30703155
}
30713156
}
30723157
else
30733158
{
3074-
G4_INST* jInst = builder->createInternalCFInst(NULL, G4_join, execSize, jip, NULL, InstOpt_NoOpt);
3159+
G4_InstOption instMask = G4_INST::offsetToMask(execSize, maskOffset, builder->hasNibCtrl());
3160+
G4_INST* jInst = builder->createInternalCFInst(NULL, G4_join, execSize, jip, NULL, instMask);
30753161
bb->insertBefore(iter, jInst, false);
30763162
}
30773163
}
30783164
}
30793165

3080-
typedef std::pair<G4_BB*, G4_ExecSize> BlockSizePair;
3166+
// For tracking execMask information of join.
3167+
typedef std::pair<G4_BB*, ExecMaskInfo> BlockSizePair;
30813168

3082-
static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize)
3169+
static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize, uint8_t maskOff)
30833170
{
30843171
// add goto target to list of active blocks that need a join
30853172
std::list<BlockSizePair>::iterator listIter;
@@ -3089,22 +3176,20 @@ static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4
30893176
if (aBB->getId() == bb->getId())
30903177
{
30913178
// block already in list, update exec size if necessary
3092-
if (execSize > (*listIter).second)
3093-
{
3094-
(*listIter).second = execSize;
3095-
}
3179+
ExecMaskInfo& EM = (*listIter).second;
3180+
EM.mergeExecMask(execSize, maskOff);
30963181
break;
30973182
}
30983183
else if (aBB->getId() > bb->getId())
30993184
{
3100-
activeJoinBlocks.insert(listIter, BlockSizePair(bb, execSize));
3185+
(void) activeJoinBlocks.insert(listIter, BlockSizePair(bb, ExecMaskInfo(execSize, maskOff)));
31013186
break;
31023187
}
31033188
}
31043189

31053190
if (listIter == activeJoinBlocks.end())
31063191
{
3107-
activeJoinBlocks.push_back(BlockSizePair(bb, execSize));
3192+
activeJoinBlocks.push_back(BlockSizePair(bb, ExecMaskInfo(execSize, maskOff)));
31083193
}
31093194
}
31103195

@@ -3373,7 +3458,9 @@ void FlowGraph::processGoto(bool HasSIMDCF)
33733458
{
33743459
// This block is the target of one or more forward goto,
33753460
// or the fall-thru of a backward goto, needs to insert a join
3376-
G4_ExecSize execSize = activeJoinBlocks.front().second;
3461+
ExecMaskInfo& EM = activeJoinBlocks.front().second;
3462+
uint8_t eSize = EM.getExecSize();
3463+
uint8_t mOff = EM.getMaskOffset();
33773464
G4_Label* joinJIP = NULL;
33783465

33793466
activeJoinBlocks.pop_front();
@@ -3384,7 +3471,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
33843471
joinJIP = joinBlock->getLabel();
33853472
}
33863473

3387-
insertJoinToBB(bb, execSize, joinJIP);
3474+
insertJoinToBB(bb, G4_ExecSize{eSize}, joinJIP, mOff);
33883475
}
33893476
}
33903477

@@ -3425,7 +3512,8 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34253512
// join) within the loop body will have its JIP set to this join.
34263513
if (G4_BB* afterLoopJoinBB = getEarliestJmpOutBB(activeJoinBlocks, bb, predBB))
34273514
{
3428-
addBBToActiveJoinList(activeJoinBlocks, afterLoopJoinBB, eSize);
3515+
// conservatively use maskoffset = 0.
3516+
addBBToActiveJoinList(activeJoinBlocks, afterLoopJoinBB, eSize, 0);
34293517
}
34303518
}
34313519
else
@@ -3439,7 +3527,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34393527
// add join to the fall-thru BB
34403528
if (G4_BB* fallThruBB = predBB->getPhysicalSucc())
34413529
{
3442-
addBBToActiveJoinList(activeJoinBlocks, fallThruBB, eSize);
3530+
addBBToActiveJoinList(activeJoinBlocks, fallThruBB, eSize, (uint8_t)lastInst->getMaskOffset());
34433531
lastInst->asCFInst()->setJip(fallThruBB->getLabel());
34443532
}
34453533
}
@@ -3466,7 +3554,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34663554
// set goto JIP to the first active block
34673555
G4_ExecSize eSize = lastInst->getExecSize() > g4::SIMD1 ?
34683556
lastInst->getExecSize() : pKernel->getSimdSize();
3469-
addBBToActiveJoinList(activeJoinBlocks, gotoTargetBB, eSize);
3557+
addBBToActiveJoinList(activeJoinBlocks, gotoTargetBB, eSize, (uint8_t)lastInst->getMaskOffset());
34703558
G4_BB* joinBlock = activeJoinBlocks.front().first;
34713559
if (lastInst->getExecSize() == g4::SIMD1)
34723560
{ // For simd1 goto, convert it to a goto with the right execSize.

visa/FlowGraph.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,9 @@ class FlowGraph
373373
void normalizeSubRoutineBB(FuncInfoHashTable& funcInfoTable);
374374
void processGoto(bool HasSIMDCF);
375375
void processSCF(FuncInfoHashTable& FuncInfoMap);
376-
void insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip);
376+
// Insert a join at the beginning of 'bb' with given 'execsize' and 'maskoffset'.
377+
// If a join is already present, update that join to cover the given 'execsize' and 'maskoffset'.
378+
void insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip, uint8_t maskOffset = 0);
377379

378380
// functions for structure analysis
379381
G4_Kernel *getKernel() const { return pKernel; }

0 commit comments

Comments
 (0)