@@ -46,6 +46,79 @@ SPDX-License-Identifier: MIT
46
46
47
47
using namespace vISA ;
48
48
49
+ //
50
+ // Helper class for processGoto to merge join's execution masks.
51
+ // For example,
52
+ // (p1) goto (8|M8) label
53
+ // ....
54
+ // (p2) goto (4|M4) label
55
+ // ....
56
+ // label:
57
+ // join (16|M0)
58
+ // Merge( (8|M8) and (4|M4)) will be (16|M0)!
59
+ //
60
+ // Normally, we don't see this kind of code. But vISA will generate macro sequences
61
+ // like the following, and we have to match join's execMask to all of its gotos. We
62
+ // do so by tracking execution mask (execSize + mask offset).
63
+ //
64
+ // (p) goto (8|M8) L
65
+ // ......
66
+ // L:
67
+ // join (8|M8) // not join (8|M0)
68
+ //
69
+ class ExecMaskInfo
70
+ {
71
+ uint8_t ExecSize; // 1|2|4|8|16|32
72
+ uint8_t MaskOffset; // 0|4|8|12|16|20|24|28
73
+
74
+ void mergeEM (ExecMaskInfo& aEM)
75
+ {
76
+ // The new execMask should cover at least [left, right)
77
+ const uint32_t left = std::min (MaskOffset, aEM.getMaskOffset ());
78
+ const uint32_t right = std::max (MaskOffset + ExecSize, aEM.getMaskOffset () + aEM.getExecSize ());
79
+ // Divide 32 channels into 8 quarters
80
+ uint32_t lowQuarter = left / 4 ;
81
+ uint32_t highQuarter = (right - 1 ) / 4 ;
82
+ if (lowQuarter < 4 && highQuarter >= 4 )
83
+ {
84
+ // (32, M0)
85
+ ExecSize = 32 ;
86
+ MaskOffset = 0 ;
87
+ }
88
+ else if (lowQuarter < 2 && highQuarter >= 2 )
89
+ {
90
+ // (16, M0|M16)
91
+ ExecSize = 16 ;
92
+ MaskOffset = 0 ;
93
+ }
94
+ else if (lowQuarter < 6 && highQuarter >= 6 )
95
+ {
96
+ // (16, M16)
97
+ ExecSize = 16 ;
98
+ MaskOffset = 16 ;
99
+ }
100
+ // at this time, the range resides in one of [Q0,Q1], [Q2,Q3], [Q4,Q5], and [Q6,Q7].
101
+ else
102
+ {
103
+ // (4|8, ...)
104
+ ExecSize = (lowQuarter != highQuarter ? 8 : 4 );
105
+ MaskOffset = left;
106
+ }
107
+ }
108
+
109
+ public:
110
+ ExecMaskInfo () : ExecSize(0 ), MaskOffset(0 ) {};
111
+ ExecMaskInfo (uint8_t aE, uint8_t aM) : ExecSize(aE), MaskOffset(aM) {}
112
+
113
+ uint8_t getExecSize () const { return ExecSize; }
114
+ uint8_t getMaskOffset () const { return MaskOffset; }
115
+
116
+ void mergeExecMask (G4_ExecSize aExSize, uint8_t aMaskOffset)
117
+ {
118
+ ExecMaskInfo anotherEM{ aExSize, aMaskOffset };
119
+ mergeEM (anotherEM);
120
+ }
121
+ };
49
122
50
123
void GlobalOpndHashTable::HashNode::insert (uint16_t newLB, uint16_t newRB)
51
124
{
@@ -3039,9 +3112,10 @@ G4_BB* FlowGraph::getUniqueReturnBlock()
3039
3112
3040
3113
/*
3041
3114
* Insert a join at the beginning of this basic block, immediately after the label
3042
- * If a join is already present, nothing will be done
3115
+ * If a join is already present, make sure the join will cover the given 'execSize' and
3116
+ * 'maskOffset'.
3043
3117
*/
3044
- void FlowGraph::insertJoinToBB (G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
3118
+ void FlowGraph::insertJoinToBB (G4_BB* bb, G4_ExecSize execSize, G4_Label* jip, uint8_t maskOffset )
3045
3119
{
3046
3120
MUST_BE_TRUE (bb->size () > 0 , " empty block" );
3047
3121
INST_LIST_ITER iter = bb->begin ();
@@ -3055,7 +3129,8 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
3055
3129
if (iter == bb->end ())
3056
3130
{
3057
3131
// insert join at the end
3058
- G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , InstOpt_NoOpt);
3132
+ G4_InstOption instMask = G4_INST::offsetToMask (execSize, maskOffset, builder->hasNibCtrl ());
3133
+ G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , instMask);
3059
3134
bb->push_back (jInst, false );
3060
3135
}
3061
3136
else
@@ -3064,22 +3139,34 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
3064
3139
3065
3140
if (secondInst->opcode () == G4_join)
3066
3141
{
3067
- if (execSize > secondInst->getExecSize ())
3142
+ G4_ExecSize origExSize = secondInst->getExecSize ();
3143
+ uint8_t origMaskOffset = (uint8_t )secondInst->getMaskOffset ();
3144
+ ExecMaskInfo joinEM{ origExSize, origMaskOffset };
3145
+ joinEM.mergeExecMask (execSize, maskOffset);
3146
+ if (joinEM.getExecSize () > origExSize)
3147
+ {
3148
+ secondInst->setExecSize (G4_ExecSize{ joinEM.getExecSize () });
3149
+ }
3150
+ if (joinEM.getMaskOffset () != origMaskOffset)
3068
3151
{
3069
- secondInst->setExecSize (execSize);
3152
+ G4_InstOption nMask =
3153
+ G4_INST::offsetToMask (joinEM.getExecSize (), joinEM.getMaskOffset (), builder->hasNibCtrl ());
3154
+ secondInst->setMaskOption (nMask);
3070
3155
}
3071
3156
}
3072
3157
else
3073
3158
{
3074
- G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , InstOpt_NoOpt);
3159
+ G4_InstOption instMask = G4_INST::offsetToMask (execSize, maskOffset, builder->hasNibCtrl ());
3160
+ G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , instMask);
3075
3161
bb->insertBefore (iter, jInst, false );
3076
3162
}
3077
3163
}
3078
3164
}
3079
3165
3080
- typedef std::pair<G4_BB*, G4_ExecSize> BlockSizePair;
3166
+ // For tracking execMask information of join.
3167
+ typedef std::pair<G4_BB*, ExecMaskInfo> BlockSizePair;
3081
3168
3082
- static void addBBToActiveJoinList (std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize)
3169
+ static void addBBToActiveJoinList (std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize, uint8_t maskOff )
3083
3170
{
3084
3171
// add goto target to list of active blocks that need a join
3085
3172
std::list<BlockSizePair>::iterator listIter;
@@ -3089,22 +3176,20 @@ static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4
3089
3176
if (aBB->getId () == bb->getId ())
3090
3177
{
3091
3178
// block already in list, update exec size if necessary
3092
- if (execSize > (*listIter).second )
3093
- {
3094
- (*listIter).second = execSize;
3095
- }
3179
+ ExecMaskInfo& EM = (*listIter).second ;
3180
+ EM.mergeExecMask (execSize, maskOff);
3096
3181
break ;
3097
3182
}
3098
3183
else if (aBB->getId () > bb->getId ())
3099
3184
{
3100
- activeJoinBlocks.insert (listIter, BlockSizePair (bb, execSize));
3185
+ ( void ) activeJoinBlocks.insert (listIter, BlockSizePair (bb, ExecMaskInfo ( execSize, maskOff) ));
3101
3186
break ;
3102
3187
}
3103
3188
}
3104
3189
3105
3190
if (listIter == activeJoinBlocks.end ())
3106
3191
{
3107
- activeJoinBlocks.push_back (BlockSizePair (bb, execSize));
3192
+ activeJoinBlocks.push_back (BlockSizePair (bb, ExecMaskInfo ( execSize, maskOff) ));
3108
3193
}
3109
3194
}
3110
3195
@@ -3373,7 +3458,9 @@ void FlowGraph::processGoto(bool HasSIMDCF)
3373
3458
{
3374
3459
// This block is the target of one or more forward goto,
3375
3460
// or the fall-thru of a backward goto, needs to insert a join
3376
- G4_ExecSize execSize = activeJoinBlocks.front ().second ;
3461
+ ExecMaskInfo& EM = activeJoinBlocks.front ().second ;
3462
+ uint8_t eSize = EM.getExecSize ();
3463
+ uint8_t mOff = EM.getMaskOffset ();
3377
3464
G4_Label* joinJIP = NULL ;
3378
3465
3379
3466
activeJoinBlocks.pop_front ();
@@ -3384,7 +3471,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
3384
3471
joinJIP = joinBlock->getLabel ();
3385
3472
}
3386
3473
3387
- insertJoinToBB (bb, execSize , joinJIP);
3474
+ insertJoinToBB (bb, G4_ExecSize{eSize} , joinJIP, mOff );
3388
3475
}
3389
3476
}
3390
3477
@@ -3425,7 +3512,8 @@ void FlowGraph::processGoto(bool HasSIMDCF)
3425
3512
// join) within the loop body will have its JIP set to this join.
3426
3513
if (G4_BB* afterLoopJoinBB = getEarliestJmpOutBB (activeJoinBlocks, bb, predBB))
3427
3514
{
3428
- addBBToActiveJoinList (activeJoinBlocks, afterLoopJoinBB, eSize);
3515
+ // conservatively use maskoffset = 0.
3516
+ addBBToActiveJoinList (activeJoinBlocks, afterLoopJoinBB, eSize, 0 );
3429
3517
}
3430
3518
}
3431
3519
else
@@ -3439,7 +3527,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
3439
3527
// add join to the fall-thru BB
3440
3528
if (G4_BB* fallThruBB = predBB->getPhysicalSucc ())
3441
3529
{
3442
- addBBToActiveJoinList (activeJoinBlocks, fallThruBB, eSize);
3530
+ addBBToActiveJoinList (activeJoinBlocks, fallThruBB, eSize, ( uint8_t )lastInst-> getMaskOffset () );
3443
3531
lastInst->asCFInst ()->setJip (fallThruBB->getLabel ());
3444
3532
}
3445
3533
}
@@ -3466,7 +3554,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
3466
3554
// set goto JIP to the first active block
3467
3555
G4_ExecSize eSize = lastInst->getExecSize () > g4::SIMD1 ?
3468
3556
lastInst->getExecSize () : pKernel->getSimdSize ();
3469
- addBBToActiveJoinList (activeJoinBlocks, gotoTargetBB, eSize);
3557
+ addBBToActiveJoinList (activeJoinBlocks, gotoTargetBB, eSize, ( uint8_t )lastInst-> getMaskOffset () );
3470
3558
G4_BB* joinBlock = activeJoinBlocks.front ().first ;
3471
3559
if (lastInst->getExecSize () == g4::SIMD1)
3472
3560
{ // For simd1 goto, convert it to a goto with the right execSize.
0 commit comments