@@ -3937,7 +3937,6 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
3937
3937
MUST_BE_TRUE (inst->opcode () == G4_pseudo_mad, " expect pseudo mad" );
3938
3938
bool mustDoMad = IS_TYPE_FLOAT_ALL (inst->getDst ()->getType ());
3939
3939
3940
-
3941
3940
// try swapping src0 (really src2) and src1 to see if we can save a move
3942
3941
// some conditions where swap may help:
3943
3942
// -- if src0 is D, as MAD only supports D + D * W
@@ -3946,8 +3945,9 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
3946
3945
// -- if src1 is scalar, as MAD src2 has more region restrictions
3947
3946
// We perform the swapping before the dst checks as some platforms require dst and src2 to have the same subreg
3948
3947
{
3949
- G4_Operand* src0 = inst->getSrc (0 );
3950
- G4_Operand* src1 = inst->getSrc (1 );
3948
+ auto src0 = inst->getSrc (0 );
3949
+ auto src1 = inst->getSrc (1 );
3950
+
3951
3951
if (IS_DTYPE (src0->getType ()) && src0->isSrcRegRegion () && !IS_DTYPE (src1->getType ()))
3952
3952
{
3953
3953
inst->swapSrc (0 , 1 );
@@ -3957,17 +3957,21 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
3957
3957
// swap src0 and src1 as src0 supports imm
3958
3958
inst->swapSrc (0 , 1 );
3959
3959
}
3960
- else if (src0->isSrcRegRegion () && !src0->asSrcRegRegion ()->isScalar () &&
3961
- src1->isSrcRegRegion () &&
3962
- src1->asSrcRegRegion ()->isScalar ())
3960
+ else if (isLowPrecisionFloatTy (src0->getType ()) && src1->getType () == Type_F)
3963
3961
{
3964
- // Swap src0 and src1 if src1 is scalar but src0 is not, as src2 regioning support is quite limited.
3965
3962
inst->swapSrc (0 , 1 );
3966
3963
}
3967
- else if (isLowPrecisionFloatTy (src0-> getType ()) && src1->getType () == Type_F )
3964
+ else if (src1-> isSrcRegRegion () && src1->asSrcRegRegion ()-> isScalar () )
3968
3965
{
3969
- inst->swapSrc (0 , 1 );
3966
+ bool src0NeedMove = !isGoodAlign1TernarySrc (inst, 0 , true ) ||
3967
+ (src0->isSrcRegRegion () && inst->getExecSize () * getTypeSize (src0->getType ()) < getGRFSize ());
3968
+ // Swap src0 and src1 if src1 is scalar but src0 may need a move due to limited src2 regioning support.
3969
+ if (src0NeedMove)
3970
+ {
3971
+ inst->swapSrc (0 , 1 );
3972
+ }
3970
3973
}
3974
+
3971
3975
}
3972
3976
3973
3977
if (!isGoodAlign1TernaryDst (inst))
0 commit comments