Skip to content

Commit 1a559d0

Browse files
weiyu-chen authored and sys_zuul committed
Place stricter conditions on when to swap src1 and src2 of mad instructions.
Change-Id: I9256178d2b198fc24b9fbecc462a8e54403bd84c
1 parent 8de48a1 commit 1a559d0

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

visa/HWConformity.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3937,7 +3937,6 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39373937
MUST_BE_TRUE(inst->opcode() == G4_pseudo_mad, "expect pseudo mad");
39383938
bool mustDoMad = IS_TYPE_FLOAT_ALL(inst->getDst()->getType());
39393939

3940-
39413940
// try swapping src0 (really src2) and src1 to see if we can save a move
39423941
// some conditions where swap may help:
39433942
// -- if src0 is D, as MAD only supports D + D * W
@@ -3946,8 +3945,9 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39463945
// -- if src1 is scalar, as MAD src2 has more region restrictions
39473946
// We perform the swapping before the dst checks as some platforms require dst and src2 to have the same subreg
39483947
{
3949-
G4_Operand* src0 = inst->getSrc(0);
3950-
G4_Operand* src1 = inst->getSrc(1);
3948+
auto src0 = inst->getSrc(0);
3949+
auto src1 = inst->getSrc(1);
3950+
39513951
if (IS_DTYPE(src0->getType()) && src0->isSrcRegRegion() && !IS_DTYPE(src1->getType()))
39523952
{
39533953
inst->swapSrc(0, 1);
@@ -3957,17 +3957,21 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39573957
//swap src0 and src1 as src0 supports imm
39583958
inst->swapSrc(0, 1);
39593959
}
3960-
else if (src0->isSrcRegRegion() && !src0->asSrcRegRegion()->isScalar() &&
3961-
src1->isSrcRegRegion() &&
3962-
src1->asSrcRegRegion()->isScalar())
3960+
else if (isLowPrecisionFloatTy(src0->getType()) && src1->getType() == Type_F)
39633961
{
3964-
// Swap src0 and src1 if src1 is scalar but src0 is not, as src2 regioning support is quite limited.
39653962
inst->swapSrc(0, 1);
39663963
}
3967-
else if (isLowPrecisionFloatTy(src0->getType()) && src1->getType() == Type_F)
3964+
else if (src1->isSrcRegRegion() && src1->asSrcRegRegion()->isScalar())
39683965
{
3969-
inst->swapSrc(0, 1);
3966+
bool src0NeedMove = !isGoodAlign1TernarySrc(inst, 0, true) ||
3967+
(src0->isSrcRegRegion() && inst->getExecSize() * getTypeSize(src0->getType()) < getGRFSize());
3968+
// Swap src0 and src1 if src1 is scalar but src0 may need a move due to limited src2 regioning support.
3969+
if (src0NeedMove)
3970+
{
3971+
inst->swapSrc(0, 1);
3972+
}
39703973
}
3974+
39713975
}
39723976

39733977
if (!isGoodAlign1TernaryDst(inst))

0 commit comments

Comments
 (0)