Skip to content

Commit 9c572c3

Browse files
committed
[a64] Remove redundant OPCODE_DOT_PRODUCT_{3,4} lane-isolation
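The final `FADDP` writes its result to an `S` register. On AArch64, a scalar write to an `S` register zeroes all the remaining upper bits of the underlying vector register, so the other lanes are already zero and the explicit lane-isolation sequence (`MOVI`/`INS`/`MOV`) is redundant.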
1 parent 9c8b067 commit 9c572c3

File tree

1 file changed

+0
-10
lines changed

1 file changed

+0
-10
lines changed

src/xenia/cpu/backend/a64/a64_sequences.cc

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,11 +2111,6 @@ struct DOT_PRODUCT_3_V128
21112111
e.MOV(dest.toQ().Selem()[3], WZR);
21122112
e.FADDP(dest.toQ().S4(), dest.toQ().S4(), dest.toQ().S4());
21132113
e.FADDP(dest.toS(), dest.toD().S2());
2114-
2115-
// Isolate lower lane
2116-
e.MOVI(Q0.D2(), RepImm(0b00'00'00'00));
2117-
e.INS(Q0.Selem()[0], dest.toQ().Selem()[0]);
2118-
e.MOV(dest.toQ().B16(), Q0.B16());
21192114
});
21202115
}
21212116
};
@@ -2134,11 +2129,6 @@ struct DOT_PRODUCT_4_V128
21342129
e.FMUL(dest.toQ().S4(), src1.S4(), src2.S4());
21352130
e.FADDP(dest.toQ().S4(), dest.toQ().S4(), dest.toQ().S4());
21362131
e.FADDP(dest.toS(), dest.toD().S2());
2137-
2138-
// Isolate lower lane
2139-
e.MOVI(Q0.D2(), RepImm(0b00'00'00'00));
2140-
e.INS(Q0.Selem()[0], dest.toQ().Selem()[0]);
2141-
e.MOV(dest.toQ().B16(), Q0.B16());
21422132
});
21432133
}
21442134
};

0 commit comments

Comments
 (0)