Skip to content

Commit 2486725

Browse files
committed
[a64] Optimize OPCODE_SPLAT byte-constants
Byte-sized constants can utilize the `MOVI` instruction. This makes many cases, such as zero-splats, much faster, since this encodes as just a register rename (similar to `xor` on x64).
1 parent 5c715c3 commit 2486725

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

src/xenia/cpu/backend/a64/a64_seq_vector.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,10 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
10261026
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
10271027
static void Emit(A64Emitter& e, const EmitArgType& i) {
10281028
if (i.src1.is_constant) {
1029+
if (i.src1.constant() <= 0xFF) {
1030+
e.MOVI(i.dest.reg().B16(), i.src1.constant());
1031+
return;
1032+
}
10291033
e.MOV(W0, i.src1.constant());
10301034
e.DUP(i.dest.reg().B16(), W0);
10311035
} else {
@@ -1036,6 +1040,10 @@ struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
10361040
struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
10371041
static void Emit(A64Emitter& e, const EmitArgType& i) {
10381042
if (i.src1.is_constant) {
1043+
if (i.src1.constant() <= 0xFF) {
1044+
e.MOVI(i.dest.reg().H8(), i.src1.constant());
1045+
return;
1046+
}
10391047
e.MOV(W0, i.src1.constant());
10401048
e.DUP(i.dest.reg().H8(), W0);
10411049
} else {
@@ -1046,6 +1054,10 @@ struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
10461054
struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
10471055
static void Emit(A64Emitter& e, const EmitArgType& i) {
10481056
if (i.src1.is_constant) {
1057+
if (i.src1.constant() <= 0xFF) {
1058+
e.MOVI(i.dest.reg().S4(), i.src1.constant());
1059+
return;
1060+
}
10491061
e.MOV(W0, i.src1.constant());
10501062
e.DUP(i.dest.reg().S4(), W0);
10511063
} else {
@@ -1056,6 +1068,10 @@ struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
10561068
struct SPLAT_F32 : Sequence<SPLAT_F32, I<OPCODE_SPLAT, V128Op, F32Op>> {
10571069
static void Emit(A64Emitter& e, const EmitArgType& i) {
10581070
if (i.src1.is_constant) {
1071+
if (i.src1.value->constant.i32 <= 0xFF) {
1072+
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32);
1073+
return;
1074+
}
10591075
e.MOV(W0, i.src1.value->constant.i32);
10601076
e.DUP(i.dest.reg().S4(), W0);
10611077
} else {

0 commit comments

Comments
 (0)