Skip to content

Commit 40d2d33

Browse files
committed
[a64] Optimize OPCODE_SPLAT with MOVI/FMOV
Moves the `FMOV` constant functions into `a64_util` so it is available to other translation units. Optimize constant-splats with conditional use of `MOVI` and `FMOV`.
1 parent cde211c commit 40d2d33

File tree

3 files changed

+110
-79
lines changed

3 files changed

+110
-79
lines changed

src/xenia/cpu/backend/a64/a64_emitter.cc

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99

1010
#include "xenia/cpu/backend/a64/a64_emitter.h"
11+
#include "xenia/cpu/backend/a64/a64_util.h"
1112

1213
#include <cstddef>
1314

@@ -810,74 +811,6 @@ uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
810811
return GetVConstPtr() + GetVConstOffset(id);
811812
}
812813

813-
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
814-
// returns false if the value cannot be represented
815-
// C2.2.3 Modified immediate constants in A64 floating-point instructions
816-
// abcdefgh
817-
// V
818-
// aBbbbbbc defgh000 00000000 00000000
819-
// B = NOT(b)
820-
static bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
821-
const uint32_t sign = (u32 >> 31) & 1;
822-
int32_t exp = ((u32 >> 23) & 0xff) - 127;
823-
int64_t mantissa = u32 & 0x7fffff;
824-
825-
// Too many mantissa bits
826-
if (mantissa & 0x7ffff) {
827-
return false;
828-
}
829-
// Too many exp bits
830-
if (exp < -3 || exp > 4) {
831-
return false;
832-
}
833-
834-
// mantissa = (16 + e:f:g:h) / 16.
835-
mantissa >>= 19;
836-
if ((mantissa & 0b1111) != mantissa) {
837-
return false;
838-
}
839-
840-
// exp = (NOT(b):c:d) - 3
841-
exp = ((exp + 3) & 0b111) ^ 0b100;
842-
843-
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
844-
return true;
845-
}
846-
847-
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
848-
// returns false if the value cannot be represented
849-
// C2.2.3 Modified immediate constants in A64 floating-point instructions
850-
// abcdefgh
851-
// V
852-
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
853-
// B = NOT(b)
854-
static bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
855-
const uint32_t sign = (u64 >> 63) & 1;
856-
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
857-
int64_t mantissa = u64 & 0xfffffffffffffULL;
858-
859-
// Too many mantissa bits
860-
if (mantissa & 0xffffffffffffULL) {
861-
return false;
862-
}
863-
// Too many exp bits
864-
if (exp < -3 || exp > 4) {
865-
return false;
866-
}
867-
868-
// mantissa = (16 + e:f:g:h) / 16.
869-
mantissa >>= 48;
870-
if ((mantissa & 0b1111) != mantissa) {
871-
return false;
872-
}
873-
874-
// exp = (NOT(b):c:d) - 3
875-
exp = ((exp + 3) & 0b111) ^ 0b100;
876-
877-
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
878-
return true;
879-
}
880-
881814
// Implies possible StashV(0, ...)!
882815
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
883816
if (!v.low && !v.high) {

src/xenia/cpu/backend/a64/a64_seq_vector.cc

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99

1010
#include "xenia/cpu/backend/a64/a64_sequences.h"
11+
#include "xenia/cpu/backend/a64/a64_util.h"
1112

1213
#include <algorithm>
1314
#include <cstring>
@@ -1026,12 +1027,7 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
10261027
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
10271028
static void Emit(A64Emitter& e, const EmitArgType& i) {
10281029
if (i.src1.is_constant) {
1029-
if (i.src1.constant() <= 0xFF) {
1030-
e.MOVI(i.dest.reg().B16(), i.src1.constant());
1031-
return;
1032-
}
1033-
e.MOV(W0, i.src1.constant());
1034-
e.DUP(i.dest.reg().B16(), W0);
1030+
e.MOVI(i.dest.reg().B16(), i.src1.constant());
10351031
} else {
10361032
e.DUP(i.dest.reg().B16(), i.src1);
10371033
}
@@ -1040,9 +1036,12 @@ struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
10401036
struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
10411037
static void Emit(A64Emitter& e, const EmitArgType& i) {
10421038
if (i.src1.is_constant) {
1043-
if (i.src1.constant() <= 0xFF) {
1039+
if ((i.src1.constant() & 0xFF'00) == 0) {
10441040
e.MOVI(i.dest.reg().H8(), i.src1.constant());
10451041
return;
1042+
} else if ((i.src1.constant() & 0x00'FF) == 0) {
1043+
e.MOVI(i.dest.reg().H8(), i.src1.constant(), oaknut::util::LSL, 8);
1044+
return;
10461045
}
10471046
e.MOV(W0, i.src1.constant());
10481047
e.DUP(i.dest.reg().H8(), W0);
@@ -1054,9 +1053,22 @@ struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
10541053
struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
10551054
static void Emit(A64Emitter& e, const EmitArgType& i) {
10561055
if (i.src1.is_constant) {
1057-
if (i.src1.constant() <= 0xFF) {
1056+
oaknut::FImm8 fp8(0);
1057+
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
1058+
e.FMOV(i.dest.reg().S4(), fp8);
1059+
return;
1060+
} else if ((i.src1.constant() & 0xFF'FF'FF'00) == 0) {
10581061
e.MOVI(i.dest.reg().S4(), i.src1.constant());
10591062
return;
1063+
} else if ((i.src1.constant() & 0xFF'FF'00'FF) == 0) {
1064+
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 8);
1065+
return;
1066+
} else if ((i.src1.constant() & 0xFF'00'FF'FF) == 0) {
1067+
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 16);
1068+
return;
1069+
} else if ((i.src1.constant() & 0x00'FF'FF'FF) == 0) {
1070+
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 24);
1071+
return;
10601072
}
10611073
e.MOV(W0, i.src1.constant());
10621074
e.DUP(i.dest.reg().S4(), W0);
@@ -1068,8 +1080,24 @@ struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
10681080
struct SPLAT_F32 : Sequence<SPLAT_F32, I<OPCODE_SPLAT, V128Op, F32Op>> {
10691081
static void Emit(A64Emitter& e, const EmitArgType& i) {
10701082
if (i.src1.is_constant) {
1071-
if (i.src1.value->constant.i32 <= 0xFF) {
1072-
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32);
1083+
oaknut::FImm8 fp8(0);
1084+
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
1085+
e.FMOV(i.dest.reg().S4(), fp8);
1086+
return;
1087+
} else if ((i.src1.value->constant.u32 & 0xFF'FF'FF'00) == 0) {
1088+
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32);
1089+
return;
1090+
} else if ((i.src1.value->constant.u32 & 0xFF'FF'00'FF) == 0) {
1091+
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
1092+
8);
1093+
return;
1094+
} else if ((i.src1.value->constant.u32 & 0xFF'00'FF'FF) == 0) {
1095+
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
1096+
16);
1097+
return;
1098+
} else if ((i.src1.value->constant.u32 & 0x00'FF'FF'FF) == 0) {
1099+
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
1100+
24);
10731101
return;
10741102
}
10751103
e.MOV(W0, i.src1.value->constant.i32);

src/xenia/cpu/backend/a64/a64_util.h

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,77 @@
1717
namespace xe {
1818
namespace cpu {
1919
namespace backend {
20-
namespace a64 {} // namespace a64
20+
namespace a64 {
21+
22+
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
23+
// returns false if the value cannot be represented
24+
// C2.2.3 Modified immediate constants in A64 floating-point instructions
25+
// abcdefgh
26+
// V
27+
// aBbbbbbc defgh000 00000000 00000000
28+
// B = NOT(b)
29+
constexpr bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
// Contract: `u32` is the raw bit pattern of the value to encode. On success
// `fp8` is overwritten and true is returned. Every `return false` path runs
// before the assignment to `fp8`, so `fp8` is left unmodified on failure.
//
// Decompose the pattern: bit 31 is the sign, bits 30..23 are the exponent
// (127 is subtracted to remove the bias) and bits 22..0 are the fraction.
30+
const uint32_t sign = (u32 >> 31) & 1;
31+
int32_t exp = ((u32 >> 23) & 0xff) - 127;
32+
int64_t mantissa = u32 & 0x7fffff;
33+
34+
// Too many mantissa bits
// Only the top 4 of the 23 fraction bits can be kept; reject the value if
// any of the low 19 bits (mask 0x7ffff) is set.
35+
if (mantissa & 0x7ffff) {
36+
return false;
37+
}
38+
// Too many exp bits
// Only unbiased exponents in the inclusive range [-3, 4] are accepted, i.e.
// exactly eight distinct values that fit the 3-bit exponent field below.
39+
if (exp < -3 || exp > 4) {
40+
return false;
41+
}
42+
43+
// mantissa = (16 + e:f:g:h) / 16.
// Drop the 19 low bits that were verified to be zero, leaving e:f:g:h.
44+
mantissa >>= 19;
// The fraction was masked to 23 bits above, so after the shift at most 4
// bits remain; this comparison is a defensive guard on that invariant.
45+
if ((mantissa & 0b1111) != mantissa) {
46+
return false;
47+
}
48+
49+
// exp = (NOT(b):c:d) - 3
// exp + 3 is in 0..7 here (range-checked above). XOR with 0b100 flips the
// top bit of that 3-bit value, so the stored field is b:c:d rather than
// NOT(b):c:d.
50+
exp = ((exp + 3) & 0b111) ^ 0b100;
51+
52+
// Hand the three fields (sign, 3-bit exponent, 4-bit fraction) to FImm8.
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
53+
return true;
54+
}
55+
56+
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
57+
// returns false if the value cannot be represented
58+
// C2.2.3 Modified immediate constants in A64 floating-point instructions
59+
// abcdefgh
60+
// V
61+
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
62+
// B = NOT(b)
63+
constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
// Contract: `u64` is the raw bit pattern of the value to encode. On success
// `fp8` is overwritten and true is returned. Every `return false` path runs
// before the assignment to `fp8`, so `fp8` is left unmodified on failure.
//
// Decompose the pattern: bit 63 is the sign, bits 62..52 are the exponent
// (1023 is subtracted to remove the bias) and bits 51..0 are the fraction.
63+
const uint32_t sign = (u64 >> 63) & 1;
64+
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
65+
int64_t mantissa = u64 & 0xfffffffffffffULL;
66+
67+
// Too many mantissa bits
// Only the top 4 of the 52 fraction bits can be kept; reject the value if
// any of the low 48 bits (mask 0xffffffffffff) is set.
68+
if (mantissa & 0xffffffffffffULL) {
69+
return false;
70+
}
71+
// Too many exp bits
// Only unbiased exponents in the inclusive range [-3, 4] are accepted, i.e.
// exactly eight distinct values that fit the 3-bit exponent field below.
72+
if (exp < -3 || exp > 4) {
73+
return false;
74+
}
75+
76+
// mantissa = (16 + e:f:g:h) / 16.
// Drop the 48 low bits that were verified to be zero, leaving e:f:g:h.
77+
mantissa >>= 48;
// The fraction was masked to 52 bits above, so after the shift at most 4
// bits remain; this comparison is a defensive guard on that invariant.
78+
if ((mantissa & 0b1111) != mantissa) {
79+
return false;
80+
}
81+
82+
// exp = (NOT(b):c:d) - 3
// exp + 3 is in 0..7 here (range-checked above). XOR with 0b100 flips the
// top bit of that 3-bit value, so the stored field is b:c:d rather than
// NOT(b):c:d.
83+
exp = ((exp + 3) & 0b111) ^ 0b100;
84+
85+
// Hand the three fields (sign, 3-bit exponent, 4-bit fraction) to FImm8.
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
86+
return true;
87+
}
89+
90+
} // namespace a64
2191
} // namespace backend
2292
} // namespace cpu
2393
} // namespace xe

0 commit comments

Comments
 (0)