Skip to content

Commit 47d801f

Browse files
committed
[a64] Optimize constant-loads with FMOV
`FMOV` encodes an 8-bit floating-point immediate that can be used to accelerate the loading of certain constant floating-point values between -31.0 and 31.0 (magnitudes from 0.125 to 31.0 with a 4-bit fraction). A lot of immediates such as -1.0, 1.0, 0.5, etc. fall within this range, and this code gets lots of hits in my testing. This is much more efficient than loading a 32/64-bit value into W0/X0 and then moving it into an FP register.
1 parent 0f50d6a commit 47d801f

File tree

1 file changed

+94
-2
lines changed

1 file changed

+94
-2
lines changed

src/xenia/cpu/backend/a64/a64_emitter.cc

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,74 @@ std::byte* A64Emitter::GetVConstPtr(VConst id) const {
838838
return GetVConstPtr() + GetVConstOffset(id);
839839
}
840840

841+
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
842+
// returns false if the value cannot be represented
843+
// C2.2.3 Modified immediate constants in A64 floating-point instructions
844+
// abcdefgh
845+
// V
846+
// aBbbbbbc defgh000 00000000 00000000
847+
// B = NOT(b)
848+
static bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
849+
const uint32_t sign = (u32 >> 31) & 1;
850+
int32_t exp = ((u32 >> 23) & 0xff) - 127;
851+
int64_t mantissa = u32 & 0x7fffff;
852+
853+
// Too many mantissa bits
854+
if (mantissa & 0x7ffff) {
855+
return false;
856+
}
857+
// Too many exp bits
858+
if (exp < -3 || exp > 4) {
859+
return false;
860+
}
861+
862+
// mantissa = (16 + e:f:g:h) / 16.
863+
mantissa >>= 19;
864+
if ((mantissa & 0b1111) != mantissa) {
865+
return false;
866+
}
867+
868+
// exp = (NOT(b):c:d) - 3
869+
exp = ((exp + 3) & 0b111) ^ 0b100;
870+
871+
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
872+
return true;
873+
}
874+
875+
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
876+
// returns false if the value cannot be represented
877+
// C2.2.3 Modified immediate constants in A64 floating-point instructions
878+
// abcdefgh
879+
// V
880+
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
881+
// B = NOT(b)
882+
static bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
883+
const uint32_t sign = (u64 >> 63) & 1;
884+
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
885+
int64_t mantissa = u64 & 0xfffffffffffffULL;
886+
887+
// Too many mantissa bits
888+
if (mantissa & 0xffffffffffffULL) {
889+
return false;
890+
}
891+
// Too many exp bits
892+
if (exp < -3 || exp > 4) {
893+
return false;
894+
}
895+
896+
// mantissa = (16 + e:f:g:h) / 16.
897+
mantissa >>= 48;
898+
if ((mantissa & 0b1111) != mantissa) {
899+
return false;
900+
}
901+
902+
// exp = (NOT(b):c:d) - 3
903+
exp = ((exp + 3) & 0b111) ^ 0b100;
904+
905+
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
906+
return true;
907+
}
908+
841909
// Implies possible StashV(0, ...)!
842910
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
843911
if (!v.low && !v.high) {
@@ -901,6 +969,13 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
901969
MOVI(dest.S4(), uint8_t(v.u32[0] >> 16), oaknut::util::LSL, 16);
902970
return;
903971
}
972+
973+
// Try to utilize FMOV if possible
974+
oaknut::FImm8 fp8(0);
975+
if (f32_to_fimm8(v.u32[0], fp8)) {
976+
FMOV(dest.S4(), fp8);
977+
return;
978+
}
904979
}
905980

906981
// TODO(benvanik): see what other common values are.
@@ -925,8 +1000,16 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, float v) {
9251000
} else {
9261001
// TODO(benvanik): see what other common values are.
9271002
// TODO(benvanik): build constant table - 99% are reused.
1003+
1004+
// Try to utilize FMOV if possible
1005+
oaknut::FImm8 fp8(0);
1006+
if (f32_to_fimm8(x.i, fp8)) {
1007+
FMOV(dest.toS(), fp8);
1008+
return;
1009+
}
1010+
9281011
MOV(W0, x.i);
929-
MOV(dest.Selem()[0], W0);
1012+
FMOV(dest.toS(), W0);
9301013
}
9311014
}
9321015

@@ -944,8 +1027,17 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, double v) {
9441027
} else {
9451028
// TODO(benvanik): see what other common values are.
9461029
// TODO(benvanik): build constant table - 99% are reused.
1030+
1031+
// Try to utilize FMOV if possible
1032+
oaknut::FImm8 fp8(0);
1033+
if (f64_to_fimm8(x.i, fp8)) {
1034+
;
1035+
FMOV(dest.toD(), fp8);
1036+
return;
1037+
}
1038+
9471039
MOV(X0, x.i);
948-
MOV(dest.Delem()[0], X0);
1040+
FMOV(dest.toD(), X0);
9491041
}
9501042
}
9511043

0 commit comments

Comments
 (0)