Skip to content

Commit 0f50d6a

Browse files
committed
[a64] Detect MOVI utilizations for vector-element splats(u8,u16,u32)
The 64-bit case uses a particular replicated 8-bit immediate, so something else will have to handle that. This catches a lot of cases without having to touch memory. Does not catch cases of `1.0` (0x3f800000).
1 parent e82f9ea commit 0f50d6a

File tree

1 file changed

+54
-4
lines changed

1 file changed

+54
-4
lines changed

src/xenia/cpu/backend/a64/a64_emitter.cc

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -848,11 +848,61 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
848848
} else if (v.low == ~uint64_t(0) && v.high == ~uint64_t(0)) {
849849
// 1111...
850850
MOVI(dest.B16(), 0xFF);
851-
} else if (std::adjacent_find(std::cbegin(v.u8), std::cend(v.u8),
852-
std::not_equal_to<>()) == std::cend(v.u8)) {
853-
// 0xXX, 0xXX, 0xXX...
854-
MOVI(dest.B16(), v.u8[0]);
855851
} else {
852+
// Try to figure out some common splat-patterns to utilize MOVI rather than
853+
// stashing to memory.
854+
const bool all_same_u8 =
855+
std::adjacent_find(std::cbegin(v.u8), std::cend(v.u8),
856+
std::not_equal_to<>()) == std::cend(v.u8);
857+
858+
if (all_same_u8) {
859+
// 0xXX, 0xXX, 0xXX...
860+
MOVI(dest.B16(), v.u8[0]);
861+
return;
862+
}
863+
864+
const bool all_same_u16 =
865+
std::adjacent_find(std::cbegin(v.u16), std::cend(v.u16),
866+
std::not_equal_to<>()) == std::cend(v.u16);
867+
868+
if (all_same_u16) {
869+
if ((v.u16[0] & 0xFF00) == 0) {
870+
// 0x00XX, 0x00XX, 0x00XX...
871+
MOVI(dest.H8(), uint8_t(v.u16[0]));
872+
return;
873+
} else if ((v.u16[0] & 0x00FF) == 0) {
874+
// 0xXX00, 0xXX00, 0xXX00...
875+
MOVI(dest.H8(), uint8_t(v.u16[0] >> 8), oaknut::util::LSL, 8);
876+
return;
877+
}
878+
}
879+
880+
const bool all_same_u32 =
881+
std::adjacent_find(std::cbegin(v.u32), std::cend(v.u32),
882+
std::not_equal_to<>()) == std::cend(v.u32);
883+
884+
if (all_same_u32) {
885+
if ((v.u32[0] & 0x00FFFFFF) == 0) {
886+
// This is used a lot for certain float-splats and should be checked
887+
// first before the others
888+
// 0xXX000000, 0xXX000000, 0xXX000000...
889+
MOVI(dest.S4(), uint8_t(v.u32[0] >> 24), oaknut::util::LSL, 24);
890+
return;
891+
} else if ((v.u32[0] & 0xFFFFFF00) == 0) {
892+
// 0x000000XX, 0x000000XX, 0x000000XX...
893+
MOVI(dest.S4(), uint8_t(v.u32[0]));
894+
return;
895+
} else if ((v.u32[0] & 0xFFFF00FF) == 0) {
896+
// 0x0000XX00, 0x0000XX00, 0x0000XX00...
897+
MOVI(dest.S4(), uint8_t(v.u32[0] >> 8), oaknut::util::LSL, 8);
898+
return;
899+
} else if ((v.u32[0] & 0xFF00FFFF) == 0) {
900+
// 0x00XX0000, 0x00XX0000, 0x00XX0000...
901+
MOVI(dest.S4(), uint8_t(v.u32[0] >> 16), oaknut::util::LSL, 16);
902+
return;
903+
}
904+
}
905+
856906
// TODO(benvanik): see what other common values are.
857907
// TODO(benvanik): build constant table - 99% are reused.
858908
MovMem64(SP, kStashOffset, v.low);

0 commit comments

Comments
 (0)