Skip to content

Commit 4fa2462

Browse files
committed
[a64] Use VectorCodeGenerator rather than CodeBlock+CodeGenerator
The emitter doesn't actually hold onto executable code; it just generates the assembly data into a buffer for the currently-resolving function before placing it into the code cache. When code gets pushed into the code cache, it can simply be copied from a `std::vector`, which is then reset. The code cache itself maintains the actual executable memory, the stack-unwinding code, and such. This also fixes a bunch of erroneous relative-addressing glitches where relative addresses were calculated based on the address of the unused CodeBlock rather than being position-independent. `MOVP2R` in particular was generating different instructions depending on its distance from the code block, when it should always just use `MOV` and not do any relative-address calculations, since we can't predict where the actual instruction's offset will be (that is, we cannot predict what the program counter will be). Oaknut probably needs a "position independent" policy or mode or something so that it avoids PC-relative instructions.
1 parent d926928 commit 4fa2462

File tree

4 files changed

+36
-35
lines changed

4 files changed

+36
-35
lines changed

src/xenia/cpu/backend/a64/a64_emitter.cc

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,7 @@ const uint8_t A64Emitter::fpr_reg_map_[A64Emitter::FPR_COUNT] = {
7373
};
7474

7575
A64Emitter::A64Emitter(A64Backend* backend)
76-
: CodeBlock(kMaxCodeSize),
77-
CodeGenerator(CodeBlock::ptr()),
76+
: VectorCodeGenerator(assembly_buffer),
7877
processor_(backend->processor()),
7978
backend_(backend),
8079
code_cache_(backend->code_cache()) {
@@ -138,23 +137,22 @@ bool A64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
138137
void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
139138
GuestFunction* function) {
140139
// Copy the current oaknut instruction-buffer into the code-cache
141-
uint32_t* old_address = CodeBlock::ptr();
142140
void* new_execute_address;
143141
void* new_write_address;
144142

145143
assert_true(func_info.code_size.total == offset());
146144

147145
if (function) {
148-
code_cache_->PlaceGuestCode(function->address(), CodeBlock::ptr(),
146+
code_cache_->PlaceGuestCode(function->address(), assembly_buffer.data(),
149147
func_info, function, new_execute_address,
150148
new_write_address);
151149
} else {
152-
code_cache_->PlaceHostCode(0, CodeBlock::ptr(), func_info,
150+
code_cache_->PlaceHostCode(0, assembly_buffer.data(), func_info,
153151
new_execute_address, new_write_address);
154152
}
155153

156154
// Reset the oaknut instruction-buffer
157-
set_wptr(reinterpret_cast<uint32_t*>(old_address));
155+
assembly_buffer.clear();
158156
label_lookup_.clear();
159157

160158
return new_execute_address;
@@ -224,7 +222,8 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
224222

225223
// Call count.
226224
MOV(W0, 1);
227-
MOVP2R(X5, low_address(&trace_header->function_call_count));
225+
MOV(X5, reinterpret_cast<uintptr_t>(
226+
low_address(&trace_header->function_call_count)));
228227
LDADDAL(X0, X0, X5);
229228

230229
// Get call history slot.
@@ -234,16 +233,17 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
234233
AND(W0, W0, 0b00000011);
235234

236235
// Record call history value into slot (guest addr in W1).
237-
MOV(X5, uint32_t(
238-
uint64_t(low_address(&trace_header->function_caller_history))));
236+
MOV(X5, reinterpret_cast<uintptr_t>(
237+
low_address(&trace_header->function_caller_history)));
239238
STR(W1, X5, X0, oaknut::IndexExt::LSL, 2);
240239

241240
// Calling thread. Load X0 with thread ID.
242241
EmitGetCurrentThreadId();
243242
MOV(W5, 1);
244243
LSL(W0, W5, W0);
245244

246-
MOVP2R(X5, low_address(&trace_header->function_thread_use));
245+
MOV(X5, reinterpret_cast<uintptr_t>(
246+
low_address(&trace_header->function_thread_use)));
247247
LDSET(W0, WZR, X5);
248248
}
249249

@@ -334,8 +334,9 @@ void A64Emitter::MarkSourceOffset(const Instr* i) {
334334
const uint32_t instruction_index =
335335
(entry->guest_address - trace_data_->start_address()) / 4;
336336
MOV(X0, 1);
337-
MOVP2R(X1, low_address(trace_data_->instruction_execute_counts() +
338-
instruction_index * 8));
337+
MOV(X1, reinterpret_cast<uintptr_t>(
338+
low_address(trace_data_->instruction_execute_counts() +
339+
instruction_index * 8)));
339340
LDADDAL(X0, ZR, X1);
340341
}
341342
}
@@ -803,11 +804,9 @@ void A64Emitter::FreeConstData(uintptr_t data) {
803804
memory::DeallocationType::kRelease);
804805
}
805806

806-
std::byte* A64Emitter::GetVConstPtr() const {
807-
return reinterpret_cast<std::byte*>(backend_->emitter_data());
808-
}
807+
uintptr_t A64Emitter::GetVConstPtr() const { return backend_->emitter_data(); }
809808

810-
std::byte* A64Emitter::GetVConstPtr(VConst id) const {
809+
uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
811810
// Load through fixed constant table setup by PlaceConstData.
812811
// It's important that the pointer is not signed, as it will be sign-extended.
813812
return GetVConstPtr() + GetVConstOffset(id);

src/xenia/cpu/backend/a64/a64_emitter.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ enum A64EmitterFeatureFlags {
122122
kA64EmitF16C = 1 << 1,
123123
};
124124

125-
class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
125+
class A64Emitter : public oaknut::VectorCodeGenerator {
126126
public:
127127
A64Emitter(A64Backend* backend);
128128
virtual ~A64Emitter();
@@ -203,8 +203,8 @@ class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
203203
static bool ConstantFitsIn32Reg(uint64_t v);
204204
void MovMem64(const oaknut::XRegSp& addr, intptr_t offset, uint64_t v);
205205

206-
std::byte* GetVConstPtr() const;
207-
std::byte* GetVConstPtr(VConst id) const;
206+
uintptr_t GetVConstPtr() const;
207+
uintptr_t GetVConstPtr(VConst id) const;
208208
static constexpr uintptr_t GetVConstOffset(VConst id) {
209209
return sizeof(vec128_t) * id;
210210
}
@@ -239,6 +239,8 @@ class A64Emitter : public oaknut::CodeBlock, public oaknut::CodeGenerator {
239239
A64CodeCache* code_cache_ = nullptr;
240240
uint32_t feature_flags_ = 0;
241241

242+
std::vector<std::uint32_t> assembly_buffer;
243+
242244
oaknut::Label* epilog_label_ = nullptr;
243245

244246
// Convert from plain-text label-names into oaknut-labels

src/xenia/cpu/backend/a64/a64_seq_vector.cc

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ struct LOAD_VECTOR_SHL_I8
8383
if (i.src1.is_constant) {
8484
auto sh = i.src1.constant();
8585
assert_true(sh < xe::countof(lvsl_table));
86-
e.MOVP2R(X0, &lvsl_table[sh]);
86+
e.MOV(X0, reinterpret_cast<uintptr_t>(&lvsl_table[sh]));
8787
e.LDR(i.dest, X0);
8888
} else {
89-
e.MOVP2R(X0, lvsl_table);
89+
e.MOV(X0, reinterpret_cast<uintptr_t>(lvsl_table));
9090
e.AND(X1, i.src1.reg().toX(), 0xf);
9191
e.LDR(i.dest, X0, X1, IndexExt::LSL, 4);
9292
}
@@ -121,10 +121,10 @@ struct LOAD_VECTOR_SHR_I8
121121
if (i.src1.is_constant) {
122122
auto sh = i.src1.constant();
123123
assert_true(sh < xe::countof(lvsr_table));
124-
e.MOVP2R(X0, &lvsr_table[sh]);
124+
e.MOV(X0, reinterpret_cast<uintptr_t>(&lvsr_table[sh]));
125125
e.LDR(i.dest, X0);
126126
} else {
127-
e.MOVP2R(X0, lvsr_table);
127+
e.MOV(X0, reinterpret_cast<uintptr_t>(lvsr_table));
128128
e.AND(X1, i.src1.reg().toX(), 0xf);
129129
e.LDR(i.dest, X0, X1, IndexExt::LSL, 4);
130130
}
@@ -1007,7 +1007,7 @@ struct EXTRACT_I32
10071007
e.AND(X0, i.src2.reg().toX(), 0b11);
10081008
e.LSL(X0, X0, 4);
10091009

1010-
e.MOVP2R(X1, extract_table_32);
1010+
e.MOV(X1, reinterpret_cast<uintptr_t>(extract_table_32));
10111011
e.LDR(Q0, X1, X0);
10121012

10131013
// Byte-table lookup
@@ -1335,7 +1335,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
13351335
}
13361336

13371337
const XReg VConstData = X3;
1338-
e.MOVP2R(VConstData, e.GetVConstPtr());
1338+
e.MOV(VConstData, e.GetVConstPtr());
13391339

13401340
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
13411341
// are valid - max before min to pack NaN as zero (5454082B is heavily
@@ -1435,7 +1435,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
14351435
e.LoadConstantV(src, i.src1.constant());
14361436
}
14371437
const XReg VConstData = X3;
1438-
e.MOVP2R(VConstData, e.GetVConstPtr());
1438+
e.MOV(VConstData, e.GetVConstPtr());
14391439

14401440
// Saturate
14411441
e.LDR(Q1, VConstData, e.GetVConstOffset(VPackSHORT_Min));
@@ -1456,7 +1456,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
14561456
e.LoadConstantV(src, i.src1.constant());
14571457
}
14581458
const XReg VConstData = X3;
1459-
e.MOVP2R(VConstData, e.GetVConstPtr());
1459+
e.MOV(VConstData, e.GetVConstPtr());
14601460

14611461
// Saturate
14621462
e.LDR(Q1, VConstData, e.GetVConstOffset(VPackSHORT_Min));
@@ -1478,7 +1478,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
14781478
e.LoadConstantV(src, i.src1.constant());
14791479
}
14801480
const XReg VConstData = X3;
1481-
e.MOVP2R(VConstData, e.GetVConstPtr());
1481+
e.MOV(VConstData, e.GetVConstPtr());
14821482

14831483
// Saturate.
14841484
e.LDR(Q1, VConstData, e.GetVConstOffset(VPackUINT_2101010_MinUnpacked));
@@ -1519,7 +1519,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
15191519
e.LoadConstantV(src, i.src1.constant());
15201520
}
15211521
const XReg VConstData = X3;
1522-
e.MOVP2R(VConstData, e.GetVConstPtr());
1522+
e.MOV(VConstData, e.GetVConstPtr());
15231523

15241524
// Saturate.
15251525
e.LDR(Q1, VConstData, e.GetVConstOffset(VPackULONG_4202020_MinUnpacked));
@@ -1740,7 +1740,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
17401740
static void EmitD3DCOLOR(A64Emitter& e, const EmitArgType& i) {
17411741
// ARGB (WXYZ) -> RGBA (XYZW)
17421742
const XReg VConstData = X3;
1743-
e.MOVP2R(VConstData, e.GetVConstPtr());
1743+
e.MOV(VConstData, e.GetVConstPtr());
17441744

17451745
QReg src(0);
17461746

@@ -1849,7 +1849,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
18491849
// (VD.w) = 1.0 (games splat W after unpacking to get vectors of 1.0f)
18501850
// src is (xx,xx,xx,VALUE)
18511851
const XReg VConstData = X3;
1852-
e.MOVP2R(VConstData, e.GetVConstPtr());
1852+
e.MOV(VConstData, e.GetVConstPtr());
18531853

18541854
QReg src(0);
18551855
if (i.src1.is_constant) {
@@ -1892,7 +1892,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
18921892
// src is (xx,xx,VALUE,VALUE)
18931893

18941894
const XReg VConstData = X3;
1895-
e.MOVP2R(VConstData, e.GetVConstPtr());
1895+
e.MOV(VConstData, e.GetVConstPtr());
18961896

18971897
QReg src(0);
18981898
if (i.src1.is_constant) {
@@ -1928,7 +1928,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
19281928
}
19291929
static void EmitUINT_2101010(A64Emitter& e, const EmitArgType& i) {
19301930
const XReg VConstData = X3;
1931-
e.MOVP2R(VConstData, e.GetVConstPtr());
1931+
e.MOV(VConstData, e.GetVConstPtr());
19321932

19331933
QReg src(0);
19341934
if (i.src1.is_constant) {
@@ -1972,7 +1972,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
19721972
}
19731973
static void EmitULONG_4202020(A64Emitter& e, const EmitArgType& i) {
19741974
const XReg VConstData = X3;
1975-
e.MOVP2R(VConstData, e.GetVConstPtr());
1975+
e.MOV(VConstData, e.GetVConstPtr());
19761976

19771977
QReg src(0);
19781978
if (i.src1.is_constant) {

src/xenia/cpu/backend/a64/a64_sequences.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2758,7 +2758,7 @@ struct SET_ROUNDING_MODE_I32
27582758
e.AND(W1, i.src1, 0b111);
27592759

27602760
// Use the low 3 bits as an index into a LUT
2761-
e.MOVP2R(X0, fpcr_table);
2761+
e.MOV(X0, reinterpret_cast<uintptr_t>(fpcr_table));
27622762
e.LDRB(W0, X0, X1);
27632763

27642764
// Replace FPCR bits with new value

0 commit comments

Comments
 (0)