Skip to content

Commit 6ce8366

Browse files
committed
Address comments
1 parent 2d3d563 commit 6ce8366

File tree

5 files changed

+26
-41
lines changed

5 files changed

+26
-41
lines changed

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_emitter.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class jit_emitter : public ov::snippets::Emitter {
7070
static std::set<std::vector<element::Type>> get_supported_precisions(
7171
const std::shared_ptr<ov::Node>& node = nullptr);
7272

73+
static constexpr int sp_alignment = 16;
74+
7375
protected:
7476
static size_t get_max_vecs_count();
7577
static int32_t get_vec_length();
@@ -155,6 +157,10 @@ class jit_emitter : public ov::snippets::Emitter {
155157
}
156158
}
157159

160+
int32_t get_gpr_length() const {
161+
return h->x0.getBit() / 8;
162+
}
163+
158164
private:
159165
mutable std::vector<size_t> preserved_vec_idxs;
160166
mutable std::vector<size_t> preserved_gpr_idxs;
@@ -179,10 +185,6 @@ class jit_emitter : public ov::snippets::Emitter {
179185
return 32;
180186
}
181187

182-
int32_t get_gpr_length() const {
183-
return h->x0.getBit() / 8;
184-
}
185-
186188
void store_context(const std::vector<size_t>& gpr_regs,
187189
const std::vector<size_t>& vec_regs,
188190
const std::unordered_set<size_t>& ignore_vec_regs = {}) const;

src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_gemm_copy_b_emitter.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
#include "emitters/snippets/aarch64/kernel_executors/gemm_copy_b.hpp"
2020
#include "emitters/snippets/aarch64/utils.hpp"
21-
#include "emitters/snippets/utils/utils.hpp"
2221
#include "emitters/snippets/jit_snippets_call_args.hpp"
22+
#include "emitters/snippets/utils/utils.hpp"
2323
#include "emitters/utils.hpp"
2424
#include "openvino/core/node.hpp"
2525
#include "openvino/core/type.hpp"
@@ -94,13 +94,12 @@ void jit_gemm_copy_b_emitter::emit_impl(const std::vector<size_t>& in, const std
9494
const auto& mem_ptrs = utils::transform_idxs_to_regs(mem_ptrs_idxs);
9595

9696
// Apply memory offsets to input/output pointers
97-
h->sub(h->sp, h->sp, 2 * get_vec_length());
98-
97+
const auto sp_size = rnd_up(2 * get_gpr_length(), sp_alignment);
98+
h->sub(h->sp, h->sp, sp_size);
9999
// Apply offsets and store pointers on stack
100100
for (size_t i = 0; i < mem_ptrs.size(); i++) {
101101
const auto& ptr_reg = mem_ptrs[i];
102-
int32_t stack_offset = i * get_vec_length();
103-
102+
int32_t stack_offset = i * get_gpr_length();
104103
if (ov::snippets::utils::is_dynamic_value(m_memory_offsets[i])) {
105104
// Dynamic offset: read from runtime parameters
106105
size_t runtime_offset = GET_OFF(buffer_offsets) + m_buffer_ids[i] * sizeof(size_t);
@@ -110,13 +109,12 @@ void jit_gemm_copy_b_emitter::emit_impl(const std::vector<size_t>& in, const std
110109
utils::push_ptr_with_static_offset_on_stack(h, stack_offset, ptr_reg, m_memory_offsets[i]);
111110
}
112111
}
113-
114112
// Load back the adjusted pointers for function call
115113
h->ldr(x1, Xbyak_aarch64::ptr(h->sp)); // input pointer
116-
h->ldr(x2, Xbyak_aarch64::ptr(h->sp, get_vec_length())); // output pointer
114+
h->ldr(x2, Xbyak_aarch64::ptr(h->sp, get_gpr_length())); // output pointer
117115

118116
// Restore stack pointer
119-
h->add(h->sp, h->sp, 2 * get_vec_length());
117+
h->add(h->sp, h->sp, sp_size);
120118

121119
// Set up executor pointer as first argument
122120
const auto& compiled_kernel = get_compiled_kernel_ptr();

src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_gemm_emitter.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
#include "emitters/snippets/aarch64/kernel_executors/gemm.hpp"
2020
#include "emitters/snippets/aarch64/utils.hpp"
21-
#include "emitters/snippets/utils/utils.hpp"
2221
#include "emitters/snippets/jit_snippets_call_args.hpp"
22+
#include "emitters/snippets/utils/utils.hpp"
2323
#include "emitters/utils.hpp"
2424
#include "openvino/core/node.hpp"
2525
#include "openvino/core/type/element_type.hpp"
@@ -87,13 +87,12 @@ void jit_gemm_emitter::emit_impl(const std::vector<size_t>& in, const std::vecto
8787
const auto& mem_ptrs = utils::transform_idxs_to_regs(mem_ptrs_idxs);
8888

8989
// Apply memory offsets to input/output pointers
90-
h->sub(h->sp, h->sp, 3 * get_vec_length());
91-
90+
const auto sp_size = rnd_up(3 * get_gpr_length(), sp_alignment);
91+
h->sub(h->sp, h->sp, sp_size);
9292
// Apply offsets and store pointers on stack
9393
for (size_t i = 0; i < mem_ptrs.size(); i++) {
9494
const auto& ptr_reg = mem_ptrs[i];
95-
int32_t stack_offset = i * get_vec_length();
96-
95+
int32_t stack_offset = i * get_gpr_length();
9796
if (ov::snippets::utils::is_dynamic_value(m_memory_offsets[i])) {
9897
// Dynamic offset: read from runtime parameters
9998
size_t runtime_offset = GET_OFF(buffer_offsets) + m_buffer_ids[i] * sizeof(size_t);
@@ -103,14 +102,13 @@ void jit_gemm_emitter::emit_impl(const std::vector<size_t>& in, const std::vecto
103102
utils::push_ptr_with_static_offset_on_stack(h, stack_offset, ptr_reg, m_memory_offsets[i]);
104103
}
105104
}
106-
107105
// Load back the adjusted pointers for function call
108106
h->ldr(x1, Xbyak_aarch64::ptr(h->sp)); // matrix A (in0)
109-
h->ldr(x2, Xbyak_aarch64::ptr(h->sp, get_vec_length())); // matrix B (in1)
110-
h->ldr(x3, Xbyak_aarch64::ptr(h->sp, 2 * get_vec_length())); // matrix C (out)
107+
h->ldr(x2, Xbyak_aarch64::ptr(h->sp, get_gpr_length())); // matrix B (in1)
108+
h->ldr(x3, Xbyak_aarch64::ptr(h->sp, 2 * get_gpr_length())); // matrix C (out)
111109

112110
// Restore stack pointer
113-
h->add(h->sp, h->sp, 3 * get_vec_length());
111+
h->add(h->sp, h->sp, sp_size);
114112

115113
// Set up executor pointer as first argument
116114
const auto& compiled_kernel = get_compiled_kernel_ptr();

src/plugins/intel_cpu/src/emitters/snippets/aarch64/utils.cpp

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ using namespace dnnl::impl::cpu::aarch64;
2727

2828
namespace ov::intel_cpu::aarch64::utils {
2929

30-
3130
Xbyak_aarch64::XReg get_aux_gpr(const std::vector<size_t>& used_gpr_idxs) {
3231
// SP - stack pointer should be preserved, X0 and X1 - runtime parameter registers in the kernel
3332
// X18 - platform register should not be used
@@ -74,16 +73,11 @@ void push_ptr_with_runtime_offset_on_stack(dnnl::impl::cpu::aarch64::jit_generat
7473
// Load the runtime offset from abi_param1 (X0) and add it to the pointer
7574
Xbyak_aarch64::XReg abi_param1(0);
7675
Xbyak_aarch64::XReg offset_reg(4);
76+
Xbyak_aarch64::XReg temp_reg(h->X_TMP_0);
7777

78-
// Handle large runtime offsets by using a temporary register
79-
if (runtime_offset > 4095) {
80-
Xbyak_aarch64::XReg temp_offset_reg(6);
81-
h->mov(temp_offset_reg, static_cast<uint64_t>(runtime_offset));
82-
h->add(temp_offset_reg, abi_param1, temp_offset_reg);
83-
h->ldr(offset_reg, Xbyak_aarch64::ptr(temp_offset_reg));
84-
} else {
85-
h->ldr(offset_reg, Xbyak_aarch64::ptr(abi_param1, static_cast<int32_t>(runtime_offset)));
86-
}
78+
// Load the offset value from the runtime parameter location
79+
h->add_imm(temp_reg, abi_param1, runtime_offset, Xbyak_aarch64::XReg(h->X_TMP_1));
80+
h->ldr(offset_reg, Xbyak_aarch64::ptr(temp_reg));
8781

8882
h->add(aux_reg, aux_reg, offset_reg);
8983

@@ -102,17 +96,9 @@ void push_ptr_with_static_offset_on_stack(dnnl::impl::cpu::aarch64::jit_generato
10296
}
10397

10498
// For non-zero offsets, apply the offset and then store
105-
Xbyak_aarch64::XReg temp_reg(4);
99+
Xbyak_aarch64::XReg temp_reg(h->X_TMP_0);
106100
h->mov(temp_reg, ptr_reg);
107-
108-
// For large offsets, use a register to hold the offset value
109-
if (ptr_offset > 4095) { // 12-bit immediate limit for add instruction
110-
Xbyak_aarch64::XReg offset_reg(6);
111-
h->mov(offset_reg, static_cast<uint64_t>(ptr_offset));
112-
h->add(temp_reg, temp_reg, offset_reg);
113-
} else {
114-
h->add(temp_reg, temp_reg, static_cast<int32_t>(ptr_offset));
115-
}
101+
h->add_imm(temp_reg, temp_reg, ptr_offset, Xbyak_aarch64::XReg(h->X_TMP_1));
116102

117103
// Store the adjusted pointer on stack
118104
h->str(temp_reg, Xbyak_aarch64::ptr(h->sp, stack_offset));

src/plugins/intel_cpu/src/emitters/snippets/aarch64/utils.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ void push_ptr_with_runtime_offset_on_stack(dnnl::impl::cpu::aarch64::jit_generat
6161

6262
/**
6363
* @brief Push data pointer on stack adding static offset `ptr_offset`
64+
* Note: This helper doesn't allocate stack space - the user should guarantee allocated space on stack
6465
* @param h generator
6566
* @param stack_offset stack offset
6667
* @param ptr_reg register contains data pointer

0 commit comments

Comments
 (0)