
Commit 38a5ce1

sidart committed
Initial draft CMSIS-NN integration (WIP)
1 parent f6cc262 commit 38a5ce1

File tree

backends/cortex_m/CMakeLists.txt
backends/cortex_m/ops/op_add.cpp
backends/cortex_m/ops/op_aten_add_tensor.cpp
backends/cortex_m/ops/operators.py
backends/cortex_m/ops/operators.yaml
backends/cortex_m/passes/replace_quant_nodes_pass.py

6 files changed: +127 −6 lines changed

backends/cortex_m/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -25,9 +25,10 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
 
 # Cortex-M ops kernel sources
 set(_cortex_m_kernels__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_add.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_aten_add_tensor.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_quantize_per_tensor.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_dequantize_per_tensor.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_add.cpp
 )
 
 # Generate C++ bindings to register kernels into Executorch (for runtime).

backends/cortex_m/ops/op_add.cpp

Lines changed: 6 additions & 2 deletions
@@ -1,5 +1,6 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <cinttypes>
+#include <iostream>
+
 namespace cortex_m {
 namespace native {
 
@@ -13,7 +14,9 @@ Tensor& add_out(
     const Tensor& input2,
     const ScalarType dtype,
     Tensor& out) {
-
+  std::cout << "add_out kernel called" << std::endl;
+  ET_LOG(Info, "xxxxxxxxxx add_out kernel called");
+
   // Ensure input is char type
   ET_CHECK_MSG(
       input1.scalar_type() == ScalarType::Char,
@@ -37,6 +40,7 @@ Tensor& add_out(
       "dtype %" PRId8 " is not int8 (Char)",
       static_cast<int8_t>(dtype));
 
+  assert(false);
 
   return out;
 }
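This kernel is still scaffolding: it logs, validates that everything is int8, then trips assert(false) before doing any arithmetic. A real CMSIS-NN elementwise add (e.g. arm_elementwise_add_s8, which this commit does not call yet) must match dequantize-add-requantize semantics, rescaling each operand rather than summing raw int8 values. A minimal Python sketch of that reference math; the scales and zero points (s1, z1, etc.) are hypothetical values, not from this commit:

import torch

# Hypothetical quantization parameters (not from this commit).
s1, z1 = 0.05, 0   # input1 scale / zero point
s2, z2 = 0.10, 0   # input2 scale / zero point
so, zo = 0.12, 0   # output scale / zero point

q1 = torch.tensor([10, 20, 30], dtype=torch.int8)
q2 = torch.tensor([5, 5, 5], dtype=torch.int8)

# Reference semantics: dequantize, add in float, requantize with clamping.
fp = (q1.int() - z1) * s1 + (q2.int() - z2) * s2
qo = torch.clamp(torch.round(fp / so) + zo, -128, 127).to(torch.int8)
print(qo)  # tensor([ 8, 12, 17], dtype=torch.int8)

CMSIS-NN performs the same rescaling with fixed-point multipliers and shifts instead of floats, which is why its add kernel takes per-operand offset/multiplier/shift arguments.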
backends/cortex_m/ops/op_aten_add_tensor.cpp

Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <iostream>
+
+namespace cortex_m {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+
+Tensor& aten_add_tensor(
+    KernelRuntimeContext& ctx,
+    const Tensor& self,
+    const Tensor& other,
+    const ScalarType dtype,
+    Tensor& out) {
+  ET_LOG(Info, "xxxxxxxxxx aten_add_tensor kernel called");
+
+  // Ensure input is char type
+  ET_CHECK_MSG(
+      self.scalar_type() == ScalarType::Char,
+      "self.scalar_type() %" PRId8 " is not char type",
+      static_cast<int8_t>(self.scalar_type()));
+
+  ET_CHECK_MSG(
+      other.scalar_type() == ScalarType::Char,
+      "other.scalar_type() %" PRId8 " is not char type",
+      static_cast<int8_t>(other.scalar_type()));
+
+  // Check dtype is int8 (Char)
+  ET_CHECK_MSG(
+      dtype == ScalarType::Char,
+      "dtype %" PRId8 " is not int8 (Char)",
+      static_cast<int8_t>(dtype));
+
+  // Example: element-wise add self and other into out
+  // (Assuming Tensor has data() and size() methods)
+  const int8_t* self_data = self.const_data_ptr<int8_t>();
+  const int8_t* other_data = other.const_data_ptr<int8_t>();
+  int8_t* out_data = out.mutable_data_ptr<int8_t>();
+  size_t numel = self.numel(); // or self.size() if that's the API
+  for (size_t i = 0; i < numel; ++i) {
+    out_data[i] = self_data[i] + other_data[i];
+  }
+  return out;
+}
+
+} // namespace native
+} // namespace cortex_m
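Note that the loop above adds raw int8 values, so it ignores quantization parameters entirely and wraps silently on overflow. A quick demonstration of the wraparound in plain PyTorch:

import torch

a = torch.tensor([100], dtype=torch.int8)
b = torch.tensor([100], dtype=torch.int8)
# 100 + 100 = 200 does not fit in int8 and wraps modulo 256.
print(a + b)  # tensor([-56], dtype=torch.int8)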

backends/cortex_m/ops/operators.py

Lines changed: 55 additions & 2 deletions
@@ -13,6 +13,61 @@
 # New operator library with a custom namespace to allow fusion etc.
 lib = Library("cortex_m", "DEF")
 
+###
+# add.Tensor
+###
+
+lib.define(
+    "add.Tensor(Tensor self, Tensor other, ScalarType dtype) -> (Tensor Z)"
+)
+
+lib.define(
+    "add_Tensor.out(Tensor self, Tensor other, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)"
+)
+
+@impl(lib, "add.Tensor", "CompositeExplicitAutograd")
+def aten_add_tensor_impl(
+    input1: torch.Tensor,
+    input2: torch.Tensor,
+    dtype: torch.dtype,
+    out: torch.Tensor,
+) -> torch.Tensor:
+    """
+    The implementation of aten add.Tensor.
+    """
+    return exir_ops.edge.aten.add.Tensor(input1, input2, dtype)
+
+###
+# add.out
+###
+
+lib.define(
+    "add(Tensor input1, Tensor input2, ScalarType dtype) -> (Tensor Z)"
+)
+
+lib.define(
+    "add.out(Tensor input1, Tensor input2, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)"
+)
+
+@impl(lib, "add.out", "CompositeExplicitAutograd")
+def add_out_impl(
+    input1: torch.Tensor,
+    input2: torch.Tensor,
+    dtype: torch.dtype,
+    out: torch.Tensor,
+) -> torch.Tensor:
+    """
+    The implementation of cmsis-nn add.out.
+    """
+    if input1.dtype == torch.qint8:
+        return exir_ops.edge.quantized_decomposed.add.default(
+            input1, input2, dtype, dtype
+        )
+    else:
+        return exir_ops.edge.aten.add.default(
+            input1, input2, dtype, dtype
+        )
+
 ###
 # dequantize_per_tensor
 ###
@@ -25,7 +80,6 @@
     "quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
-
 @register_fake("cortex_m::quantize_per_tensor")
 def quantize_per_tensor_meta(
     input: torch.Tensor,
@@ -37,7 +91,6 @@ def quantize_per_tensor_meta(
 ) -> torch.Tensor:
     return torch.empty_like(input, dtype=dtype)
 
-
 @impl(lib, "quantize_per_tensor", "CompositeExplicitAutograd")
 def quantize_per_tensor_impl(
     input: torch.Tensor,
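For reference, the define/impl pattern used above can be exercised standalone. A minimal sketch in a scratch namespace (demo_m is hypothetical, chosen so it does not clash with the real cortex_m library):

import torch
from torch.library import Library, impl

lib = Library("demo_m", "DEF")  # hypothetical scratch namespace
lib.define("add(Tensor input1, Tensor input2) -> Tensor")

@impl(lib, "add", "CompositeExplicitAutograd")
def add_impl(input1: torch.Tensor, input2: torch.Tensor) -> torch.Tensor:
    return input1 + input2

x = torch.ones(2, dtype=torch.int8)
print(torch.ops.demo_m.add(x, x))  # tensor([2, 2], dtype=torch.int8)

Once defined this way, the op is addressable as torch.ops.<namespace>.<name> and can be targeted by graph passes, which is what the replace_quant_nodes_pass.py change below relies on.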

backends/cortex_m/ops/operators.yaml

Lines changed: 7 additions & 1 deletion
@@ -16,8 +16,14 @@
   - arg_meta: null
     kernel_name: cortex_m::dequantize_per_tensor_out
 
-- func: cortex_m::add.out(Tensor a, Tensor b, Scalar alpha, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::add.out(Tensor a, Tensor b, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
       kernel_name: cortex_m::add_out
+
+- func: cortex_m::add.Tensor(Tensor self, Tensor other, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: cortex_m::aten_add_tensor

backends/cortex_m/passes/replace_quant_nodes_pass.py

Lines changed: 8 additions & 0 deletions
@@ -31,6 +31,14 @@ def _is_qualified_int8_node(args) -> bool:
     def __init__(self):
         super().__init__()
         self.op_replacements = {
+            exir_ops.edge.add: {
+                "new_target": exir_ops.edge.cortex_m.add,
+                "qualifier": self._is_qualified_int8_node,
+            },
+            exir_ops.edge.aten.add.Tensor: {
+                "new_target": exir_ops.edge.cortex_m.add.Tensor,
+                "qualifier": self._is_qualified_int8_node,
+            },
             exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: {
                 "new_target": exir_ops.edge.cortex_m.quantize_per_tensor.default,
                 "qualifier": self._is_qualified_int8_node,
