
Commit 8e16851

Author: sidart (committed)
Initial draft CMSIS-NN integration (WIP)
1 parent f6cc262 commit 8e16851

File tree: 7 files changed, +200 −12 lines

backends/cortex_m/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
@@ -25,9 +25,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
 
 # Cortex-M ops kernel sources
 set(_cortex_m_kernels__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_add.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_aten_add_tensor.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_softmax.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_quantize_per_tensor.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_dequantize_per_tensor.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_add.cpp
 )
 
 # Generate C++ bindings to register kernels into Executorch (for runtime).

backends/cortex_m/ops/op_add.cpp

Lines changed: 6 additions & 2 deletions
@@ -1,5 +1,6 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <cinttypes>
+#include <iostream>
+
 namespace cortex_m {
 namespace native {
 
@@ -13,7 +14,9 @@ Tensor& add_out(
     const Tensor& input2,
     const ScalarType dtype,
     Tensor& out) {
-
+  std::cout << "add_out kernel called" << std::endl;
+  ET_LOG(Info, "xxxxxxxxxx add_out kernel called");
+
   // Ensure input is char type
   ET_CHECK_MSG(
       input1.scalar_type() == ScalarType::Char,
 
@@ -37,6 +40,7 @@ Tensor& add_out(
       "dtype %" PRId8 " is not int8 (Char)",
       static_cast<int8_t>(dtype));
 
+  assert(false);
 
   return out;
 }
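
Since the commit targets CMSIS-NN integration, the placeholder body of add_out would presumably end up delegating to CMSIS-NN's arm_elementwise_add_s8. Below is a minimal sketch of that call; the helper name and every offset/multiplier/shift value are placeholder assumptions, since the current schema only carries dtype. (Note that the hunk above drops <cinttypes> even though the PRId8 format macros still need it, and assert(false) needs <cassert>.)

// Hypothetical helper (sketch only): how add_out might call CMSIS-NN.
// The requantization parameters below are placeholder assumptions; a
// real kernel would derive them from the tensors' scales and zero points.
#include "arm_nnfunctions.h"  // CMSIS-NN
#include <cstdint>

void add_s8_via_cmsis_nn(
    const int8_t* input1,
    const int8_t* input2,
    int8_t* output,
    int32_t block_size) {
  const int32_t offset = 0;       // placeholder: -zero_point of each tensor
  const int32_t mult = 1 << 30;   // placeholder fixed-point multiplier
  const int32_t shift = 1;        // placeholder shift paired with mult
  const int32_t left_shift = 20;  // headroom shift applied before rescaling

  // Rescales both int8 inputs into a common domain, adds them, then
  // requantizes and clamps the result back to int8.
  arm_elementwise_add_s8(
      input1, input2,
      offset, mult, shift,  // input 1 requantization
      offset, mult, shift,  // input 2 requantization
      left_shift,
      output,
      /*out_offset=*/0, mult, shift,
      /*out_activation_min=*/-128, /*out_activation_max=*/127,
      block_size);
}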
backends/cortex_m/ops/op_aten_add_tensor.cpp

Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <iostream>
+
+namespace cortex_m {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+
+Tensor& aten_add_tensor(
+    KernelRuntimeContext& ctx,
+    const Tensor& self,
+    const Tensor& other,
+    const ScalarType dtype,
+    Tensor& out) {
+  ET_LOG(Info, "xxxxxxxxxx aten_add_tensor kernel called");
+
+  // Ensure inputs are char (int8) type
+  ET_CHECK_MSG(
+      self.scalar_type() == ScalarType::Char,
+      "self.scalar_type() %" PRId8 " is not char type",
+      static_cast<int8_t>(self.scalar_type()));
+
+  ET_CHECK_MSG(
+      other.scalar_type() == ScalarType::Char,
+      "other.scalar_type() %" PRId8 " is not char type",
+      static_cast<int8_t>(other.scalar_type()));
+
+  // Check dtype is int8 (Char)
+  ET_CHECK_MSG(
+      dtype == ScalarType::Char,
+      "dtype %" PRId8 " is not int8 (Char)",
+      static_cast<int8_t>(dtype));
+
+  // Element-wise add of self and other into out, via the ExecuTorch
+  // Tensor accessors const_data_ptr/mutable_data_ptr and numel().
+  const int8_t* self_data = self.const_data_ptr<int8_t>();
+  const int8_t* other_data = other.const_data_ptr<int8_t>();
+  int8_t* out_data = out.mutable_data_ptr<int8_t>();
+  size_t numel = self.numel();
+  for (size_t i = 0; i < numel; ++i) {
+    out_data[i] = self_data[i] + other_data[i];
+  }
+  return out;
+}
+
+} // namespace native
+} // namespace cortex_m

backends/cortex_m/ops/op_softmax.cpp

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <iostream>
+
+namespace cortex_m {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+
+Tensor& _softmax_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& self,
+    int64_t dim,
+    bool half_to_float,
+    Tensor& out) {
+  // Optimized implementation to go here; fill 'out' with the result
+  // and return it.
+  std::cout << "xxxxxxxxxx softmax_out kernel called" << std::endl;
+  std::cout.flush();
+  ET_LOG(Error, "xxxxxxxxxx softmax_out kernel called");
+
+  return out;
+}
+
+Tensor _softmax(
+    KernelRuntimeContext& ctx,
+    const Tensor& self,
+    int64_t dim,
+    bool half_to_float) {
+  std::cout << "xxxxxxxxxx softmax_default kernel called" << std::endl;
+  std::cout.flush();
+  ET_LOG(Error, "xxxxxxxxxx softmax_default kernel called");
+  return self;
+}
+
+} // namespace native
+} // namespace cortex_m
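
For the softmax stubs, the natural CMSIS-NN counterpart is arm_softmax_s8. A hedged sketch follows, assuming softmax over the innermost dimension; the helper name and the fixed-point parameters (mult, shift, diff_min) are placeholders that would normally be precomputed from the input scale.

// Hypothetical helper (sketch only): delegating int8 softmax to CMSIS-NN.
#include "arm_nnfunctions.h"  // CMSIS-NN
#include <cstdint>

void softmax_s8_via_cmsis_nn(
    const int8_t* input,
    int32_t num_rows,  // product of all dims except the last
    int32_t row_size,  // extent of the softmax (last) dimension
    int8_t* output) {
  const int32_t mult = 1 << 30;   // placeholder input multiplier (Q-format)
  const int32_t shift = 1;        // placeholder shift paired with mult
  const int32_t diff_min = -128;  // placeholder clamp on (x - max(x))

  // Row-wise quantized softmax; CMSIS-NN writes int8 results using a
  // fixed 1/256 output scale with a -128 zero point.
  arm_softmax_s8(input, num_rows, row_size, mult, shift, diff_min, output);
}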

backends/cortex_m/ops/operators.py

Lines changed: 65 additions & 2 deletions
@@ -13,6 +13,57 @@
 # New operator library with a custom namespace to allow fusion etc.
 lib = Library("cortex_m", "DEF")
 
+###
+# add.Tensor
+###
+
+lib.define(
+    "add.Tensor(Tensor self, Tensor other, ScalarType dtype) -> (Tensor Z)"
+)
+
+lib.define(
+    "add_Tensor.out(Tensor self, Tensor other, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)"
+)
+
+@impl(lib, "add.Tensor", "CompositeExplicitAutograd")
+def aten_add_tensor_impl(
+    input1: torch.Tensor,
+    input2: torch.Tensor,
+    dtype: torch.dtype,
+    out: torch.Tensor,
+) -> torch.Tensor:
+    """
+    The implementation of aten add.Tensor.
+    """
+    return exir_ops.edge.cortex_m.add.Tensor(input1, input2, dtype)
+
+###
+# add.out
+###
+
+lib.define(
+    "add(Tensor input1, Tensor input2, ScalarType dtype) -> (Tensor Z)"
+)
+
+lib.define(
+    "add.out(Tensor input1, Tensor input2, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)"
+)
+
+@impl(lib, "add.out", "CompositeExplicitAutograd")
+def add_out_impl(
+    input1: torch.Tensor,
+    input2: torch.Tensor,
+    dtype: torch.dtype,
+    out: torch.Tensor,
+) -> torch.Tensor:
+    """
+    The implementation of the CMSIS-NN add.out.
+    """
+    return exir_ops.edge.cortex_m.add.default(
+        input1, input2, dtype
+    )
+
 ###
 # dequantize_per_tensor
 ###
 
@@ -25,7 +76,6 @@
     "quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
-
 @register_fake("cortex_m::quantize_per_tensor")
 def quantize_per_tensor_meta(
     input: torch.Tensor,
 
@@ -37,7 +87,6 @@ def quantize_per_tensor_meta(
 ) -> torch.Tensor:
     return torch.empty_like(input, dtype=dtype)
 
-
 @impl(lib, "quantize_per_tensor", "CompositeExplicitAutograd")
 def quantize_per_tensor_impl(
     input: torch.Tensor,
 
@@ -96,3 +145,17 @@ def dequantize_per_tensor_impl(
     return exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default(
         input, scale, zero_point, quant_min, quant_max, dtype
     )
+
+lib.define(
+    "softmax(Tensor self, int dim, bool half_to_float) -> Tensor"
+)
+lib.define(
+    "softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)"
+)
+
+@impl(lib, "softmax", "CompositeExplicitAutograd")
+def softmax_impl(self: torch.Tensor, dim: int, half_to_float: bool) -> torch.Tensor:
+    # Dispatch to the custom edge op
+    return exir_ops.edge.cortex_m._softmax(self, dim, half_to_float)
+
+@impl(lib, "softmax.out", "CompositeExplicitAutograd")
+def softmax_out_impl(self: torch.Tensor, dim: int, half_to_float: bool, out: torch.Tensor) -> torch.Tensor:
+    return exir_ops.edge.cortex_m._softmax_out(self, dim, half_to_float, out)

backends/cortex_m/ops/operators.yaml

Lines changed: 19 additions & 1 deletion
@@ -16,8 +16,26 @@
     - arg_meta: null
       kernel_name: cortex_m::dequantize_per_tensor_out
 
-- func: cortex_m::add.out(Tensor a, Tensor b, Scalar alpha, *, Tensor(a!) out) -> Tensor(a!)
+- func: cortex_m::add.out(Tensor a, Tensor b, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
      kernel_name: cortex_m::add_out
+
+- func: cortex_m::add.Tensor(Tensor self, Tensor other, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: cortex_m::aten_add_tensor
+
+- func: cortex_m::_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: cortex_m::_softmax
+
+- func: cortex_m::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: cortex_m::_softmax_out

backends/cortex_m/passes/replace_quant_nodes_pass.py

Lines changed: 20 additions & 6 deletions
@@ -31,6 +31,18 @@ def _is_qualified_int8_node(args) -> bool:
     def __init__(self):
         super().__init__()
         self.op_replacements = {
+            exir_ops.edge.add: {
+                "new_target": exir_ops.edge.cortex_m.add,
+                "qualifier": lambda args: True,
+            },
+            exir_ops.edge.aten.add.Tensor: {
+                "new_target": exir_ops.edge.cortex_m.add.Tensor,
+                "qualifier": lambda args: True,
+            },
+            exir_ops.edge.aten._softmax.out: {
+                "new_target": exir_ops.edge.cortex_m.softmax.out,
+                "qualifier": lambda args: True,
+            },
             exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: {
                 "new_target": exir_ops.edge.cortex_m.quantize_per_tensor.default,
                 "qualifier": self._is_qualified_int8_node,
 
@@ -51,12 +63,14 @@ def call_operator(
         assert isinstance(
             op, EdgeOpOverload
         ), "Op must be an EdgeOpOverload. Run this pass after to_edge()."
+        print(f"[ReplaceQuantNodesPass] Operator called: {op}, Args: {args}")
 
-        if op in self.op_replacements and self.op_replacements[op]["qualifier"](args):
+        if op in self.op_replacements and self.op_replacements[op]["qualifier"](args):
+            print(f"[ReplaceQuantNodesPass] Replacing {op} with {self.op_replacements[op]['new_target']}")
             return super().call_operator(
-                self.op_replacements[op]["new_target"],
-                args,
-                kwargs,
-                meta,
-            )
+                self.op_replacements[op]["new_target"],
+                args,
+                kwargs,
+                meta,
+            )
         return super().call_operator(op, args, kwargs, meta)
