Skip to content

Commit 14b8584

Browse files
authored
Support for DECODE operator (#3190)
* Support for DECODE operator @tensorflow/micro Add initial support for DECODE operator. Add reference implementation. Add LUT decompression support. Update op resolvers. Update Makefiles and Bazel BUILD files. Add kernel unit test. bug=fixes #3131 * update copyright * Don't use constructors with global objects (bluepill will not call them). Cleanup unit test. * Support for DECODE operator @tensorflow/micro Additional support for DECODE operator. Add Xtensa optimizations for LUT decompression. Move all Xtensa kernel source references to the Xtensa target makefile. bug=fixes #3150 * Updates to Xtensa makefiles @tensorflow/micro Reorganize Xtensa makefiles such that all references to optimized kernel sources are moved to the Xtensa target makefile. Move hifimini kernel sources to the parent directory, and rename them so they do not interfere with the target overlay mechanism of the root makefile. bug=fixes #3153 * Fix incorrect include path. Fix code style errors. * fix copyright * update generic benchmark op resolver size * Support for DECODE operator @tensorflow/micro Add reference implementation of pruning to DECODE operator. Makefile and Bazel BUILD file changes. Additional unit tests. bug=fixes #3161 * xtensa int8 single channel working * xtensa per-channel int8 normal axis working * WIP * working xtensa optimizations * Add negative unit test * Support for DECODE operator @tensorflow/micro Add optimized xtensa implementation of pruning to DECODE operator. Makefile changes. Additional unit tests. bug=fixes #3171 * all tests pass * Support for DECODE operator @tensorflow/micro Add reference implementation of Huffman decompression to DECODE operator. Makefile and Bazel BUILD file changes. Additional unit tests. bug=fixes #3187 * Support for DECODE operator @tensorflow/micro Add optimized xtensa implementation of Huffman decompression to DECODE operator. Makefile changes. 
bug=fixes #3189 * Add ScopedMicroProfiler * unfinished merge changes * Split out huffman unit test. Remove xtensa optimizations. * cleanup * Cleanup. All tests pass. * Cleanup.
1 parent a94423c commit 14b8584

File tree

8 files changed

+1487
-0
lines changed

8 files changed

+1487
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/lite/micro/kernels/decode_state.h"
17+
18+
#include "tensorflow/lite/micro/kernels/decode_state_huffman.h"
19+
#include "tensorflow/lite/micro/kernels/decode_state_lut.h"
20+
#include "tensorflow/lite/micro/kernels/decode_state_prune.h"
21+
#include "tensorflow/lite/micro/micro_context.h"
22+
23+
#ifdef HIFI5
24+
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_decode_state_huffman.h"
25+
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_decode_state_lut.h"
26+
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_decode_state_prune.h"
27+
#endif // HIFI5
28+
29+
namespace tflite {
30+
31+
// Builds the LUT decode state inside a persistent buffer obtained from the
// MicroContext associated with `context`.
//
// When HIFI5 is defined the Xtensa-specific subclass is instantiated,
// otherwise the reference class is used. Returns nullptr if the persistent
// buffer could not be allocated.
DecodeState* DecodeState::CreateDecodeStateLUT(
    const TfLiteContext* context, MicroProfilerInterface* profiler) {
#ifdef HIFI5
  using StateImpl = XtensaDecodeStateLut;
#else
  using StateImpl = DecodeStateLut;
#endif  // HIFI5

  void* const storage =
      GetMicroContext(context)->AllocatePersistentBuffer(sizeof(StateImpl));
  if (storage == nullptr) {
    return nullptr;
  }

  // Placement-construct the state object in the buffer allocated above.
  return new (storage) StateImpl(context, profiler);
}
51+
52+
// Builds the pruning decode state inside a persistent buffer obtained from
// the MicroContext associated with `context`.
//
// When HIFI5 is defined the Xtensa-specific subclass is instantiated,
// otherwise the reference class is used. Returns nullptr if the persistent
// buffer could not be allocated.
DecodeState* DecodeState::CreateDecodeStatePrune(
    const TfLiteContext* context, MicroProfilerInterface* profiler) {
#ifdef HIFI5
  using StateImpl = XtensaDecodeStatePrune;
#else
  using StateImpl = DecodeStatePrune;
#endif  // HIFI5

  void* const storage =
      GetMicroContext(context)->AllocatePersistentBuffer(sizeof(StateImpl));
  if (storage == nullptr) {
    return nullptr;
  }

  // Placement-construct the state object in the buffer allocated above.
  return new (storage) StateImpl(context, profiler);
}
71+
72+
// Builds the Huffman decode state inside a persistent buffer obtained from
// the MicroContext associated with `context`.
//
// When HIFI5 is defined the Xtensa-specific subclass is instantiated,
// otherwise the reference class is used. Returns nullptr if the persistent
// buffer could not be allocated.
DecodeState* DecodeState::CreateDecodeStateHuffman(
    const TfLiteContext* context, MicroProfilerInterface* profiler) {
#ifdef HIFI5
  using StateImpl = XtensaDecodeStateHuffman;
#else
  using StateImpl = DecodeStateHuffman;
#endif  // HIFI5

  void* const storage =
      GetMicroContext(context)->AllocatePersistentBuffer(sizeof(StateImpl));
  if (storage == nullptr) {
    return nullptr;
  }

  // Placement-construct the state object in the buffer allocated above.
  return new (storage) StateImpl(context, profiler);
}
91+
92+
} // namespace tflite
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_decode_state_huffman.h"
17+
18+
#include <cstddef>
19+
20+
#include "tensorflow/lite/kernels/internal/compatibility.h"
21+
#include "tensorflow/lite/micro/kernels/kernel_util.h"
22+
#include "tensorflow/lite/micro/kernels/xtensa/xtensa.h"
23+
#include "tensorflow/lite/micro/micro_log.h"
24+
#include "tensorflow/lite/micro/micro_profiler.h"
25+
26+
namespace tflite {
27+
28+
// Decompresses into the data buffer of `output`, picking the routine from
// the output element type and the `use_32bit_table_` member.
//
// `input` and `ancillary` are not read by this function; the decompression
// routines work from member state.
//
// Returns kTfLiteOk for int8 and int16 outputs. Any other output type is
// logged and reported as kTfLiteError.
TfLiteStatus XtensaDecodeStateHuffman::Decode(const TfLiteEvalTensor& input,
                                              const TfLiteEvalTensor& ancillary,
                                              const TfLiteEvalTensor& output) {
  // GetTensorData yields a pointer-to-const for a const tensor reference;
  // the decompressors need to write through it.
  void* const out_data =
      const_cast<void*>(micro::GetTensorData<void>(&output));
  TFLITE_DCHECK(out_data != nullptr);

  if (output.type == kTfLiteInt8) {
    int8_t* const dst = static_cast<int8_t*>(out_data);
    if (use_32bit_table_) {
      Decompress32BitTable_Xtensa(dst);
    } else {
      Decompress16BitTable_Xtensa(dst);
    }
    return kTfLiteOk;
  }

  if (output.type == kTfLiteInt16) {
    // int16 output always goes through the 32-bit table path here.
    Decompress32BitTable_Xtensa(static_cast<int16_t*>(out_data));
    return kTfLiteOk;
  }

  MicroPrintf("unsupported tensor type %s", TfLiteTypeGetName(output.type));
  return kTfLiteError;
}
52+
53+
// Decodes `count_codewords_` symbols from `compressed_codewords_` using
// `huffman_tables_` viewed as 16-bit entries, writing one int8_t per symbol
// to consecutive elements of `buffer`.
//
// NOTE(review): the WAE_* / AE_VLDL* names look like Xtensa HiFi
// variable-length-decode state writes and instructions; their exact
// semantics are not visible in this file -- confirm against the ISA
// reference before reordering or changing any of them.
void XtensaDecodeStateHuffman::Decompress16BitTable_Xtensa(int8_t* buffer) {
54+
// Profiles this function under its own name for the lifetime of the scope.
ScopedMicroProfiler scoped_profiler(__func__, micro_profiler_);
55+
56+
// Number of symbols still to be written to `buffer`.
size_t remaining = count_codewords_;
57+
const uint16_t* huffman_tables =
58+
static_cast<const uint16_t*>(huffman_tables_);
59+
// The compressed stream is consumed as 16-bit units.
const uint16_t* __restrict p_stream =
60+
reinterpret_cast<const uint16_t*>(compressed_codewords_);
61+
62+
// Decoder state set-up; presumably order-sensitive (see NOTE above).
WAE_BITPTR(15);
63+
WAE_BITSUSED(1);
64+
// byte swap the preload half-word
// NOTE(review): p_stream[0] is promoted to int before shifting, so the
// argument also carries the original high byte in bits 16..23; only the low
// 16 bits are the swapped half-word. Confirm WAE_BITHEAD ignores the upper
// bits.
65+
WAE_BITHEAD(p_stream[0] << 8 | p_stream[0] >> 8);
66+
WAE_SEARCHDONE(1);
67+
WAE_FIRST_TS(initial_table_size_);
68+
AE_VLDL16C(p_stream);
69+
70+
// One output symbol per outer iteration.
while (remaining--) {
71+
xtbool complete = 0;
72+
// Always assigned before use: the inner loop runs at least once because
// `complete` starts at 0.
unsigned long int symbol;
73+
74+
// Repeat the table-lookup / stream-consume pair until the lookup reports
// completion through `complete`.
while (!complete) {
75+
AE_VLDL16T(complete, symbol, huffman_tables);
76+
AE_VLDL16C(p_stream);
77+
}
78+
79+
// Implicit narrowing from unsigned long to int8_t.
*buffer++ = symbol;
80+
}
81+
}
82+
83+
// Decodes `count_codewords_` symbols from `compressed_codewords_` using
// `huffman_tables_` viewed as 32-bit entries, writing one T per symbol to
// consecutive elements of `buffer`.
//
// NOTE(review): the WAE_* / AE_VLDL* names look like Xtensa HiFi
// variable-length-decode state writes and instructions; their exact
// semantics are not visible in this file -- confirm against the ISA
// reference before reordering or changing any of them.
template <typename T>
84+
void XtensaDecodeStateHuffman::Decompress32BitTable_Xtensa(T* buffer) {
85+
// Profiles this function under its own name for the lifetime of the scope.
ScopedMicroProfiler scoped_profiler(__func__, micro_profiler_);
86+
87+
// Number of symbols still to be written to `buffer`.
size_t remaining = count_codewords_;
88+
const uint32_t* huffman_tables =
89+
static_cast<const uint32_t*>(huffman_tables_);
90+
// The compressed stream is consumed as 16-bit units even though the table
// entries are 32 bits wide.
const uint16_t* __restrict p_stream =
91+
reinterpret_cast<const uint16_t*>(compressed_codewords_);
92+
93+
// Decoder state set-up; presumably order-sensitive (see NOTE above).
WAE_BITPTR(15);
94+
WAE_BITSUSED(1);
95+
// byte swap the preload half-word
// NOTE(review): p_stream[0] is promoted to int before shifting, so the
// argument also carries the original high byte in bits 16..23; only the low
// 16 bits are the swapped half-word. Confirm WAE_BITHEAD ignores the upper
// bits.
96+
WAE_BITHEAD(p_stream[0] << 8 | p_stream[0] >> 8);
97+
WAE_SEARCHDONE(1);
98+
WAE_FIRST_TS(initial_table_size_);
99+
AE_VLDL16C(p_stream);
100+
101+
// One output symbol per outer iteration.
while (remaining--) {
102+
xtbool complete = 0;
103+
// Always assigned before use: the inner loop runs at least once because
// `complete` starts at 0.
unsigned long int symbol;
104+
105+
// Repeat the table-lookup / stream-consume pair until the lookup reports
// completion through `complete`.
while (!complete) {
106+
AE_VLDL32T(complete, symbol, huffman_tables);
107+
AE_VLDL16C(p_stream);
108+
}
109+
110+
// Implicit narrowing from unsigned long to T.
*buffer++ = symbol;
111+
}
112+
}
113+
114+
// Explicit instantiations for the two element types Decode() passes
// (int8_t and int16_t).
template void XtensaDecodeStateHuffman::Decompress32BitTable_Xtensa<int8_t>(
115+
int8_t*);
116+
template void XtensaDecodeStateHuffman::Decompress32BitTable_Xtensa<int16_t>(
117+
int16_t*);
118+
119+
} // namespace tflite
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#ifndef TENSORFLOW_LITE_MICRO_MICRO_KERNELS_XTENSA_DECODE_STATE_HUFFMAN_H_
17+
#define TENSORFLOW_LITE_MICRO_MICRO_KERNELS_XTENSA_DECODE_STATE_HUFFMAN_H_
18+
19+
#include <cstdint>
20+
21+
#include "tensorflow/lite/micro/compatibility.h"
22+
#include "tensorflow/lite/micro/kernels/decode_state_huffman.h"
23+
24+
namespace tflite {
25+
26+
class XtensaDecodeStateHuffman : public DecodeStateHuffman {
27+
public:
28+
XtensaDecodeStateHuffman() = delete;
29+
30+
XtensaDecodeStateHuffman(const TfLiteContext* context,
31+
MicroProfilerInterface* profiler)
32+
: DecodeStateHuffman(context, profiler) {}
33+
34+
virtual TfLiteStatus Decode(const TfLiteEvalTensor& input,
35+
const TfLiteEvalTensor& ancillary,
36+
const TfLiteEvalTensor& output) override;
37+
38+
protected:
39+
virtual ~XtensaDecodeStateHuffman() = default;
40+
41+
template <typename T>
42+
void Decompress32BitTable_Xtensa(T* buffer);
43+
44+
void Decompress16BitTable_Xtensa(int8_t* buffer);
45+
46+
private:
47+
TF_LITE_REMOVE_VIRTUAL_DELETE
48+
};
49+
50+
} // namespace tflite
51+
52+
#endif // TENSORFLOW_LITE_MICRO_MICRO_KERNELS_XTENSA_DECODE_STATE_HUFFMAN_H_

0 commit comments

Comments
 (0)