Skip to content

Commit 9ac97b2

Browse files
committed
Math: FIR: Copy HiFi5 filter functions from HiFi3 version
This patch contains only minimal changes to build this with HiFi5 toolchain. Changed SOF_USE_HIFI() / SOF_USE_MIN_HIFI() macros usage and included hifi5 header. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent 94c4221 commit 9ac97b2

File tree

4 files changed

+259
-2
lines changed

4 files changed

+259
-2
lines changed

src/math/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ add_local_sources_ifdef(CONFIG_POWER_FIXED sof power.c)
2929

3030
add_local_sources_ifdef(CONFIG_BINARY_LOGARITHM_FIXED sof base2log.c)
3131

32-
add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c)
32+
add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c fir_hifi5.c)
3333

3434
if(CONFIG_MATH_FFT)
3535
add_subdirectory(fft)

src/math/fir_hifi3.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <sof/math/fir_config.h>
88
#include <sof/common.h>
99

10-
#if SOF_USE_MIN_HIFI(3, FILTER)
10+
#if SOF_USE_HIFI(3, FILTER) || SOF_USE_HIFI(4, FILTER)
1111

1212
#include <sof/audio/buffer.h>
1313
#include <sof/math/fir_hifi3.h>

src/math/fir_hifi5.c

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//
3+
// Copyright(c) 2017-2025 Intel Corporation.
4+
//
5+
// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
6+
7+
#include <sof/math/fir_config.h>
8+
#include <sof/common.h>
9+
10+
#if SOF_USE_MIN_HIFI(5, FILTER)
11+
12+
#include <sof/audio/buffer.h>
13+
#include <sof/math/fir_hifi3.h>
14+
#include <user/fir.h>
15+
#include <xtensa/config/defs.h>
16+
#include <xtensa/tie/xt_hifi5.h>
17+
#include <rtos/symbol.h>
18+
#include <errno.h>
19+
#include <stddef.h>
20+
#include <stdint.h>
21+
22+
/*
23+
* EQ FIR algorithm code
24+
*/
25+
26+
void fir_reset(struct fir_state_32x16 *fir)
27+
{
28+
fir->taps = 0;
29+
fir->length = 0;
30+
fir->out_shift = 0;
31+
fir->coef = NULL;
32+
/* There may need to know the beginning of dynamic allocation after
33+
* reset so omitting setting also fir->delay to NULL.
34+
*/
35+
}
36+
EXPORT_SYMBOL(fir_reset);
37+
38+
int fir_delay_size(struct sof_fir_coef_data *config)
39+
{
40+
/* Check FIR tap count for implementation specific constraints */
41+
if (config->length > SOF_FIR_MAX_LENGTH || config->length < 4)
42+
return -EINVAL;
43+
44+
/* The optimization requires the tap count to be multiple of four */
45+
if (config->length & 0x3)
46+
return -EINVAL;
47+
48+
/* The dual sample version needs one more delay entry. To preserve
49+
* align for 64 bits need to add two.
50+
*/
51+
return (config->length + 2) * sizeof(int32_t);
52+
}
53+
EXPORT_SYMBOL(fir_delay_size);
54+
55+
int fir_init_coef(struct fir_state_32x16 *fir,
56+
struct sof_fir_coef_data *config)
57+
{
58+
/* The length is taps plus two since the filter computes two
59+
* samples per call. Length plus one would be minimum but the add
60+
* must be even. The even length is needed for 64 bit loads from delay
61+
* lines with 32 bit samples.
62+
*/
63+
fir->taps = (int)config->length;
64+
fir->length = fir->taps + 2;
65+
fir->out_shift = (int)config->out_shift;
66+
fir->coef = (ae_f16x4 *)&config->coef[0];
67+
return 0;
68+
}
69+
EXPORT_SYMBOL(fir_init_coef);
70+
71+
void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data)
72+
{
73+
fir->delay = (ae_int32 *)*data;
74+
fir->delay_end = fir->delay + fir->length;
75+
fir->rwp = (ae_int32 *)(fir->delay + fir->length - 1);
76+
*data += fir->length; /* Point to next delay line start */
77+
}
78+
EXPORT_SYMBOL(fir_init_delay);
79+
80+
void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
81+
int *rshift)
82+
{
83+
*lshift = (fir->out_shift < 0) ? -fir->out_shift : 0;
84+
*rshift = (fir->out_shift > 0) ? fir->out_shift : 0;
85+
}
86+
EXPORT_SYMBOL(fir_get_lrshifts);
87+
88+
/* HiFi EP has the following number of registers that should not be exceeded
89+
* 4x 56 bit registers in register file Q
90+
* 8x 48 bit registers in register file P
91+
*/
92+
93+
/**
 * Filter one sample with 32 bit data and 16 bit coefficients.
 *
 * The input sample is stored to the delay line through fir->rwp, which is
 * then stepped back by one sample, and the taps are accumulated in groups
 * of four. When fir->taps >> 2 is zero the input is copied to the output
 * and the filter state is left untouched.
 *
 * NOTE(review): the stores and loads use the _XC circular addressing
 * forms; the circular buffer setup is not visible in this function and is
 * presumably done by the caller - confirm.
 *
 * @param fir Filter state with coefficients and delay line.
 * @param x Input sample.
 * @param y Receives the output sample.
 * @param shift Shift passed to AE_SLAA64S() to scale the accumulator
 *		before it is rounded to 32 bits.
 */
void fir_32x16(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y, int shift)
{
	/* NOTE(review): the original register budget comment (1x Q, 4x P,
	 * 3x integers, 2x address pointers) was written for HiFi EP register
	 * files; confirm whether it is still meaningful for this target.
	 */
	const int step = sizeof(int32_t);
	const int quads = fir->taps >> 2;
	ae_int16x4 *hp = (ae_int16x4 *)fir->coef;
	ae_int32 *xp = fir->rwp; /* Slot that receives x below */
	ae_valign align;
	ae_f16x4 h;
	ae_f32x2 s0;
	ae_f32x2 s1;
	ae_f32x2 pair;
	ae_f64 acc;
	int n;

	/* No complete group of four taps: pass the sample through. */
	if (!quads) {
		*y = x;
		return;
	}

	/* Store the new sample into the delay line */
	AE_S32_L_XC(x, fir->rwp, -sizeof(int32_t));

	/* Prime the unaligned coefficient stream */
	align = AE_LA64_PP(hp);

	acc = AE_ZEROQ56();
	for (n = 0; n < quads; n++) {
		/* Fetch four coefficients and advance hp. Lane 3 of h holds
		 * tap h[n], lane 2 h[n+1], lane 1 h[n+2] and lane 0 h[n+3].
		 */
		AE_LA16X4_IP(h, align, hp);

		/* Read two samples and pack s0 to the high half and s1 to
		 * the low half of pair.
		 */
		AE_L32_XC(s0, xp, step);
		AE_L32_XC(s1, xp, step);
		pair = AE_SEL32_LL(s0, s1);

		/* acc += pair_h * h_3 + pair_l * h_2 */
		AE_MULAAFD32X16_H3_L2(acc, pair, h);

		/* Same again for the remaining two taps of this group:
		 * acc += pair_h * h_1 + pair_l * h_0
		 */
		AE_L32_XC(s0, xp, step);
		AE_L32_XC(s1, xp, step);
		pair = AE_SEL32_LL(s0, s1);
		AE_MULAAFD32X16_H1_L0(acc, pair, h);
	}

	/* Scale, round and store the output sample */
	acc = AE_SLAA64S(acc, shift);
	AE_S32_L_I(AE_ROUND32F48SSYM(acc), (ae_int32 *)y, 0);
}
EXPORT_SYMBOL(fir_32x16);
165+
166+
/* HiFi EP has the following number of registers that should not be exceeded
167+
* 4x 56 bit registers in register file Q
168+
* 8x 48 bit registers in register file P
169+
*/
170+
171+
/**
 * Filter two consecutive samples with 32 bit data and 16 bit coefficients.
 *
 * Both input samples are stored to the delay line through fir->rwp, x0
 * first and x1 second, and two outputs are accumulated in parallel with
 * the taps handled in groups of four. When fir->taps >> 2 is zero the
 * inputs are copied to the outputs and the filter state is left untouched.
 *
 * NOTE(review): the stores and loads use the _XC circular addressing
 * forms; the circular buffer setup is not visible in this function and is
 * presumably done by the caller - confirm.
 *
 * @param fir Filter state with coefficients and delay line.
 * @param x0 First input sample.
 * @param x1 Second input sample.
 * @param y0 Receives the output for x0.
 * @param y1 Receives the output for x1.
 * @param shift Shift passed to AE_SLAA64S() to scale the accumulators
 *		before they are rounded to 32 bits.
 */
void fir_32x16_2x(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
		  ae_int32 *y0, ae_int32 *y1, int shift)
{
	/* NOTE(review): the original register budget comment (2x Q, 4x P,
	 * 3x integers, 2x address pointers) was written for HiFi EP register
	 * files; confirm whether it is still meaningful for this target.
	 */
	const int step = 2 * sizeof(int32_t);
	const int quads = fir->taps >> 2;
	ae_f16x4 *hp = fir->coef;
	ae_f32x2 *xp;
	ae_valign align;
	ae_f16x4 h;
	ae_f32x2 cur;
	ae_f32x2 next;
	ae_f64 acc_y0;
	ae_f64 acc_y1;
	int n;

	/* No complete group of four taps: pass the samples through. */
	if (!quads) {
		*y0 = x0;
		*y1 = x1;
		return;
	}

	/* Store both samples into the delay line. The read pointer is taken
	 * between the two stores, so it refers to the slot that x1 is
	 * written to.
	 */
	AE_S32_L_XC(x0, fir->rwp, -sizeof(int32_t));
	xp = (ae_f32x2 *)fir->rwp;
	AE_S32_L_XC(x1, fir->rwp, -sizeof(int32_t));

	acc_y0 = AE_ZERO64();
	acc_y1 = AE_ZERO64();

	/* Prime the unaligned coefficient stream */
	align = AE_LA64_PP(hp);

	/* Preload the first pair of samples; the loop keeps one pair of
	 * look-ahead in next.
	 */
	AE_L32X2_XC(cur, xp, step);
	for (n = 0; n < quads; n++) {
		/* Fetch four coefficients and advance hp. Lane 3 of h holds
		 * tap h[n], lane 2 h[n+1], lane 1 h[n+2] and lane 0 h[n+3].
		 */
		AE_LA16X4_IP(h, align, hp);

		/* Read the next pair of samples */
		AE_L32X2_XC(next, xp, step);

		/* Quad MAC (HH)
		 * acc_y1 += cur_h * h_3 + cur_l * h_2
		 * acc_y0 += cur_l * h_3 + next_h * h_2
		 */
		AE_MULAFD32X16X2_FIR_HH(acc_y1, acc_y0, cur, next, h);
		cur = next;

		/* Read one more pair for the remaining two taps */
		AE_L32X2_XC(next, xp, step);

		/* Quad MAC (HL)
		 * acc_y1 += cur_h * h_1 + cur_l * h_0
		 * acc_y0 += cur_l * h_1 + next_h * h_0
		 */
		AE_MULAFD32X16X2_FIR_HL(acc_y1, acc_y0, cur, next, h);
		cur = next;
	}

	/* Scale, round and store the output samples */
	acc_y1 = AE_SLAA64S(acc_y1, shift);
	acc_y0 = AE_SLAA64S(acc_y0, shift);
	AE_S32_L_I(AE_ROUND32F48SSYM(acc_y1), (ae_int32 *)y1, 0);
	AE_S32_L_I(AE_ROUND32F48SSYM(acc_y0), (ae_int32 *)y0, 0);
}
EXPORT_SYMBOL(fir_32x16_2x);
255+
256+
#endif

zephyr/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ elseif(CONFIG_MATH_FIR)
694694
${SOF_MATH_PATH}/fir_generic.c
695695
${SOF_MATH_PATH}/fir_hifi2ep.c
696696
${SOF_MATH_PATH}/fir_hifi3.c
697+
${SOF_MATH_PATH}/fir_hifi5.c
697698
)
698699
endif()
699700

0 commit comments

Comments
 (0)