Skip to content

Commit df4253f

Browse files
author
Siyuan Cheng
committed
DSP: add dsp unit test
add dsp context switch test add complex multiplication test for ARC processor Signed-off-by: Siyuan Cheng <[email protected]>
1 parent 676bdaa commit df4253f

File tree

10 files changed

+701
-0
lines changed

10 files changed

+701
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2022 Synopsys
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
cmake_minimum_required(VERSION 3.20.0)
5+
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
6+
project(dsp_sharing)
7+
8+
# Sources that are always part of the test application.
target_sources(app PRIVATE
9+
src/main.c
10+
src/load_store.c
11+
)
12+
13+
# ARCv2 only: build the complex multiplication test and add the include
# directory located under the MetaWare toolchain path.
# NOTE(review): this relies on ARCMWDT_TOOLCHAIN_PATH being set - presumably
# only the case when building with the MetaWare toolchain; confirm.
if(CONFIG_ISA_ARCV2)
14+
zephyr_include_directories(
15+
${ARCMWDT_TOOLCHAIN_PATH}/MetaWare/arc/lib/src/fx/include/
16+
)
17+
target_sources(app PRIVATE src/calculation_arc.c)
18+
endif()

tests/kernel/dsp_sharing/README.txt

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
Title: Shared DSP Support
2+
3+
Description:
4+
5+
The Shared DSP Support test uses two tasks to:
6+
7+
1) Test loading and storing of DSP registers (including arch-specific registers)
8+
2) For ARC processors: compute a complex vector dot product and check for any errors
9+
10+
This tests the ability of tasks to safely share DSP hardware
11+
resources, even when switching occurs preemptively (note that both sets of
12+
tests run concurrently even though they report their progress at different
13+
times).
14+
15+
The demonstration utilizes semaphores, round robin scheduling, DSP and XY
16+
memory support.
17+
18+
--------------------------------------------------------------------------------
19+
20+
Sample Output:
21+
22+
Running TESTSUITE dsp_sharing
23+
===================================================================
24+
START - test_load_store
25+
Load and store OK after 0 (high) + 84 (low) tests
26+
Load and store OK after 100 (high) + 11926 (low) tests
27+
Load and store OK after 200 (high) + 23767 (low) tests
28+
Load and store OK after 300 (high) + 35607 (low) tests
29+
Load and store OK after 400 (high) + 47448 (low) tests
30+
Load and store OK after 500 (high) + 59287 (low) tests
31+
PASS - test_load_store in 10.18 seconds
32+
===================================================================
33+
START - test_calculation
34+
complex product calculation OK after 50 (high) + 63297 (low) tests (computed -160)
35+
complex product calculation OK after 150 (high) + 188138 (low) tests (computed -160)
36+
complex product calculation OK after 250 (high) + 312972 (low) tests (computed -160)
37+
complex product calculation OK after 350 (high) + 437806 (low) tests (computed -160)
38+
complex product calculation OK after 450 (high) + 562639 (low) tests (computed -160)
39+
PASS - test_calculation in 10.16 seconds
40+
===================================================================
41+
TESTSUITE dsp_sharing succeeded
42+
===================================================================
43+
PROJECT EXECUTION SUCCESSFUL

tests/kernel/dsp_sharing/prj.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CONFIG_ZTEST=y
2+
CONFIG_DSP=y
3+
CONFIG_DSP_SHARING=y
4+
CONFIG_MAIN_STACK_SIZE=1024
5+
CONFIG_ARC_DSP_BFLY_SHARING=y
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Copyright (c) 2022 Synopsys
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/*
8+
* @file
9+
* @brief complex number multiplication portion of DSP sharing test
10+
*
11+
* @ingroup kernel_dspsharing_tests
12+
*
13+
* This module is used for the DSP sharing test, and supplements the basic
14+
* load/store test by incorporating two additional threads that utilize the
15+
* DSP unit.
16+
*
17+
* Testing utilizes a pair of tasks that independently compute complex vector
18+
* dot product. The lower priority task is regularly preempted by the higher
19+
* priority task, thereby testing whether DSP context information is properly
20+
* preserved.
21+
*
22+
* A reference result is computed once at the start of the
23+
* test. All subsequent computations must produce the same value, otherwise
24+
* an error has occurred.
25+
*/
26+
27+
#include <ztest.h>
28+
#include "fxarc.h"
29+
#include "dsp_context.h"
30+
#include "test_common.h"
31+
32+
/*
 * Input vectors for the complex multiply-accumulate test.
 * DATA_ATTR applies the __xy qualifier and places the data in the ".Xdata"
 * section, i.e. they are stored in XY memory, which needs AGU_SHARING.
 */
33+
#define DATA_ATTR __xy __attribute__((section(".Xdata")))
34+
static DATA_ATTR const cq15_t cq15_a[3] = {{0x20, 10}, {0x10, 20}, {4, 30}};
35+
static DATA_ATTR const cq15_t cq15_b[3] = {{0x20, 11}, {0x10, 21}, {5, 31}};
36+
37+
/*
 * Reference result shared by both compute threads: while it is still 0 a
 * thread stores its own result here; afterwards every result is compared
 * against it.
 */
static volatile short reference_result;
38+
39+
/* Loop counters of calculate_low() and calculate_high(), used for reporting */
static volatile unsigned int calc_low_count;
40+
static volatile unsigned int calc_high_count;
41+
42+
/*
 * Indicates that the calculation test exited: set by calculate_high() after
 * its loop completes and polled by calculate_low() as its loop condition.
 */
43+
static volatile bool test_exited;
44+
45+
/*
 * Semaphore for signaling end of test: given by calculate_high() and taken
 * by test_calculation().
 */
46+
static K_SEM_DEFINE(test_exit_sem, 0, 1);
47+
48+
/**
49+
* @brief Entry point for the low priority compute task
50+
*
51+
* @ingroup kernel_dspsharing_tests
52+
*/
53+
static void calculate_low(void)
54+
{
55+
volatile short res[2];
56+
/* Loop until the test finishes, or an error is detected. */
57+
for (calc_low_count = 0; !test_exited; calc_low_count++) {
58+
59+
v2accum32_t acc = {0, 0};
60+
61+
for (int i = 0; i < 3; i++) {
62+
acc = fx_v2a32_cmac_cq15(acc, cq15_a[i], cq15_b[i]);
63+
}
64+
/* cast reult from v2accum32_ to short type */
65+
res[0] = fx_q15_cast_asl_rnd_a32(fx_get_v2a32(acc, 0), 15);
66+
res[1] = fx_q15_cast_asl_rnd_a32(fx_get_v2a32(acc, 1), 15);
67+
68+
if (reference_result == 0) {
69+
reference_result = res[0];
70+
} else if (reference_result != res[0]) {
71+
printf("Computed result %d, reference result %d\n",
72+
res[0], reference_result);
73+
}
74+
75+
zassert_equal(reference_result, res[0],
76+
"complex product computation error");
77+
}
78+
}
79+
80+
/**
81+
* @brief Entry point for the high priority compute task
82+
*
83+
* @ingroup kernel_dspsharing_tests
84+
*/
85+
static void calculate_high(void)
86+
{
87+
volatile short res[2];
88+
/* Run the test until the specified maximum test count is reached */
89+
for (calc_high_count = 0; calc_high_count <= MAX_TESTS;
90+
calc_high_count++) {
91+
92+
v2accum32_t acc = {0, 0};
93+
94+
for (int i = 0; i < 3; i++) {
95+
acc = fx_v2a32_cmac_cq15(acc, cq15_a[i], cq15_b[i]);
96+
}
97+
98+
/*
99+
* Relinquish the processor for the remainder of the current
100+
* system clock tick, so that lower priority threads get a
101+
* chance to run.
102+
*
103+
* This exercises the ability of the kernel to restore the
104+
* DSP state of a low priority thread _and_ the ability of the
105+
* kernel to provide a "clean" DSP state to this thread
106+
* once the sleep ends.
107+
*/
108+
k_sleep(K_MSEC(10));
109+
110+
res[0] = fx_q15_cast_asl_rnd_a32(fx_get_v2a32(acc, 0), 15);
111+
res[1] = fx_q15_cast_asl_rnd_a32(fx_get_v2a32(acc, 1), 15);
112+
113+
if (reference_result == 0) {
114+
reference_result = res[0];
115+
} else if (reference_result != res[0]) {
116+
printf("Computed result %d, reference result %d\n",
117+
res[0], reference_result);
118+
}
119+
120+
zassert_equal(reference_result, res[0],
121+
"complex product computation error");
122+
123+
/* Periodically issue progress report */
124+
if ((calc_high_count % 100) == 50) {
125+
printf("complex product calculation OK after %u (high) "
126+
"+"
127+
" %u (low) tests (computed %d)\n",
128+
calc_high_count, calc_low_count, res[0]);
129+
}
130+
}
131+
132+
/* Signal end of test */
133+
test_exited = true;
134+
k_sem_give(&test_exit_sem);
135+
}
136+
137+
/*
 * Statically defined compute threads. Both are created with the
 * K_DSP_REGS | K_AGU_REGS options and a K_TICKS_FOREVER start delay; they are
 * started explicitly with k_thread_start() from test_calculation().
 */
K_THREAD_DEFINE(cal_low, THREAD_STACK_SIZE, calculate_low, NULL, NULL, NULL,
138+
THREAD_LOW_PRIORITY, K_DSP_REGS | K_AGU_REGS, K_TICKS_FOREVER);
139+
140+
K_THREAD_DEFINE(cal_high, THREAD_STACK_SIZE, calculate_high, NULL, NULL, NULL,
141+
THREAD_HIGH_PRIORITY, K_DSP_REGS | K_AGU_REGS, K_TICKS_FOREVER);
142+
143+
void test_calculation(void)
144+
{
145+
/* Initialise test states */
146+
test_exited = false;
147+
k_sem_reset(&test_exit_sem);
148+
149+
/* Start test threads */
150+
k_thread_start(cal_low);
151+
k_thread_start(cal_high);
152+
153+
/* Wait for test threads to exit */
154+
k_sem_take(&test_exit_sem, K_FOREVER);
155+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**
2+
* @file
3+
* @brief common definitions for the DSP sharing test application
4+
*/
5+
6+
/*
7+
* Copyright (c) 2022 Synopsys
8+
*
9+
* SPDX-License-Identifier: Apache-2.0
10+
*/
11+
12+
#ifndef _DSPCONTEXT_H
13+
#define _DSPCONTEXT_H
14+
15+
/*
16+
* Each architecture must define the following structures (which may be empty):
17+
* 'struct dsp_volatile_register_set'
18+
* 'struct dsp_non_volatile_register_set'
19+
*
20+
* Each architecture must also define the following macros:
21+
* SIZEOF_DSP_VOLATILE_REGISTER_SET
22+
* SIZEOF_DSP_NON_VOLATILE_REGISTER_SET
23+
* Those macros are used as sizeof(<an empty structure>) is compiler specific;
24+
* that is, it may evaluate to a non-zero value.
25+
*
26+
* Each architecture shall also have custom implementations of:
27+
* _load_all_dsp_registers()
28+
* _load_then_store_all_dsp_registers()
29+
* _store_all_dsp_registers()
30+
*/
31+
32+
#if defined(CONFIG_ISA_ARCV2)

/*
 * Volatile DSP register set for ARCv2.
 *
 * The members only exist when CONFIG_ARC_DSP_BFLY_SHARING is enabled;
 * otherwise the structure is empty.
 *
 * NOTE: uintptr_t comes from <stdint.h>, which this header does not include
 * itself; the including file has to provide it.
 */
struct dsp_volatile_register_set {
#ifdef CONFIG_ARC_DSP_BFLY_SHARING
	uintptr_t dsp_bfly0;
	uintptr_t agu_ap0;
	uintptr_t agu_os0;
#endif
};

struct dsp_non_volatile_register_set {
	/* No non-volatile dsp registers */
};

/*
 * sizeof(<an empty structure>) is compiler specific, so only use sizeof when
 * the volatile register set actually has members.
 */
#ifdef CONFIG_ARC_DSP_BFLY_SHARING
#define SIZEOF_DSP_VOLATILE_REGISTER_SET \
	(sizeof(struct dsp_volatile_register_set))
#else
#define SIZEOF_DSP_VOLATILE_REGISTER_SET 0
#endif
#define SIZEOF_DSP_NON_VOLATILE_REGISTER_SET 0

#else

/* The whole message must stay on one logical line, hence the continuations */
#error "Architecture must provide the following definitions: " \
	"'struct dsp_volatile_register_set', " \
	"'struct dsp_non_volatile_register_set', " \
	"'SIZEOF_DSP_VOLATILE_REGISTER_SET', " \
	"'SIZEOF_DSP_NON_VOLATILE_REGISTER_SET'"
#endif /* CONFIG_ISA_ARCV2 */
57+
58+
/*
 * The set of ALL dsp registers: the architecture's volatile register set
 * followed by its non-volatile register set.
 */
59+
60+
struct dsp_register_set {
61+
struct dsp_volatile_register_set dsp_volatile;
62+
struct dsp_non_volatile_register_set dsp_non_volatile;
63+
};
64+
65+
/*
 * Size of the complete register set, built from the per-architecture SIZEOF_*
 * macros rather than from sizeof(struct dsp_register_set).
 */
#define SIZEOF_DSP_REGISTER_SET \
66+
(SIZEOF_DSP_VOLATILE_REGISTER_SET + SIZEOF_DSP_NON_VOLATILE_REGISTER_SET)
67+
68+
/*
 * The following constants define the initial byte values used by the
 * background task and by the thread when loading up the dsp registers.
 * NOTE(review): the code that uses these values is not part of this header;
 * presumably each context fills the registers with its own pattern so that a
 * corrupted context can be detected - confirm against the load/store test.
 */
72+
73+
#define MAIN_DSP_REG_CHECK_BYTE ((unsigned char)0xe5)
74+
#define FIBER_DSP_REG_CHECK_BYTE ((unsigned char)0xf9)
75+
76+
#endif /* _DSPCONTEXT_H */

0 commit comments

Comments
 (0)