Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apps/sel4test-tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ file(
)

add_executable(sel4test-tests EXCLUDE_FROM_ALL ${deps})
# Compile trivial.c (and only that file) with -fno-stack-protector.
# NOTE(review): the reason for this per-file exception is not stated here;
# confirm it is still required before keeping it.
set_source_files_properties(src/tests/trivial.c PROPERTIES COMPILE_OPTIONS "-fno-stack-protector")
# special handling for "arm_hyp", it's really "aarch32"
set(_inc_folder_KernelSel4Arch "${KernelSel4Arch}")
if("${KernelSel4Arch}" STREQUAL "arm_hyp")
Expand Down Expand Up @@ -57,3 +58,6 @@ target_link_libraries(
sel4serialserver_tests
PRIVATE sel4test-driver_Config
)

# Add the memcpy micro-benchmark source to the sel4test-tests executable.
# NOTE(review): if ${deps} is already populated by a glob that matches
# src/tests/*.c, this lists bench_memcpy.c a second time; confirm it is needed.
target_sources(sel4test-tests PRIVATE src/tests/bench_memcpy.c)
16 changes: 16 additions & 0 deletions apps/sel4test-tests/README-bench_memcpy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# MEMCPY_BENCH micro-benchmark

This adds a simple memcpy micro-benchmark to `sel4test-tests`.

## What’s included
- `apps/sel4test-tests/src/tests/bench_memcpy.c`
- `apps/sel4test-tests/CMakeLists.txt` updated to compile `bench_memcpy.c`; the test registers itself through `DEFINE_TEST`.

## Build & run
```bash
cd ~/sel4-ws/build-x86
../init-build.sh -DPLATFORM=x86_64 -DSIMULATION=TRUE -GNinja
ninja
ninja simulate # generates the ./simulate script
./simulate | tee run.log # boots QEMU and runs tests
```

48 changes: 48 additions & 0 deletions apps/sel4test-tests/src/tests/bench_memcpy.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <sel4test/test.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/*
 * Elapsed time from *t0 to *t1 in nanoseconds.
 *
 * The seconds and nanoseconds differences are each converted to uint64_t and
 * combined with unsigned (modulo 2^64) arithmetic, so a negative tv_nsec
 * difference is absorbed by the seconds term whenever *t1 is not earlier
 * than *t0.
 */
static inline uint64_t nsecs_since(const struct timespec *t0, const struct timespec *t1)
{
    const uint64_t ns_per_sec = 1000000000ull;
    const uint64_t sec_delta = (uint64_t)(t1->tv_sec - t0->tv_sec);
    const uint64_t nsec_delta = (uint64_t)(t1->tv_nsec - t0->tv_nsec);

    return sec_delta * ns_per_sec + nsec_delta;
}

/*
 * Run one fill-and-copy pass over the first n bytes of two static,
 * 64-byte-aligned 1 MiB buffers: fill src with a byte pattern, memcpy it to
 * dst, then read the last copied byte through a volatile so the copy has an
 * observable use.
 *
 * n must be in the range 1..sizeof(src). Any other value is ignored (the
 * function returns without touching the buffers) because n == 0 would read
 * dst[-1] and n > sizeof(src) would overrun both arrays.
 */
static void do_once(size_t n) {
    static uint8_t __attribute__((aligned(64))) src[1 << 20];
    static uint8_t __attribute__((aligned(64))) dst[1 << 20];

    if (n == 0 || n > sizeof src) {
        return;
    }

    for (size_t i = 0; i < n; i++) {
        src[i] = (uint8_t)(i * 131u);
    }
    memcpy(dst, src, n);

    /* Volatile read keeps the copy from being treated as dead. */
    volatile uint8_t sink = dst[n - 1];
    (void)sink;
}

/*
 * Time `iters` calls of do_once(n) with CLOCK_MONOTONIC and print the total
 * elapsed nanoseconds and the resulting throughput in MB/s.
 *
 * env   - test environment; not used by this function.
 * n     - number of bytes handled per do_once() call.
 * iters - number of timed calls; up to 10 untimed warm-up calls run first.
 *
 * Returns whatever sel4test_get_result() reports.
 *
 * If either clock_gettime() call fails, a diagnostic line is printed instead
 * of a measurement (the timespec would otherwise be read uninitialised). If
 * the measured interval is zero, MB/s is printed as 0.0 rather than dividing
 * by zero.
 *
 * Note: each do_once() call also refills its source buffer, so the reported
 * figure covers the fill loop as well as the memcpy itself.
 */
static int memcpy_bench_once(env_t env, size_t n, unsigned iters) {
    (void)env;

    /* warm-up */
    const unsigned warmup = iters < 10 ? iters : 10;
    for (unsigned i = 0; i < warmup; i++) {
        do_once(n);
    }

    struct timespec t0, t1;
    if (clock_gettime(CLOCK_MONOTONIC, &t0) != 0) {
        printf("memcpy: size=%zu, iters=%u, clock_gettime failed; no timing available\n",
               n, iters);
        return sel4test_get_result();
    }
    for (unsigned i = 0; i < iters; i++) {
        do_once(n);
    }
    if (clock_gettime(CLOCK_MONOTONIC, &t1) != 0) {
        printf("memcpy: size=%zu, iters=%u, clock_gettime failed; no timing available\n",
               n, iters);
        return sel4test_get_result();
    }

    uint64_t ns = nsecs_since(&t0, &t1);
    double bytes = (double)n * (double)iters;
    /* A clock too coarse to see the loop yields ns == 0; avoid dividing by it. */
    double mbps = (ns != 0) ? (bytes / (1024.0 * 1024.0)) / ((double)ns / 1e9) : 0.0;

    printf("memcpy: size=%zu, iters=%u, total_ns=%llu, MB/s=%.1f\n",
           n, iters, (unsigned long long)ns, mbps);
    return sel4test_get_result();
}

/*
 * Test entry point: runs memcpy_bench_once() with 500 iterations for each
 * buffer size in the table below, ignoring the per-size return values, and
 * returns the value of sel4test_get_result().
 */
static int test_memcpy_bench(env_t env)
{
    const size_t sizes[] = {64, 128, 256, 512, 1024, 4096, 16384, 65536};
    const unsigned count = sizeof(sizes) / sizeof(sizes[0]);
    unsigned idx = 0;

    while (idx < count) {
        (void)memcpy_bench_once(env, sizes[idx], 500);
        idx++;
    }

    return sel4test_get_result();
}

/*
 * Declares test_memcpy_bench as the sel4test test named MEMCPY_BENCH with the
 * description "Memcpy micro-benchmark".
 * NOTE(review): the final `true` is presumably the enable flag - confirm
 * against the DEFINE_TEST definition in <sel4test/test.h>.
 */
DEFINE_TEST(MEMCPY_BENCH, "Memcpy micro-benchmark", test_memcpy_bench, true)
106 changes: 106 additions & 0 deletions apps/sel4test-tests/src/tests/trivial.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,109 @@ int test_allocator(env_t env)
}
DEFINE_TEST(TRIVIAL0001, "Ensure the allocator works", test_allocator, true)
DEFINE_TEST(TRIVIAL0002, "Ensure the allocator works more than once", test_allocator, true)




/* ==== BEGIN: MEMCPY_BENCH_GUARD ==== */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <sel4test/test.h> /* for env_t and DEFINE_TEST */

/*
 * Descriptor for a heap buffer whose usable region starts on a 64-byte
 * boundary. An all-zero descriptor means "no buffer".
 */
typedef struct {
    uint8_t *base;    /* pointer returned by malloc (for free) */
    uint8_t *aligned; /* 64B-aligned pointer within the allocation */
    size_t size;      /* bytes available starting at aligned */
} bench_buf;

/*
 * Allocate a buffer with at least `payload` usable bytes starting at a
 * 64-byte-aligned address. 128 bytes of slack are added to the request so the
 * alignment adjustment (at most 63 bytes) always leaves room for the payload.
 *
 * Returns a descriptor owned by the caller; release it with bench_free().
 * On failure (malloc returned NULL, or payload + 128 would overflow size_t)
 * an all-zero descriptor is returned, so callers can test `.aligned`.
 */
static bench_buf bench_alloc64(size_t payload) {
    const size_t slack = 128; /* headroom for align/misalign */
    bench_buf b = {0};

    if (payload > SIZE_MAX - slack) {
        return b;
    }

    size_t need = payload + slack;
    uint8_t *base = malloc(need);
    if (base == NULL) {
        return b;
    }

    /* Round the address up to the next multiple of 64. */
    uintptr_t p = (uintptr_t)base;
    uintptr_t aligned = (p + 63u) & ~(uintptr_t)63u;
    size_t head = (size_t)(aligned - p); /* 0..63, always below need */

    b.base = base;
    b.aligned = base + head;
    b.size = need - head;
    return b;
}

/*
 * Release a buffer obtained from bench_alloc64() and reset the descriptor to
 * all zeroes, so a repeated call is harmless. A NULL `b` is ignored.
 */
static void bench_free(bench_buf *b) {
    if (b == NULL) {
        return;
    }
    free(b->base); /* free(NULL) is a no-op */
    *b = (bench_buf){0};
}

#if defined(__x86_64__)
/*
 * Read the x86-64 time-stamp counter, executing CPUID (leaf 0) immediately
 * before and after the RDTSC instruction.
 *
 * Returns the 64-bit counter value assembled from EDX:EAX.
 *
 * Constraint notes:
 *  - CPUID writes EAX, EBX, ECX and EDX. EAX is also its input, so it is
 *    declared read-write ("+a"); listing it as input-only would let the
 *    compiler assume the register still holds 0 afterwards.
 *  - The "memory" clobber keeps the compiler from moving loads and stores of
 *    the code being measured across the timing points.
 *  - __asm__/__volatile__ are used so the code also builds in strict ISO
 *    modes where the plain `asm` keyword is unavailable.
 */
static inline uint64_t rdtsc_serialized(void) {
    uint32_t lo, hi;
    uint32_t leaf = 0;

    __asm__ __volatile__("cpuid" : "+a"(leaf) : : "rbx", "rcx", "rdx", "memory");
    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    uint64_t tsc = ((uint64_t)hi << 32) | lo;

    leaf = 0;
    __asm__ __volatile__("cpuid" : "+a"(leaf) : : "rbx", "rcx", "rdx", "memory");
    return tsc;
}
#else
/* NOTE(review): this stops the whole translation unit from compiling on any
 * architecture other than x86_64 - confirm that is acceptable for this file. */
# error "This microbench currently targets x86_64 only."
#endif

/*
 * Touch a buffer before timing: perform a volatile read-modify-write (XOR
 * with 0, which leaves the value unchanged) on every 64th byte of p[0..n)
 * and on the final byte. Does nothing when n is 0.
 */
static void memcpy_touch(void *p, size_t n) {
    volatile uint8_t *bytes = (volatile uint8_t *)p;
    size_t off = 0;

    while (off < n) {
        bytes[off] ^= 0;
        off += 64;
    }

    if (n != 0) {
        bytes[n - 1] ^= 0;
    }
}

/*
 * Measure memcpy(dst, src, n) repeated `iters` times and return the cost in
 * time-stamp-counter cycles per byte copied.
 *
 * dst, src - non-overlapping buffers of at least n bytes each.
 * n        - bytes copied per call.
 * iters    - number of timed calls.
 *
 * Both buffers are touched and one untimed copy is made first as warm-up.
 * Returns 0.0 without measuring when n == 0 or iters <= 0, since there would
 * be no bytes to divide by.
 */
static double memcpy_bench_once(uint8_t *dst, uint8_t *src, size_t n, int iters) {
    if (n == 0 || iters <= 0) {
        return 0.0;
    }

    memcpy_touch(src, n);
    memcpy_touch(dst, n);
    (void)memcpy(dst, src, n); /* warm */

    uint64_t t0 = rdtsc_serialized();
    for (int i = 0; i < iters; i++) {
        memcpy(dst, src, n);
        /* Compiler barrier: emits no instruction, but makes every copy
         * observable so identical iterations cannot be merged or dropped. */
        __asm__ __volatile__("" : : "r"(dst), "r"(src) : "memory");
    }
    uint64_t t1 = rdtsc_serialized();

    uint64_t cycles = t1 - t0;
    /* Multiply as doubles so a large n * iters cannot wrap size_t. */
    return (double)cycles / ((double)n * (double)iters);
}

/*
 * Memcpy micro-benchmark driver. For each size in `sizes`, allocates two
 * 64-byte-aligned buffers of n + 1 bytes, fills the source with a byte
 * pattern, and prints cycles-per-byte for an aligned copy and for a copy
 * whose source and destination are both offset by one byte.
 *
 * env - not used.
 *
 * Returns 0 when every size was measured, or -1 if any allocation failed
 * (that size is reported and skipped).
 *
 * NOTE(review): the other test entry point visible in this file,
 * test_allocator, takes `env_t env` by value, whereas this function takes
 * `env_t *env`; and nothing in this file registers it with DEFINE_TEST.
 * Confirm the intended signature, registration and result code before
 * wiring it up.
 */
static int bench_memcpy(env_t *env) {
    (void)env;
    const size_t sizes[] = {64, 256, 1024, 4096, 65536, 1048576};
    const int iters[] = {512, 256, 128, 64, 16, 4};
    int rc = 0;

    for (size_t si = 0; si < sizeof(sizes)/sizeof(sizes[0]); si++) {
        size_t n = sizes[si];

        bench_buf a = bench_alloc64(n + 1); /* +1 for misalign+1 case */
        bench_buf b = bench_alloc64(n + 1);

        /* Checked at run time, not with assert(), so a failed allocation is
         * still caught when NDEBUG is defined. */
        if (!a.aligned || !b.aligned || a.size < n + 1 || b.size < n + 1) {
            printf("memcpy: size=%7zu allocation failed, skipping\n", n);
            bench_free(&b);
            bench_free(&a);
            rc = -1;
            continue;
        }

        for (size_t i = 0; i < n + 1; i++) {
            a.aligned[i] = (uint8_t)(i * 131u);
        }
        memset(b.aligned, 0, n + 1);

        struct {
            const char *label;
            uint8_t *src;
            uint8_t *dst;
        } cases[2] = {
            { "aligned",    a.aligned,     b.aligned     },
            { "misalign+1", a.aligned + 1, b.aligned + 1 },
        };

        for (int ci = 0; ci < 2; ci++) {
            double cpb = memcpy_bench_once(cases[ci].dst, cases[ci].src, n, iters[si]);
            printf("memcpy: size=%7zu %-10s cpb=%.3f\n", n, cases[ci].label, cpb);
        }

        bench_free(&b);
        bench_free(&a);
    }
    return rc;
}

/* NOTE: no DEFINE_TEST(...) registration follows, so bench_memcpy is not run as written. */
/* ==== END: MEMCPY_BENCH_GUARD ==== */