-
Hi all, I have been trying to benchmark the memory usage of a server. I tried using multiple custom allocators, and the results do not seem to align with the memory usage reported by systemd. The following benchmark highlights the behaviour I am seeing: #![allow(unused_variables)]
use axum::{
Router,
body::Body,
extract::Query,
http::{Response, StatusCode, header::CONTENT_TYPE},
};
use divan::AllocProfiler;
use tokio::task::JoinSet;
#[global_allocator]
static ALLOC: AllocProfiler = AllocProfiler::system();
#[divan::bench]
pub fn test_vec() {
let test: Vec<u8> = Vec::with_capacity(1024 * 1024 * 1024);
}
#[divan::bench]
// Benchmarks the same 1 GiB allocation, but performed inside a future driven
// by block_on on a freshly built multi-threaded tokio runtime.
pub fn test_vec_async() {
    tokio::runtime::Builder::new_multi_thread()
        .enable_time()
        .enable_io()
        .build()
        .expect("Failed to build tokio runtime")
        .block_on(async move {
            let test: Vec<u8> = Vec::with_capacity(1024 * 1024 * 1024);
            // Keep the allocation observable so the optimizer cannot elide it.
            std::hint::black_box(&test);
        })
}
#[divan::bench]
// Benchmarks 1 GiB allocations performed on tokio worker threads via
// spawned tasks, awaited collectively through a JoinSet.
pub fn test_vec_async_join() {
    tokio::runtime::Builder::new_multi_thread()
        .enable_time()
        .enable_io()
        .build()
        .expect("Failed to build tokio runtime")
        .block_on(async move {
            let mut join_set = JoinSet::new();
            // NOTE: 1..10 spawns 9 tasks, not 10 — kept as-is to preserve
            // the behaviour the thread is discussing.
            for _ in 1..10 {
                join_set.spawn(async move {
                    let test: Vec<u8> = Vec::with_capacity(1024 * 1024 * 1024);
                    // Keep the allocation observable inside the task so it
                    // is not optimized away (cf. the reply's assert trick).
                    std::hint::black_box(&test);
                });
            }
            join_set.join_all().await;
        })
}
#[divan::bench]
// Benchmarks the 1 GiB allocation when it happens inside an axum request
// handler: start a server on port 12345, issue one GET with reqwest, then
// shut the server down via a oneshot channel.
pub fn test_vec_axum() {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_time()
.enable_io()
.build()
.expect("Failed to build tokio runtime");
// Handler under test: performs the large allocation, then returns a small
// fixed response with an OpenMetrics content type.
pub async fn scrape(Query(_): Query<()>) -> Response<Body> {
let test: Vec<u8> = Vec::with_capacity(1024 * 1024 * 1024);
Response::builder()
.status(StatusCode::OK)
.header(
CONTENT_TYPE,
"application/openmetrics-text; version=1.0.0; charset=utf-8",
)
.body(Body::from("hello"))
.expect("Failed to build response")
}
// Oneshot used to trigger graceful shutdown once the request completes.
let (kill_s, kill_r) = tokio::sync::oneshot::channel();
let server = async move {
let listener = tokio::net::TcpListener::bind(("0.0.0.0", 12345))
.await
.expect("Failed to bind to port 12345");
axum::serve(listener, {
Router::new().route("/", axum::routing::get(scrape))
})
.with_graceful_shutdown(async move {
// Resolves when kill_s fires (or is dropped), ending the server.
kill_r.await.ok();
})
.await
.expect("Failed to serve app.");
};
let request = async move {
reqwest::get("http://localhost:12345/")
.await
.expect("Request failed")
.text()
.await
.expect("Parsing response failed");
// Signal shutdown; ignore the error if the server already exited.
kill_s.send(()).ok();
};
rt.block_on(async move {
// NOTE(review): server and request are spawned concurrently with no
// guarantee the listener is bound before the GET fires, so this can
// be flaky ("Request failed"). Consider binding the listener before
// spawning the request task — TODO confirm.
let mut join_set = JoinSet::new();
join_set.spawn(request);
join_set.spawn(server);
join_set.join_all().await
});
}
// Entry point: delegate to divan's benchmark runner, which discovers and
// runs every #[divan::bench] function above.
fn main() {
divan::main()
} The output stops seeing allocations as soon as I spawn tasks:
This also happened when generating flame graphs with other allocators. Is this expected? I imagine there is some kind of threading situation in the background. Is there a way to specify the allocator to use in tokio? |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
tokio always uses the allocator specified by the `#[global_allocator]` attribute. I tried a very simple toy profiler, which correctly reports the memory usage. #![allow(unused_variables)]
use tokio::task::JoinSet;
use std::alloc::{GlobalAlloc, System, Layout};
use std::sync::atomic::{AtomicUsize, Ordering::*};
// Running byte totals for everything the process allocates and frees through
// the global allocator. Monotonically increasing; current live usage is
// ALLOCATED - DEALLOCATED.
static ALLOCATED: AtomicUsize = AtomicUsize::new(0);
static DEALLOCATED: AtomicUsize = AtomicUsize::new(0);

// Minimal counting allocator: forwards every call to `System` and records
// byte totals in the atomics above. Installed process-wide via
// #[global_allocator], so allocations made on tokio worker threads are
// counted too.
struct MyAllocator;

#[global_allocator]
static GLOBAL: MyAllocator = MyAllocator;

unsafe impl GlobalAlloc for MyAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let ptr = unsafe { System.alloc(layout) };
        // Count only successful allocations; the original incremented the
        // counter before checking the result, over-reporting whenever the
        // system allocator returned null.
        if !ptr.is_null() {
            ALLOCATED.fetch_add(layout.size(), Relaxed);
        }
        ptr
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        DEALLOCATED.fetch_add(layout.size(), Relaxed);
        unsafe { System.dealloc(ptr, layout) }
    }
}
// Drives nine concurrent 1 GiB allocations on a multi-threaded tokio
// runtime; the counting allocator above records every byte they request.
pub fn test_vec_async_join() {
    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_time()
        .enable_io()
        .build()
        .expect("Failed to build tokio runtime");
    runtime.block_on(async move {
        let mut tasks = JoinSet::new();
        for _ in 1..10 {
            tasks.spawn(async move {
                let buf: Vec<u8> = Vec::with_capacity(1024 * 1024 * 1024);
                // avoid dead code elimination
                assert!(buf.capacity() >= 1024 * 1024 * 1024);
            });
        }
        tasks.join_all().await;
    })
}
// Run the workload, then report the totals recorded by the toy allocator.
fn main() {
test_vec_async_join();
// Integer division truncates: anything below a whole GiB prints as 0.
println!("Allocated: {} GiB", ALLOCATED.load(Relaxed) / 1024 / 1024 / 1024);
println!("Deallocated: {} GiB", DEALLOCATED.load(Relaxed) / 1024 / 1024 / 1024);
} |
Beta Was this translation helpful? Give feedback.
tokio always uses the allocator specified by the
#[global_allocator]
I tried a very simple toy profiler, which correctly reports the memory usage.