Skip to content

Commit c1561b1

Browse files
committed
feat: recognize and use oversized allocations
Allocators are allowed to return a larger memory chunk than was asked for. If the extra amount is large enough, the hash table can use the extra space. The Global allocator will not hit this path, because it won't oversize enough to matter, but custom allocators may. An example of an allocator which allocates full system pages is included in the test suite (UNIX only, because it uses `mmap`).
1 parent 0ff220c commit c1561b1

File tree

4 files changed

+225
-11
lines changed

4 files changed

+225
-11
lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ serde_test = "1.0"
4747
doc-comment = "0.3.1"
4848
bumpalo = { version = "3.13.0", features = ["allocator-api2"] }
4949

50+
[target.'cfg(unix)'.dev-dependencies]
51+
libc = "0.2.155"
52+
5053
[features]
5154
default = ["default-hasher", "inline-more", "allocator-api2", "equivalent", "raw-entry"]
5255

src/map.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6631,3 +6631,136 @@ mod test_map {
66316631
);
66326632
}
66336633
}
6634+
6635+
#[cfg(all(test, unix, any(feature = "nightly", feature = "allocator-api2")))]
6636+
mod test_map_with_mmap_allocations {
6637+
use super::HashMap;
6638+
use crate::raw::prev_pow2;
6639+
use core::alloc::Layout;
6640+
use core::ptr::{null_mut, NonNull};
6641+
6642+
#[cfg(feature = "nightly")]
6643+
use core::alloc::{AllocError, Allocator};
6644+
6645+
#[cfg(all(feature = "allocator-api2", not(feature = "nightly")))]
6646+
use allocator_api2::alloc::{AllocError, Allocator};
6647+
6648+
/// This is not a production quality allocator, just good enough for
6649+
/// some basic tests.
6650+
#[derive(Clone, Copy, Debug)]
6651+
struct MmapAllocator {
6652+
/// Guarantee this is a power of 2.
6653+
page_size: usize,
6654+
}
6655+
6656+
impl MmapAllocator {
6657+
fn new() -> Result<Self, AllocError> {
6658+
let result = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
6659+
if result < 1 {
6660+
return Err(AllocError);
6661+
}
6662+
6663+
let page_size = result as usize;
6664+
if !page_size.is_power_of_two() {
6665+
Err(AllocError)
6666+
} else {
6667+
Ok(Self { page_size })
6668+
}
6669+
}
6670+
6671+
fn fit_to_page_size(&self, n: usize) -> Result<usize, AllocError> {
6672+
// If n=0, give a single page (wasteful, I know).
6673+
let n = if n == 0 { self.page_size } else { n };
6674+
6675+
match n & (self.page_size - 1) {
6676+
0 => Ok(n),
6677+
rem => n.checked_add(self.page_size - rem).ok_or(AllocError),
6678+
}
6679+
}
6680+
}
6681+
6682+
unsafe impl Allocator for MmapAllocator {
6683+
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
6684+
if layout.align() > self.page_size {
6685+
return Err(AllocError);
6686+
}
6687+
6688+
let null = null_mut();
6689+
let len = self.fit_to_page_size(layout.size())? as libc::size_t;
6690+
let prot = libc::PROT_READ | libc::PROT_WRITE;
6691+
let flags = libc::MAP_PRIVATE | libc::MAP_ANON;
6692+
let addr = unsafe { libc::mmap(null, len, prot, flags, -1, 0) };
6693+
6694+
// mmap returns MAP_FAILED on failure, not Null.
6695+
if addr == libc::MAP_FAILED {
6696+
return Err(AllocError);
6697+
}
6698+
6699+
match NonNull::new(addr.cast()) {
6700+
Some(data) => {
6701+
// SAFETY: this is NonNull::slice_from_raw_parts.
6702+
Ok(unsafe {
6703+
NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(
6704+
data.as_ptr(),
6705+
len,
6706+
))
6707+
})
6708+
}
6709+
6710+
// This branch shouldn't be taken in practice, but since we
6711+
// cannot return null as a valid pointer in our type system,
6712+
// we attempt to handle it.
6713+
None => {
6714+
_ = unsafe { libc::munmap(addr, len) };
6715+
Err(AllocError)
6716+
}
6717+
}
6718+
}
6719+
6720+
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
6721+
// If they allocated it with this layout, it must round correctly.
6722+
let size = self.fit_to_page_size(layout.size()).unwrap();
6723+
let _result = libc::munmap(ptr.as_ptr().cast(), size);
6724+
debug_assert_eq!(0, _result)
6725+
}
6726+
}
6727+
6728+
/// A map asked for capacity 1 from a page-granular allocator should expose
/// the capacity of the whole page, keep that capacity until it is full, and
/// only grow on the insert after that.
#[test]
fn test_tiny_allocation_gets_rounded_to_page_size() {
    let alloc = MmapAllocator::new().unwrap();
    let mut map: HashMap<usize, (), _, _> = HashMap::with_capacity_in(1, alloc);

    // Conservative lower bound on the capacity one page should provide.
    let min_capacity = {
        // One element plus its control byte.
        let per_bucket = core::mem::size_of::<(usize, ())>() + 1;
        // Allowance for padding in the allocation (group width rounding, etc.).
        let slack = 3 * core::mem::size_of::<usize>();
        // Bucket counts are always powers of 2.
        let buckets = prev_pow2((alloc.page_size - slack) / per_bucket);
        // The real load factor is 7/8; 1/2 keeps this a safe underestimate.
        buckets >> 1
    };

    let capacity = map.capacity();
    assert!(
        capacity >= min_capacity,
        "failed: {capacity} >= {min_capacity}"
    );

    // Fill the table to exactly its reported capacity.
    (0..capacity).for_each(|key| {
        map.insert(key, ());
    });

    // It must now be full without having reallocated.
    assert_eq!(capacity, map.len());
    assert_eq!(capacity, map.capacity());

    // One more element forces growth.
    map.insert(capacity, ());
    assert!(
        capacity < map.capacity(),
        "failed: {capacity} < {}",
        map.capacity()
    );
}
6766+
}

src/raw/alloc.rs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ mod inner {
1515
use core::ptr::NonNull;
1616

1717
#[allow(clippy::map_err_ignore)]
18-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
18+
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<[u8]>, ()> {
1919
match alloc.allocate(layout) {
20-
Ok(ptr) => Ok(ptr.as_non_null_ptr()),
20+
Ok(ptr) => Ok(ptr),
2121
Err(_) => Err(()),
2222
}
2323
}
@@ -38,9 +38,9 @@ mod inner {
3838
use core::ptr::NonNull;
3939

4040
#[allow(clippy::map_err_ignore)]
41-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
41+
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<[u8]>, ()> {
4242
match alloc.allocate(layout) {
43-
Ok(ptr) => Ok(ptr.cast()),
43+
Ok(ptr) => Ok(ptr),
4444
Err(_) => Err(()),
4545
}
4646
}
@@ -61,7 +61,7 @@ mod inner {
6161

6262
#[allow(clippy::missing_safety_doc)] // not exposed outside of this crate
6363
pub unsafe trait Allocator {
64-
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()>;
64+
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()>;
6565
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout);
6666
}
6767

@@ -70,8 +70,19 @@ mod inner {
7070

7171
unsafe impl Allocator for Global {
7272
#[inline]
73-
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()> {
74-
unsafe { NonNull::new(alloc(layout)).ok_or(()) }
73+
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()> {
74+
match unsafe { NonNull::new(alloc(layout)) } {
75+
Some(data) => {
76+
// SAFETY: this is NonNull::slice_from_raw_parts.
77+
Ok(unsafe {
78+
NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(
79+
data.as_ptr(),
80+
layout.size(),
81+
))
82+
})
83+
}
84+
None => Err(()),
85+
}
7586
}
7687
#[inline]
7788
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
@@ -86,7 +97,7 @@ mod inner {
8697
}
8798
}
8899

89-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
100+
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<[u8]>, ()> {
90101
alloc.allocate(layout)
91102
}
92103
}

src/raw/mod.rs

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,40 @@ impl RawTableInner {
14421442
}
14431443
}
14441444

1445+
/// Returns the largest power of two that is less than or equal to `z`.
/// A value that is already a power of two is returned unchanged.
///
/// `z` must be non-zero. Zero has no set bit to round down to, so the result
/// for it is unspecified: builds with debug assertions panic on the check
/// below, other builds may panic on arithmetic overflow or return a
/// meaningless value. This is a logic error, not memory unsafety.
pub(crate) fn prev_pow2(z: usize) -> usize {
    debug_assert!(z != 0, "prev_pow2 requires a non-zero input");
    // Index of the most significant set bit of `z`.
    let highest_bit = usize::BITS - 1 - z.leading_zeros();
    1 << highest_bit
}
1451+
1452+
fn maximum_buckets_in(
1453+
allocation_size: usize,
1454+
table_layout: TableLayout,
1455+
group_width: usize,
1456+
) -> usize {
1457+
// Given an equation like:
1458+
// z >= x * y + x + g
1459+
// x can be maximized by doing:
1460+
// x = (z - g) / (y + 1)
1461+
// If you squint:
1462+
// x is the number of buckets
1463+
// y is the table_layout.size
1464+
// z is the size of the allocation
1465+
// g is the group width
1466+
// But this is ignoring the padding needed for ctrl_align.
1467+
// If we remember these restrictions:
1468+
// x is always a power of 2
1469+
// Layout size for T must always be a multiple of T
1470+
// Then the alignment can be ignored if we add the constraint:
1471+
// x * y >= table_layout.ctrl_align
1472+
// This is taken care of by `capacity_to_buckets`.
1473+
let numerator = allocation_size - group_width;
1474+
let denominator = table_layout.size + 1; // todo: ZSTs?
1475+
let quotient = numerator / denominator;
1476+
prev_pow2(quotient)
1477+
}
1478+
14451479
impl RawTableInner {
14461480
/// Allocates a new [`RawTableInner`] with the given number of buckets.
14471481
/// The control bytes and buckets are left uninitialized.
@@ -1459,7 +1493,7 @@ impl RawTableInner {
14591493
unsafe fn new_uninitialized<A>(
14601494
alloc: &A,
14611495
table_layout: TableLayout,
1462-
buckets: usize,
1496+
mut buckets: usize,
14631497
fallibility: Fallibility,
14641498
) -> Result<Self, TryReserveError>
14651499
where
@@ -1468,13 +1502,29 @@ impl RawTableInner {
14681502
debug_assert!(buckets.is_power_of_two());
14691503

14701504
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
1471-
let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
1505+
let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
14721506
Some(lco) => lco,
14731507
None => return Err(fallibility.capacity_overflow()),
14741508
};
14751509

14761510
let ptr: NonNull<u8> = match do_alloc(alloc, layout) {
1477-
Ok(block) => block.cast(),
1511+
Ok(block) => {
1512+
// Utilize over-sized allocations.
1513+
let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH);
1514+
debug_assert!(x >= buckets);
1515+
// Calculate the new ctrl_offset.
1516+
let (_oversized_layout, oversized_ctrl_offset) =
1517+
match table_layout.calculate_layout_for(x) {
1518+
Some(lco) => lco,
1519+
None => unsafe { hint::unreachable_unchecked() },
1520+
};
1521+
debug_assert!(_oversized_layout.size() <= block.len());
1522+
debug_assert!(oversized_ctrl_offset >= ctrl_offset);
1523+
ctrl_offset = oversized_ctrl_offset;
1524+
buckets = x;
1525+
1526+
block.cast()
1527+
}
14781528
Err(_) => return Err(fallibility.alloc_err(layout)),
14791529
};
14801530

@@ -4168,6 +4218,23 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
41684218
mod test_map {
41694219
use super::*;
41704220

4221+
/// Checks `prev_pow2` at every power of two representable in `usize`, and at
/// the values just above and just below each boundary.
#[test]
fn test_prev_pow2() {
    // Zero is skipped: `prev_pow2` is not defined for that input.
    let top_bit: usize = 1 << (usize::BITS - 1);
    let mut pow2: usize = 1;
    loop {
        // A power of two maps to itself.
        assert_eq!(pow2, prev_pow2(pow2));

        if pow2 == top_bit {
            // There is no larger power of two, so everything from here up to
            // `usize::MAX` must round down to the top bit.
            assert_eq!(pow2, prev_pow2(pow2 + 1));
            assert_eq!(pow2, prev_pow2(usize::MAX));
            break;
        }

        let next_pow2 = pow2 << 1;
        // For pow2 == 1 the neighbor `pow2 + 1` is 2, which is itself a power
        // of 2 and so does not round down to 1; skip that case.
        if next_pow2 > 2 {
            assert_eq!(pow2, prev_pow2(pow2 + 1));
            assert_eq!(pow2, prev_pow2(next_pow2 - 1));
        }
        pow2 = next_pow2;
    }
}
4237+
41714238
#[test]
41724239
fn test_minimum_capacity_for_small_types() {
41734240
#[track_caller]

0 commit comments

Comments
 (0)