Skip to content

Commit 1458aad

Browse files
committed
perf: sample filtering and hashing
Adds inlinevec and bitset as helper datatypes.
1 parent b3136db commit 1458aad

File tree

8 files changed

+1172
-69
lines changed

8 files changed

+1172
-69
lines changed

Cargo.lock

Lines changed: 49 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

profiling/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ features = ["env-filter", "fmt", "smallvec", "std"]
4848
allocator-api2 = { version = "0.2", default-features = false, features = ["alloc"] }
4949
criterion = { version = "0.5.1" }
5050
datadog-php-profiling = { path = ".", features = ["test"] }
51+
proptest = { version = "1" }
5152

5253
[target.'cfg(target_arch = "x86_64")'.dev-dependencies]
5354
criterion-perf-events = "0.4.0"

profiling/src/bitset.rs

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
3+
4+
use std::cell::Cell;
5+
use std::hash::{Hash, Hasher};
6+
7+
/// A minimal, allocation-free set of small integers packed into one `u32`.
///
/// The set is not modified after it has been created.
#[repr(transparent)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitSet {
    /// Bit `i` of this word is set exactly when `i` is a member of the set.
    bits: Cell<u32>,
}
14+
15+
impl Hash for BitSet {
16+
fn hash<H: Hasher>(&self, state: &mut H) {
17+
let bits = self.bits.get();
18+
bits.count_ones().hash(state);
19+
bits.hash(state);
20+
}
21+
}
22+
23+
impl BitSet {
24+
pub const MAX: usize = u32::BITS as usize;
25+
26+
/// Creates a new bitset from the provided number.
27+
pub const fn new(bitset: u32) -> Self {
28+
Self {
29+
bits: Cell::new(bitset),
30+
}
31+
}
32+
33+
/// Creates a new bitset from the iterator.
34+
///
35+
/// # Panics
36+
///
37+
/// Panics if an item is out of the range of the bitset e.g. [`u32::MAX`].
38+
pub fn from_iter<I: Iterator<Item = usize>>(iter: I) -> BitSet {
39+
let mut bits = 0;
40+
let mut insert = |bit| {
41+
// todo: add non-panic API
42+
assert!(bit < BitSet::MAX);
43+
bits |= 1u32 << bit;
44+
};
45+
for bit in iter {
46+
insert(bit);
47+
}
48+
BitSet {
49+
bits: Cell::new(bits),
50+
}
51+
}
52+
53+
#[inline]
54+
pub fn len(&self) -> usize {
55+
self.bits.get().count_ones() as usize
56+
}
57+
58+
#[inline]
59+
pub fn is_empty(&self) -> bool {
60+
self.bits.get() == 0
61+
}
62+
63+
#[inline]
64+
pub fn contains(&self, bit: usize) -> bool {
65+
if bit < 32 {
66+
let mask = 1u32 << bit;
67+
let masked = self.bits.get() & mask;
68+
masked != 0
69+
} else {
70+
false
71+
}
72+
}
73+
74+
pub fn iter(&self) -> BitSetIter {
75+
BitSetIter::new(self)
76+
}
77+
}
78+
79+
/// Iterator over the members of a [`BitSet`], visiting bit positions from
/// lowest to highest.
#[derive(Clone, Debug)]
pub struct BitSetIter {
    /// Copy of the set's bits, taken when the iterator was created.
    bitset: u32,
    /// Next bit position to examine.
    offset: u32,
    /// One past the highest set bit (0 for an empty set); iteration is
    /// finished once `offset` reaches this value.
    end: u32,
}
84+
85+
impl BitSetIter {
86+
pub fn new(bitset: &BitSet) -> BitSetIter {
87+
let bitset = bitset.bits.get();
88+
let offset = 0;
89+
let end = {
90+
let num_bits = u32::BITS;
91+
let leading_zeros = bitset.leading_zeros();
92+
num_bits - leading_zeros
93+
};
94+
BitSetIter {
95+
bitset,
96+
offset,
97+
end,
98+
}
99+
}
100+
}
101+
102+
impl Iterator for BitSetIter {
103+
type Item = usize;
104+
105+
fn next(&mut self) -> Option<Self::Item> {
106+
while self.offset != self.end {
107+
let offset = self.offset;
108+
self.offset += 1;
109+
let mask = 1 << offset;
110+
let masked = self.bitset & mask;
111+
if masked != 0 {
112+
return Some(offset as usize);
113+
}
114+
}
115+
None
116+
}
117+
}
118+
119+
impl ExactSizeIterator for BitSetIter {
120+
fn len(&self) -> usize {
121+
if self.offset < self.end {
122+
let shifted = self.bitset >> self.offset;
123+
shifted.count_ones() as usize
124+
} else {
125+
0
126+
}
127+
}
128+
}
129+
130+
impl IntoIterator for BitSet {
131+
type Item = usize;
132+
type IntoIter = BitSetIter;
133+
fn into_iter(self) -> Self::IntoIter {
134+
BitSetIter::new(&self)
135+
}
136+
}
137+
138+
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use proptest::test_runner::{RngAlgorithm, TestRng};
    use std::collections::HashSet;

    /// A set with every bit on contains every position, iterates them in
    /// order, and reports a shrinking length as the iterator advances.
    #[test]
    fn bitset_full() {
        let bitset = BitSet::new(u32::MAX);
        assert_eq!(bitset.len(), BitSet::MAX);
        assert!(!bitset.is_empty());

        for position in 0..BitSet::MAX {
            assert!(bitset.contains(position));
        }

        for (offset, bit) in bitset.iter().enumerate() {
            assert_eq!(bit, offset);
        }

        let mut iter = bitset.iter();
        let mut remaining = BitSet::MAX;
        assert_eq!(remaining, iter.len());

        // `iter.len()` is queried inside the loop, so the iterator has to be
        // driven by hand instead of with a `for` loop.
        while iter.next().is_some() {
            remaining -= 1;
            assert_eq!(remaining, iter.len());
        }
    }

    /// A set with no bits on contains nothing and its iterator is exhausted
    /// from the start.
    #[test]
    fn bitset_empty() {
        let bitset = BitSet::new(0);
        assert_eq!(0, bitset.len());
        assert!(bitset.is_empty());
        for position in 0..BitSet::MAX {
            assert!(!bitset.contains(position));
        }

        let mut iter = bitset.iter();
        assert_eq!(0, iter.len());
        assert_eq!(None, iter.next());
    }

    // There's nothing special about 27, just testing a single possible number.
    #[test]
    fn bitset_27() {
        let bitset = BitSet::new(1 << 27);
        assert_eq!(1, bitset.len());
        assert!(!bitset.is_empty());
        assert!(bitset.contains(27));

        let mut iter = bitset.iter();
        assert_eq!(1, iter.len());
        assert_eq!(Some(27), iter.next());
    }

    /// Every valid bit position, used as the pool the property test samples
    /// its members from.
    static IOTA: [usize; BitSet::MAX] = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30, 31,
    ];

    proptest! {
        #[test]
        fn bitset_acts_like_a_special_hashset(
            oracle in proptest::sample::subsequence(&IOTA, 1..IOTA.len())
                .prop_map(HashSet::<usize>::from_iter),
        ) {
            use rand::seq::SliceRandom;

            let bitset1 = BitSet::from_iter(oracle.iter().cloned());
            prop_assert_eq!(bitset1.len(), oracle.len());

            // Everything the oracle holds must be reported by the bitset.
            for member in oracle.iter() {
                prop_assert!(bitset1.contains(*member));
            }

            // And the reverse direction, which exercises the iterator.
            let mut visited = 0;
            for member in bitset1.iter() {
                prop_assert!(oracle.contains(&member));
                visited += 1;
            }
            // The iterator must have produced exactly as many items as the
            // oracle holds.
            prop_assert_eq!(visited, oracle.len(),
                "BitSet's iterator didn't have the expected number of iterations"
            );

            // As with ordinary sets, the order of insertion must not matter.
            let mut shuffled: Vec<usize> = oracle.iter().copied().collect();
            let mut rng = TestRng::deterministic_rng(RngAlgorithm::ChaCha);
            shuffled.shuffle(&mut rng);
            let bitset2 = BitSet::from_iter(shuffled.iter().cloned());

            prop_assert_eq!(
                bitset1.clone(), bitset2.clone(),
                "Insertion order unexpectedly mattered, diff in binary: {:b} vs {:b}",
                bitset1.bits.get(), bitset2.bits.get()
            );
        }
    }
}

0 commit comments

Comments
 (0)