|
| 1 | +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ |
| 2 | +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause |
| 3 | + |
| 4 | +use std::cell::Cell; |
| 5 | +use std::hash::{Hash, Hasher}; |
| 6 | + |
| 7 | +/// Very simple bitset which doesn't allocate, and doesn't change after it has |
| 8 | +/// been created. |
| 9 | +#[derive(Clone, Debug, Eq, PartialEq)] |
| 10 | +#[repr(transparent)] |
| 11 | +pub struct BitSet { |
| 12 | + bits: Cell<u32>, |
| 13 | +} |
| 14 | + |
| 15 | +impl Hash for BitSet { |
| 16 | + fn hash<H: Hasher>(&self, state: &mut H) { |
| 17 | + let bits = self.bits.get(); |
| 18 | + bits.count_ones().hash(state); |
| 19 | + bits.hash(state); |
| 20 | + } |
| 21 | +} |
| 22 | + |
| 23 | +impl BitSet { |
| 24 | + pub const MAX: usize = u32::BITS as usize; |
| 25 | + |
| 26 | + /// Creates a new bitset from the provided number. |
| 27 | + pub const fn new(bitset: u32) -> Self { |
| 28 | + Self { |
| 29 | + bits: Cell::new(bitset), |
| 30 | + } |
| 31 | + } |
| 32 | + |
| 33 | + /// Creates a new bitset from the iterator. |
| 34 | + /// |
| 35 | + /// # Panics |
| 36 | + /// |
| 37 | + /// Panics if an item is out of the range of the bitset e.g. [`u32::MAX`]. |
| 38 | + pub fn from_iter<I: Iterator<Item = usize>>(iter: I) -> BitSet { |
| 39 | + let mut bits = 0; |
| 40 | + let mut insert = |bit| { |
| 41 | + // todo: add non-panic API |
| 42 | + assert!(bit < BitSet::MAX); |
| 43 | + bits |= 1u32 << bit; |
| 44 | + }; |
| 45 | + for bit in iter { |
| 46 | + insert(bit); |
| 47 | + } |
| 48 | + BitSet { |
| 49 | + bits: Cell::new(bits), |
| 50 | + } |
| 51 | + } |
| 52 | + |
| 53 | + #[inline] |
| 54 | + pub fn len(&self) -> usize { |
| 55 | + self.bits.get().count_ones() as usize |
| 56 | + } |
| 57 | + |
| 58 | + #[inline] |
| 59 | + pub fn is_empty(&self) -> bool { |
| 60 | + self.bits.get() == 0 |
| 61 | + } |
| 62 | + |
| 63 | + #[inline] |
| 64 | + pub fn contains(&self, bit: usize) -> bool { |
| 65 | + if bit < 32 { |
| 66 | + let mask = 1u32 << bit; |
| 67 | + let masked = self.bits.get() & mask; |
| 68 | + masked != 0 |
| 69 | + } else { |
| 70 | + false |
| 71 | + } |
| 72 | + } |
| 73 | + |
| 74 | + pub fn iter(&self) -> BitSetIter { |
| 75 | + BitSetIter::new(self) |
| 76 | + } |
| 77 | +} |
| 78 | + |
| 79 | +pub struct BitSetIter { |
| 80 | + bitset: u32, |
| 81 | + offset: u32, |
| 82 | + end: u32, |
| 83 | +} |
| 84 | + |
| 85 | +impl BitSetIter { |
| 86 | + pub fn new(bitset: &BitSet) -> BitSetIter { |
| 87 | + let bitset = bitset.bits.get(); |
| 88 | + let offset = 0; |
| 89 | + let end = { |
| 90 | + let num_bits = u32::BITS; |
| 91 | + let leading_zeros = bitset.leading_zeros(); |
| 92 | + num_bits - leading_zeros |
| 93 | + }; |
| 94 | + BitSetIter { |
| 95 | + bitset, |
| 96 | + offset, |
| 97 | + end, |
| 98 | + } |
| 99 | + } |
| 100 | +} |
| 101 | + |
| 102 | +impl Iterator for BitSetIter { |
| 103 | + type Item = usize; |
| 104 | + |
| 105 | + fn next(&mut self) -> Option<Self::Item> { |
| 106 | + while self.offset != self.end { |
| 107 | + let offset = self.offset; |
| 108 | + self.offset += 1; |
| 109 | + let mask = 1 << offset; |
| 110 | + let masked = self.bitset & mask; |
| 111 | + if masked != 0 { |
| 112 | + return Some(offset as usize); |
| 113 | + } |
| 114 | + } |
| 115 | + None |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +impl ExactSizeIterator for BitSetIter { |
| 120 | + fn len(&self) -> usize { |
| 121 | + if self.offset < self.end { |
| 122 | + let shifted = self.bitset >> self.offset; |
| 123 | + shifted.count_ones() as usize |
| 124 | + } else { |
| 125 | + 0 |
| 126 | + } |
| 127 | + } |
| 128 | +} |
| 129 | + |
| 130 | +impl IntoIterator for BitSet { |
| 131 | + type Item = usize; |
| 132 | + type IntoIter = BitSetIter; |
| 133 | + fn into_iter(self) -> Self::IntoIter { |
| 134 | + BitSetIter::new(&self) |
| 135 | + } |
| 136 | +} |
| 137 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use proptest::test_runner::{RngAlgorithm, TestRng};
    use std::collections::HashSet;

    /// A set with every bit set contains every in-range value, iterates in
    /// ascending order, and reports a shrinking length as it is consumed.
    #[test]
    fn bitset_full() {
        let bitset = BitSet::new(u32::MAX);
        assert_eq!(bitset.len(), BitSet::MAX);
        assert!(!bitset.is_empty());

        for i in 0..BitSet::MAX {
            assert!(bitset.contains(i));
        }
        // Out-of-range values are never members, even in a full set.
        assert!(!bitset.contains(BitSet::MAX));
        assert!(!bitset.contains(usize::MAX));

        for (offset, bit) in bitset.iter().enumerate() {
            assert_eq!(bit, offset);
        }

        let mut iter = bitset.iter();
        let mut len = BitSet::MAX;
        assert_eq!(len, iter.len());

        // A `for` loop would hold the iterator borrowed, and `len()` is
        // checked after every step, so drive `next` by hand.
        while iter.next().is_some() {
            len -= 1;
            assert_eq!(len, iter.len());
        }
        // The iterator must have produced exactly `BitSet::MAX` items.
        assert_eq!(0, len);
    }

    /// An empty set contains nothing and its iterator yields nothing.
    #[test]
    fn bitset_empty() {
        let bitset = BitSet::new(0);
        assert_eq!(0, bitset.len());
        assert!(bitset.is_empty());
        for i in 0..BitSet::MAX {
            assert!(!bitset.contains(i));
        }
        assert!(!bitset.contains(BitSet::MAX));

        let mut iter = bitset.iter();
        let len = 0;
        assert_eq!(len, iter.len());
        assert_eq!(None, iter.next());
    }

    // There's nothing special about 27, just testing a single possible number.
    #[test]
    fn bitset_27() {
        let bitset = BitSet::new(1 << 27);
        assert_eq!(1, bitset.len());
        assert!(!bitset.is_empty());
        assert!(bitset.contains(27));

        let mut iter = bitset.iter();
        let len = 1;
        assert_eq!(len, iter.len());
        assert_eq!(Some(27), iter.next());
    }

    /// Every value a `BitSet` can hold, used as the sampling pool below.
    static IOTA: [usize; BitSet::MAX] = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30, 31,
    ];

    proptest! {
        #[test]
        fn bitset_acts_like_a_special_hashset(
            // The inclusive range lets the empty set and the full set be
            // generated too.
            oracle in proptest::sample::subsequence(&IOTA, 0..=IOTA.len())
                .prop_map(HashSet::<usize>::from_iter),
        ) {
            let bitset1 = BitSet::from_iter(oracle.iter().copied());
            prop_assert_eq!(bitset1.len(), oracle.len());

            // Items in the oracle exist in the bitset.
            for item in oracle.iter() {
                prop_assert!(bitset1.contains(*item));
            }

            // Test the other way around to check the iterator implementation.
            let mut i = 0;
            for item in bitset1.iter() {
                prop_assert!(oracle.contains(&item));
                i += 1;
            }
            // Make sure the iterator ran as many times as we expected.
            prop_assert_eq!(i, oracle.len(),
                "BitSet's iterator didn't have the expected number of iterations"
            );

            // Like regular sets, insertion order doesn't matter in bitsets.
            let mut shuffled = oracle.iter().copied().collect::<Vec<_>>();
            let mut rng = TestRng::deterministic_rng(RngAlgorithm::ChaCha);
            use rand::seq::SliceRandom;
            shuffled.shuffle(&mut rng);
            let bitset2 = BitSet::from_iter(shuffled.iter().copied());

            // Compare by reference: both sets are still needed for the
            // failure message, and no copy is required.
            prop_assert_eq!(
                &bitset1, &bitset2,
                "Insertion order unexpectedly mattered, diff in binary: {:b} vs {:b}",
                bitset1.bits.get(), bitset2.bits.get()
            );
        }
    }
}
0 commit comments