Skip to content

Commit 5f72f7a

Browse files
committed
Allow user to provide a pre-hashed value
This is ~10% faster for u64 lookups. With a pre-hashed value, lookup and construction time is constant regardless of the true length of the input (assuming you can amortize the hashing cost somehow externally to this library). For example, for a 128-byte string, construction is ~2.5x faster and lookups are ~1.7x faster.
1 parent 6763f88 commit 5f72f7a

File tree

6 files changed

+419
-114
lines changed

6 files changed

+419
-114
lines changed

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,7 @@ harness = false
3030
[features]
3131
default = ["parallel"]
3232
parallel = ["rayon", "crossbeam-utils"]
33+
34+
[profile.release]
35+
lto = true
36+
debug = 2

benches/build.rs

Lines changed: 111 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ extern crate bencher;
44

55
use bencher::Bencher;
66

7-
use boomphf::Mphf;
7+
use boomphf::{ExternallyHashed, Mphf};
88

99
fn build1_ser_u64(bench: &mut Bencher) {
1010
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
@@ -13,13 +13,48 @@ fn build1_ser_u64(bench: &mut Bencher) {
1313
});
1414
}
1515

16+
fn build1_ser_externally_hashed(bench: &mut Bencher) {
17+
let items: Vec<ExternallyHashed> = (0..1000000u64)
18+
.map(|x| ExternallyHashed(wyhash::wyrng(&mut (x * 2))))
19+
.collect();
20+
bench.iter(|| {
21+
std::hint::black_box(Mphf::new(2.0, &items));
22+
});
23+
}
24+
1625
fn build1_ser_slices(bench: &mut Bencher) {
1726
let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
1827
bench.iter(|| {
1928
std::hint::black_box(Mphf::new(2.0, &items));
2029
});
2130
}
2231

32+
fn build1_ser_long_slices(bench: &mut Bencher) {
33+
let items = (0..1000000u64)
34+
.map(|x| {
35+
let mut long_key = [0u8; 128];
36+
long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
37+
long_key
38+
})
39+
.collect::<Vec<_>>();
40+
bench.iter(|| {
41+
std::hint::black_box(Mphf::new(2.0, &items));
42+
});
43+
}
44+
45+
fn build1_ser_long_slices_externally_hashed(bench: &mut Bencher) {
46+
let items = (0..1000000u64)
47+
.map(|x| {
48+
let mut long_key = [0u8; 128];
49+
long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
50+
ExternallyHashed(wyhash::wyhash(&long_key, 0))
51+
})
52+
.collect::<Vec<_>>();
53+
bench.iter(|| {
54+
std::hint::black_box(Mphf::new(2.0, &items));
55+
});
56+
}
57+
2358
#[allow(dead_code)]
2459
fn build1_par_u64(bench: &mut Bencher) {
2560
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
@@ -38,16 +73,88 @@ fn build1_par_slices(bench: &mut Bencher) {
3873
});
3974
}
4075

41-
fn scan1_ser(bench: &mut Bencher) {
76+
fn scan1_ser_u64(bench: &mut Bencher) {
4277
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
4378
let phf = Mphf::new(2.0, &items);
4479

4580
bench.iter(|| {
46-
for i in (0..1000000u64).map(|x| x * 2) {
81+
for i in &items {
4782
std::hint::black_box(phf.hash(&i));
4883
}
4984
});
5085
}
5186

52-
benchmark_group!(benches, build1_ser_u64, build1_ser_slices, build1_par_u64, build1_par_slices, scan1_ser);
87+
fn scan1_ser_slice(bench: &mut Bencher) {
88+
let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
89+
let phf = Mphf::new(2.0, &items);
90+
91+
bench.iter(|| {
92+
for i in &items {
93+
std::hint::black_box(phf.hash(i));
94+
}
95+
});
96+
}
97+
98+
fn scan1_ser_externally_hashed(bench: &mut Bencher) {
99+
let items: Vec<ExternallyHashed> = (0..1000000u64)
100+
.map(|x| ExternallyHashed(wyhash::wyrng(&mut (x * 2))))
101+
.collect();
102+
let phf = Mphf::new(2.0, &items);
103+
104+
bench.iter(|| {
105+
for i in &items {
106+
std::hint::black_box(phf.hash(i));
107+
}
108+
});
109+
}
110+
111+
fn scan1_ser_long_key(bench: &mut Bencher) {
112+
let items = (0..1000000u64)
113+
.map(|x| {
114+
let mut long_key = [0u8; 128];
115+
long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
116+
long_key
117+
})
118+
.collect::<Vec<_>>();
119+
let phf = Mphf::new(2.0, &items);
120+
121+
bench.iter(|| {
122+
for i in &items {
123+
std::hint::black_box(phf.hash(i));
124+
}
125+
});
126+
}
127+
128+
fn scan1_ser_long_key_externally_hashed(bench: &mut Bencher) {
129+
let items: Vec<ExternallyHashed> = (0..1000000u64)
130+
.map(|x| {
131+
let mut long_key = [0u8; 128];
132+
long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
133+
ExternallyHashed(wyhash::wyhash(&long_key, 0))
134+
})
135+
.collect();
136+
let phf = Mphf::new(2.0, &items);
137+
138+
bench.iter(|| {
139+
for i in &items {
140+
std::hint::black_box(phf.hash(i));
141+
}
142+
});
143+
}
144+
145+
benchmark_group!(
146+
benches,
147+
build1_ser_externally_hashed,
148+
build1_ser_u64,
149+
build1_ser_slices,
150+
build1_ser_long_slices,
151+
build1_ser_long_slices_externally_hashed,
152+
build1_par_u64,
153+
build1_par_slices,
154+
scan1_ser_u64,
155+
scan1_ser_slice,
156+
scan1_ser_externally_hashed,
157+
scan1_ser_long_key,
158+
scan1_ser_long_key_externally_hashed
159+
);
53160
benchmark_main!(benches);

src/bitvector.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ impl BitVector {
363363
#[inline]
364364
pub fn get_word(&self, word: usize) -> u64 {
365365
#[cfg(feature = "parallel")]
366-
return self.vector[word].load(Ordering::Relaxed) as u64;
366+
return self.vector[word].load(Ordering::Relaxed);
367367

368368
#[cfg(not(feature = "parallel"))]
369369
return self.vector[word] as u64;

0 commit comments

Comments
 (0)