diff --git a/Cargo.toml b/Cargo.toml index 312f46d..0b09dcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/*", "crates/bpe/benchmarks", "crates/bpe/tests", + "crates/consistent-hashing/benchmarks", ] resolver = "2" diff --git a/crates/consistent-hashing/Cargo.toml b/crates/consistent-hashing/Cargo.toml new file mode 100644 index 0000000..53f4e02 --- /dev/null +++ b/crates/consistent-hashing/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "consistent-hashing" +version = "0.1.0" +edition = "2021" +description = "Constant time consistent hashing algorithms." +repository = "https://github.com/github/rust-gems" +license = "MIT" +keywords = ["probabilistic", "algorithm", "consistent hashing", "jump hashing", "rendezvous hashing"] +categories = ["algorithms", "data-structures", "mathematics", "science"] + +[lib] +crate-type = ["lib", "staticlib"] +bench = false + +[dependencies] + +[dev-dependencies] diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md new file mode 100644 index 0000000..0830c66 --- /dev/null +++ b/crates/consistent-hashing/README.md @@ -0,0 +1,116 @@ +# Consistent Hashing + +Consistent hashing maps keys to a changing set of nodes (shards, servers) so that when nodes join or leave, only a small fraction of keys move. It is used in distributed caches, databases, object stores, and load balancers to achieve scalability and high availability with minimal data reshuffling. + +Common algorithms +- [Consistent hashing](https://en.wikipedia.org/wiki/Consistent_hashing) (hash ring with virtual nodes) +- [Rendezvous hashing](https://en.wikipedia.org/wiki/Rendezvous_hashing) +- [Jump consistent hash](https://arxiv.org/pdf/1406.2294) +- [Maglev hashing](https://research.google/pubs/pub44824) +- [AnchorHash: A Scalable Consistent Hash](https://arxiv.org/abs/1812.09674) +- [DXHash](https://arxiv.org/abs/2107.07930) +- [JumpBackHash](https://arxiv.org/abs/2403.18682) + +## Complexity summary + +where `N` is the number of nodes and `R` is the number of replicas. + +| Algorithm | Lookup per key
(no replication) | Node add/remove | Memory | Lookup with replication | +|-------------------------|--------------------------------------------------------------------------|-----------------|----------------|-----------------------------------------------| +| Hash ring (with vnodes) | O(log N): binary search over N points; O(1): with specialized structures | O(log N) | O(N) | O(log N + R): Take next R distinct successors | +| Rendezvous | O(N): max score | O(1) | O(N) node list | O(N log R): pick top R scores | +| Jump consistent hash | O(log(N)) expected | 0 | O(1) | O(R log N) | +| AnchorHash | O(1) expected | O(1) | O(N) | Not native | +| DXHash | O(1) expected | O(1) | O(N) | Not native | +| JumpBackHash | O(1) expected | 0 | O(1) | Not native | +| **ConsistentChooseK** | **O(1) expected** | **0** | **O(1)** | **O(R^2)**; **O(R log(R))**: using heap | + +Replication of keys +- Hash ring: replicate by walking clockwise to the next R distinct nodes. Virtual nodes help spread replicas more evenly. Replicas are not independently distributed. +- Rendezvous hashing: replicate by selecting the top R nodes by score for the key. This naturally yields R distinct owners and supports weights. +- Jump consistent hash: the base function doesn't support replication. While the math can be modified to support consistent replication, it cannot be efficiently solved for large k and even for small k (=2 or =3), a quadratic or cubic equation has to be solved. +- JumpBackHash and variants: The trick of Jump consistent hash to support replication won't work here due to the introduction of additional state. +- ConsistentChooseK: Faster and more memory efficient than all other solutions. + +Why replication matters +- Tolerates node failures and maintenance without data unavailability. +- Distributes read/write load across multiple owners, reducing hotspots. +- Enables fast recovery and higher tail-latency resilience. + +## ConsistentChooseK algorithm + +The following functions summarize the core algorithmic innovation as a minimal Rust excerpt. +`n` is the number of nodes and `k` is the number of desired replica. +The chosen nodes are returned as distinct integers in the range `0..n`. + +``` +fn consistent_choose_k(key: Key, k: usize, n: usize) -> Vec { + (0..k).rev().scan(n, |n, k| Some(consistent_choose_max(key, k + 1, n))).collect() +} + +fn consistent_choose_max(key: Key, k: usize, n: usize) -> usize { + (0..k).map(|k| consistent_hash(key, k, n - k) + k).max() +} + +fn consistent_hash(key: Key, i: usize, n: usize) -> usize { + // compute the i-th independent consistent hash for `key` and `n` nodes. +} +``` + +`consistent_choose_k` makes `k` calls to `consistent_choose_max` which calls `consistent_hash` another `k` times. +In total, `consistent_hash` is called `k * (k+1) / 2` many times. Utilizing a `O(1)` solution for `consistent_hash` leads to a `O(k^2)` runtime. +This runtime can be further improved by replacing the max operation with a heap where popped elements are updated according to the new arguments `n` and `k`. +With this optimization, the complexity reduces to `O(k log k)`. +With some probabilistic bucketing strategy, it should be possible to reduce the expected runtime to `O(k)`. +For small `k` neither optimization is probably improving the actual performance though. + +The next section proves the correctness of this algorithm. + +## N-Choose-K replication + +We define the consistent `n-choose-k` replication as follows: + +1. For a given number `n` of nodes, choose `k` distinct nodes `S`. +2. For a given `key` the chosen set of nodes must be uniformly chosen from all possible sets of size `k`. +3. When `n` increases by one, exactly one node in the chosen set will be changed. +4. and the node will be changed with probability `k/(n+1)`. + +In the remainder of this section we prove that the `consistent_choose_k` algorithm satisfies those properties. + +Let's define `M(k,n) = consistent_choose_max(_, k, n)` and `S(k, n) := consistent_choose_k(_, k, n)` as short-cuts for some arbitrary fixed `key`. +We assume that `consistent_hash(key, k, n)` computes `k` independent consistent hash functions. + +### Property 1 + +Since `M(k, n) < n` and `S(k, n) = {M(k, n)} ∪ S(k - 1, M(k, n))` for `k > 1`, `S(k, n)` constructs a strictly monotonically decreasing sequence. The sequence outputs exactly `k` elements which therefore must all be distinct which proves property 1 for `k <= n`. + +Properties 2, 3, and 4 can be proven via induction as follows. + +### Property 4 + +`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`. + +`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4. + +### Property 3 + +Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element. +We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`. + +If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elemetns `m` and `n` proving property 3. + +If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction). +Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof. + +### Property 2 + +The final part is to prove property 2. This time we have an inducation over `k` and `n`. +As before, the base case of the induction for `k=1` and all `n>0` is inherited from the `consistency_hash` implementation. The case `n=k` is also trivially covered, since the only valid set are the numbers `{0, ..., k-1}` which the algorithm correctly outputs. So, we only need to care about the induction step where `k>1` and `n>k`. + +We need to prove that `P(i ∈ S(k+1, n+1)) = (k+1)/(n+1)` for all `0 <= i <= n`. Property 3 already proves the case `i = n`. Furthermore we know that `P(n ∈ S(k+1, n+1)) = (k+1)/(n+1)` and vice versa `P(n ∉ S(k+1, n+1)) = 1 - (k+1)/(n+1)`. Let's consider those two cases separately. + +`n ∈ S(k+1, n+1)`: By the definition of `S`, we know that `S(k+1, n+1) = {n} ∪ S(k, n)`. `P(i ∈ S(k+1, n+1)) = P(i ∈ S(k, n)) P(n ∈ S(k+1, n+1)) = k/n * (k+1)/(n+1)` for all `0 <= i < n`. + +`n ∉ S(k+1, n+1)`: Once more by definition, `S(k+1, n+1) = S(k+1, n)` in this case. `P(i ∈ S(k+1, n+1)) = P(i ∈ S(k+1, n)) P(n ∉ S(k+1, n+1)) = (k+1)/n * (1 - (k+1)/(n+1))` for all `0 <= i < n`. + +Summing both cases together leads to `P(i ∈ S(k+1, n+1)) = k/n * (k+1)/(n+1) + (k+1)/n * (1 - (k+1)/(n+1)) = k/n * (k+1)/(n+1) + k/n * (1 - (k+1)/(n+1)) + 1/n * (1 - (k+1)/(n+1)) = k/n * (k+1)/(n+1) + k/n - k/n * (k+1)/(n+1) + 1/n - 1/n * (k+1)/(n+1) = k/n + 1/n - 1/n * (k+1)/(n+1) = (k+1)/n - (k+1)/(n+1)/n = (k+1)/n * (1 - 1/(n+1)) = (k+1)/(n+1)` for all `0 <= i < n` which concludes the proof. diff --git a/crates/consistent-hashing/benchmarks/Cargo.toml b/crates/consistent-hashing/benchmarks/Cargo.toml new file mode 100644 index 0000000..580e5ab --- /dev/null +++ b/crates/consistent-hashing/benchmarks/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "consistent-hashing-benchmarks" +edition = "2021" + +[[bench]] +name = "performance" +path = "performance.rs" +harness = false +test = false + +[dependencies] +consistent-hashing = { path = "../" } + +criterion = { version = "0.7", features = ["csv_output"] } +rand = "0.9" diff --git a/crates/consistent-hashing/benchmarks/criterion.toml b/crates/consistent-hashing/benchmarks/criterion.toml new file mode 100644 index 0000000..0e43927 --- /dev/null +++ b/crates/consistent-hashing/benchmarks/criterion.toml @@ -0,0 +1,18 @@ +# save report in this directory, even if a custom target directory is set +criterion_home = "./target/criterion" + +# The colors table allows users to configure the colors used by the charts +# cargo-criterion generates. +[colors] +# Color-blind friendly color scheme from https://personal.sron.nl/~pault/. +comparison_colors = [ + {r = 51, g = 34, b = 136 }, # indigo + {r = 136, g = 204, b = 238 }, # cyan + {r = 68, g = 170, b = 153 }, # teal + {r = 17, g = 119, b = 51 }, # green + {r = 153, g = 153, b = 51 }, # olive + {r = 221, g = 204, b = 119 }, # sand + {r = 204, g = 102, b = 119 }, # rose + {r = 136, g = 34, b = 85 }, # wine + {r = 170, g = 68, b = 153 }, # purple +] diff --git a/crates/consistent-hashing/benchmarks/performance.rs b/crates/consistent-hashing/benchmarks/performance.rs new file mode 100644 index 0000000..05dd929 --- /dev/null +++ b/crates/consistent-hashing/benchmarks/performance.rs @@ -0,0 +1,65 @@ +use std::{ + hash::{DefaultHasher, Hash, Hasher}, + hint::black_box, + time::Duration, +}; + +use consistent_hashing::{ConsistentChooseKHasher, ConsistentHasher}; +use criterion::{ + criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, + Throughput, +}; +use rand::{rng, Rng}; + +fn throughput_benchmark(c: &mut Criterion) { + let keys: Vec = rng().random_iter().take(1000).collect(); + + let mut group = c.benchmark_group(format!("choose")); + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for n in [1usize, 10, 100, 1000, 10000] { + group.throughput(Throughput::Elements(keys.len() as u64)); + group.bench_with_input(BenchmarkId::new(format!("1"), n), &n, |b, n| { + b.iter_batched( + || &keys, + |keys| { + for key in keys { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + black_box(ConsistentHasher::new(h).prev(*n + 1)); + } + }, + criterion::BatchSize::SmallInput, + ) + }); + for k in [1, 2, 3, 10, 100] { + group.bench_with_input(BenchmarkId::new(format!("k_{k}"), n), &n, |b, n| { + b.iter_batched( + || &keys, + |keys| { + let mut res = Vec::with_capacity(k); + for key in keys { + let mut h = DefaultHasher::default(); + key.hash(&mut h); + black_box( + ConsistentChooseKHasher::new(h, k).prev_with_vec(*n + k, &mut res), + ); + } + }, + criterion::BatchSize::SmallInput, + ) + }); + } + } + group.finish(); +} + +criterion_group!( + name = benches; + config = Criterion::default() + .warm_up_time(Duration::from_millis(500)) + .measurement_time(Duration::from_millis(4000)) + .nresamples(1000); + + targets = throughput_benchmark, +); +criterion_main!(benches); diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs new file mode 100644 index 0000000..dcfbb0c --- /dev/null +++ b/crates/consistent-hashing/src/lib.rs @@ -0,0 +1,373 @@ +use std::hash::{Hash, Hasher}; + +/// A trait which behaves like a pseudo-random number generator. +/// It is used to generate consistent hashes within one bucket. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSequence { + fn next(&mut self) -> u64; +} + +/// A trait for building a special bit mask and sequences of hashes for different bit positions. +/// Note: the hasher must have been seeded with the key during construction. +pub trait HashSeqBuilder { + type Seq: HashSequence; + + /// Returns a bit mask indicating which buckets have at least one hash. + fn bit_mask(&self) -> u64; + /// Return a HashSequence instance which is seeded with the given bit position + /// and the seed of this builder. + fn hash_seq(&self, bit: u64) -> Self::Seq; +} + +/// A trait for building multiple independent hash builders +/// Note: the hasher must have been seeded with the key during construction. +pub trait ManySeqBuilder { + type Builder: HashSeqBuilder; + + /// Returns the i-th independent hash builder. + fn seq_builder(&self, i: usize) -> Self::Builder; +} + +impl HashSequence for H { + fn next(&mut self) -> u64 { + 54387634019u64.hash(self); + self.finish() + } +} + +impl HashSeqBuilder for H { + type Seq = H; + + fn bit_mask(&self) -> u64 { + self.finish() + } + + fn hash_seq(&self, bit: u64) -> Self::Seq { + let mut hasher = self.clone(); + bit.hash(&mut hasher); + hasher + } +} + +impl ManySeqBuilder for H { + type Builder = H; + + fn seq_builder(&self, i: usize) -> Self::Builder { + let mut hasher = self.clone(); + i.hash(&mut hasher); + hasher + } +} + +/// One building block for the consistent hashing algorithm is a consistent +/// hash iterator which enumerates all the hashes for a specific bucket. +/// A bucket covers the range `(1< { + hasher: H, + n: usize, + is_first: bool, + bit: u64, // A bitmask with a single bit set. +} + +impl BucketIterator { + fn new(n: usize, bit: u64, hasher: H) -> Self { + Self { + hasher, + n, + is_first: true, + bit, + } + } +} + +impl Iterator for BucketIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.bit == 0 { + return None; + } + if self.is_first { + let res = (self.hasher.next() & (self.bit - 1)) + self.bit; + self.is_first = false; + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + } + loop { + let res = self.hasher.next() & (self.bit * 2 - 1); + if res & self.bit == 0 { + return None; + } + if res < self.n as u64 { + self.n = res as usize; + return Some(self.n); + } + } + } +} + +/// An iterator which enumerates all the consistent hashes for a given key +/// from largest to smallest in the range `0..n`. +pub struct ConsistentHashRevIterator { + builder: H, + bits: u64, + n: usize, + inner: Option>, +} + +impl ConsistentHashRevIterator { + pub fn new(n: usize, builder: H) -> Self { + Self { + bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1), + builder, + n, + inner: None, + } + } +} + +impl Iterator for ConsistentHashRevIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if self.n == 0 { + return None; + } + if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) { + return Some(res); + } + while self.bits > 0 { + let bit = 1 << self.bits.ilog2(); + self.bits ^= bit; + let seq = self.builder.hash_seq(bit); + let mut iter = BucketIterator::new(self.n, bit, seq); + if let Some(res) = iter.next() { + self.inner = Some(iter); + return Some(res); + } + } + self.n = 0; + Some(0) + } +} + +/// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest +/// for the range `n..`. +pub struct ConsistentHashIterator { + bits: u64, + n: usize, + builder: H, + stack: Vec, +} + +impl ConsistentHashIterator { + pub fn new(n: usize, builder: H) -> Self { + Self { + bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1), + stack: if n == 0 { vec![0] } else { vec![] }, + builder, + n, + } + } +} + +impl Iterator for ConsistentHashIterator { + type Item = usize; + + fn next(&mut self) -> Option { + if let Some(res) = self.stack.pop() { + return Some(res); + } + while self.bits > 0 { + let bit = self.bits & !(self.bits - 1); + self.bits &= self.bits - 1; + let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit)); + self.stack = inner.take_while(|x| *x >= self.n).collect(); + if let Some(res) = self.stack.pop() { + return Some(res); + } + } + None + } +} + +/// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute +/// the next or previous consistent hash for a given key for a given number of nodes `n`. +pub struct ConsistentHasher { + builder: H, +} + +impl ConsistentHasher { + pub fn new(builder: H) -> Self { + Self { builder } + } + + pub fn prev(&self, n: usize) -> Option + where + H: Clone, + { + let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone()); + sampler.next() + } + + pub fn next(&self, n: usize) -> Option + where + H: Clone, + { + let mut sampler = ConsistentHashIterator::new(n, self.builder.clone()); + sampler.next() + } + + pub fn into_prev(self, n: usize) -> Option { + ConsistentHashRevIterator::new(n, self.builder).next() + } +} + +/// Implementation of a consistent choose k hashing algorithm. +/// It returns k distinct consistent hashes in the range `0..n`. +/// The hashes are consistent when `n` changes and when `k` changes! +/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change +/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple +/// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples. +pub struct ConsistentChooseKHasher { + builder: H, + k: usize, +} + +impl ConsistentChooseKHasher { + pub fn new(builder: H, k: usize) -> Self { + Self { builder, k } + } + + pub fn prev(&self, n: usize) -> Vec { + let mut res = Vec::with_capacity(self.k); + self.prev_with_vec(n, &mut res); + res + } + + pub fn prev_with_vec(&self, mut n: usize, samples: &mut Vec) { + assert!(n >= self.k, "n must be at least k"); + samples.clear(); + for i in 0..self.k { + samples.push( + ConsistentHasher::new(self.builder.seq_builder(i)) + .into_prev(n - i) + .expect("must not fail") + + i, + ); + } + for i in (0..self.k).rev() { + n = samples[0..=i].iter().copied().max().expect(""); + samples[i] = n; + for (j, sample) in samples[0..i].iter_mut().enumerate() { + if *sample == n { + *sample = ConsistentHasher::new(self.builder.seq_builder(j)) + .into_prev(n - j) + .expect("must not fail") + + j; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::hash::DefaultHasher; + + use super::*; + + fn hasher_for_key(key: u64) -> DefaultHasher { + let mut hasher = DefaultHasher::default(); + key.hash(&mut hasher); + hasher + } + + #[test] + fn test_uniform_1() { + for k in 0..100 { + let hasher = hasher_for_key(k); + let sampler = ConsistentHasher::new(hasher.clone()); + for n in 0..1000 { + assert!(sampler.prev(n + 1) <= sampler.prev(n + 2)); + let next = sampler.next(n).unwrap(); + assert_eq!(next, sampler.prev(next + 1).unwrap()); + } + let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone()) + .take_while(|x| *x < 1000) + .collect(); + iter_rev.reverse(); + let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect(); + assert_eq!(iter, iter_rev); + } + let mut stats = vec![0; 13]; + for i in 0..100000 { + let hasher = hasher_for_key(i); + let sampler = ConsistentHasher::new(hasher); + let x = sampler.prev(stats.len()).unwrap(); + stats[x] += 1; + } + println!("{stats:?}"); + } + + #[test] + fn test_uniform_k() { + const K: usize = 3; + for k in 0..100 { + let hasher = hasher_for_key(k); + let sampler = ConsistentChooseKHasher::new(hasher, K); + for n in K..1000 { + let samples = sampler.prev(n + 1); + assert!(samples.len() == K); + for i in 0..K - 1 { + assert!(samples[i] < samples[i + 1]); + } + let next = sampler.prev(n + 2); + for i in 0..K { + assert!(samples[i] <= next[i]); + } + let mut merged = samples.clone(); + merged.extend(next.clone()); + merged.sort(); + merged.dedup(); + assert!( + merged.len() == K || merged.len() == K + 1, + "Unexpected {samples:?} vs. {next:?}" + ); + } + } + let mut stats = vec![0; 8]; + for i in 0..32 { + let hasher = hasher_for_key(i + 32783); + let sampler = ConsistentChooseKHasher::new(hasher, 2); + let samples = sampler.prev(stats.len()); + for s in samples { + stats[s] += 1; + } + } + println!("{stats:?}"); + assert_eq!(stats, vec![10, 12, 6, 6, 6, 5, 9, 10]); + // Test consistency when increasing k! + for k in 1..10 { + for n in k + 1..20 { + for key in 0..1000 { + let hasher = hasher_for_key(key); + let sampler1 = ConsistentChooseKHasher::new(hasher.clone(), k); + let sampler2 = ConsistentChooseKHasher::new(hasher, k + 1); + let set1 = sampler1.prev(n); + let set2 = sampler2.prev(n); + assert_eq!(set1.len(), k); + assert_eq!(set2.len(), k + 1); + let mut merged = set1.clone(); + merged.extend(set2); + merged.sort(); + merged.dedup(); + assert_eq!(merged.len(), k + 1); + } + } + } + } +}