- 
                Notifications
    You must be signed in to change notification settings 
- Fork 20
Add associated functions/methods for exposing/loading filter data in BloomFilter + CuckooFilter #125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add associated functions/methods for exposing/loading filter data in BloomFilter + CuckooFilter #125
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -5,7 +5,7 @@ use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; | |
| use std::marker::PhantomData; | ||
|  | ||
| use rand::Rng; | ||
| use succinct::{IntVec, IntVecMut, IntVector}; | ||
| use succinct::{BitVec, BitVecMut, IntVec, IntVecMut, IntVector}; | ||
|  | ||
| use crate::filters::Filter; | ||
| use crate::helpers::all_zero_intvector; | ||
|  | @@ -186,6 +186,36 @@ where | |
| Self::with_params_and_hash(rng, bucketsize, n_buckets, l_fingerprint, bh) | ||
| } | ||
|  | ||
| /// Rebuild a CuckooFilter from previously exported filter table data: | ||
| /// | ||
| /// - `rng`: random number generator used for certain random actions | ||
| /// - `bucketsize`: number of elements per bucket, must be at least 2 | ||
| /// - `n_buckets`: number of buckets, must be a power of 2 and at least 2 | ||
| /// - `l_fingerprint`: size of the fingerprint in bits | ||
| /// - `n_elements`: element count to record for the restored filter | ||
| /// - `table_succinct_blocks`: raw `u64` block data of the filter table | ||
| /// | ||
| /// Every argument is forwarded unchanged to `with_existing_filter_and_hash`. | ||
| /// The BuildHasher is set to the `DefaultHasher`. | ||
| pub fn with_existing_filter<I: IntoIterator<Item = u64>>( | ||
| rng: R, | ||
| bucketsize: usize, | ||
| n_buckets: usize, | ||
| l_fingerprint: usize, | ||
| n_elements: usize, | ||
| table_succinct_blocks: I, | ||
| ) -> Self { | ||
| Self::with_existing_filter_and_hash( | ||
| rng, | ||
| bucketsize, | ||
| n_buckets, | ||
| l_fingerprint, | ||
| n_elements, | ||
| table_succinct_blocks, | ||
| // Default-constructed hasher builder, matching the other non-`_and_hash` constructors. | ||
| BuildHasherDefault::<DefaultHasher>::default(), | ||
| ) | ||
| } | ||
|  | ||
| /// Construct new `bucketsize=4`-cuckoofilter with properties: | ||
| /// | ||
| /// - `false_positive_rate`: false positive lookup rate | ||
|  | @@ -260,6 +290,28 @@ where | |
| } | ||
| } | ||
|  | ||
| /// Same as `with_existing_filter` but with specific `BuildHasher`. | ||
| /// | ||
| /// The filter is first created via `with_params_and_hash` from `rng`, | ||
| /// `bucketsize`, `n_buckets`, `l_fingerprint` and `bh`. Its table blocks are | ||
| /// then overwritten, in order, with `table_succinct_blocks`, and `n_elements` | ||
| /// is stored as the element count without further checks. | ||
| /// | ||
| /// # Panics | ||
| /// | ||
| /// Panics if `table_succinct_blocks` does not yield exactly as many blocks as | ||
| /// the table created for the given parameters holds. A mismatch means the | ||
| /// data was exported from a filter with different parameters. | ||
| pub fn with_existing_filter_and_hash<I: IntoIterator<Item = u64>>( | ||
| rng: R, | ||
| bucketsize: usize, | ||
| n_buckets: usize, | ||
| l_fingerprint: usize, | ||
| n_elements: usize, | ||
| table_succinct_blocks: I, | ||
| bh: B, | ||
| ) -> Self { | ||
| let mut filter = Self::with_params_and_hash(rng, bucketsize, n_buckets, l_fingerprint, bh); | ||
| let expected_blocks = filter.table.block_len(); | ||
| // Number of blocks actually written; compared against the table size below. | ||
| let mut n_blocks = 0; | ||
| for (i, block) in table_succinct_blocks.into_iter().enumerate() { | ||
| assert!( | ||
| i < expected_blocks, | ||
| "existing input table block length must not exceed filter table block length" | ||
| ); | ||
| filter.table.set_block(i, block); | ||
| n_blocks = i + 1; | ||
| } | ||
| // Too few blocks would leave part of the table untouched while still reporting `n_elements`. | ||
| assert_eq!( | ||
| n_blocks, expected_blocks, | ||
| "existing input table block length must match filter table block length" | ||
| ); | ||
| filter.n_elements = n_elements; | ||

      Comment on lines
    
      +304
     to 
      +311
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this just use  | ||
| filter | ||
| } | ||
|  | ||
| /// Construct new `bucketsize=4`-cuckoofilter with properties: | ||
| /// | ||
| /// - `false_positive_rate`: false positive lookup rate | ||
|  | @@ -481,6 +533,35 @@ where | |
| self.table.set(pos as u64, data); | ||
| } | ||
| } | ||
|  | ||
| /// Clear and load filter table with individual filter table elements | ||
| /// and existing element count. | ||
| /// | ||
| /// - `table`: one value per table slot, written in order starting at slot 0 | ||
| /// - `n_elements`: element count to record; it is stored as given | ||
| /// | ||
| /// # Panics | ||
| /// | ||
| /// Panics if `table` does not yield exactly as many values as the filter | ||
| /// table has slots. The filter is cleared before loading, so after such a | ||
| /// panic it holds only the values written so far. | ||
| pub fn load_table<I: IntoIterator<Item = u64>>(&mut self, table: I, n_elements: usize) { | ||
| self.clear(); | ||
| // Number of slots actually written; compared against the table size below. | ||
| let mut n_loaded: u64 = 0; | ||
| for (i, value) in table.into_iter().enumerate() { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should probably validate that the iterator has the correct length because this depends on the filter parameters. | ||
| let i = i as u64; | ||
| assert!( | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be a  | ||
| i < self.table.len(), | ||
| "input table length must not exceed filter table length" | ||
| ); | ||
| self.table.set(i, value); | ||
| n_loaded = i + 1; | ||
| } | ||
| // A shorter input would leave trailing slots empty while still reporting `n_elements`. | ||
| assert_eq!( | ||
| n_loaded, | ||
| self.table.len(), | ||
| "input table length must match filter table length" | ||
| ); | ||
| self.n_elements = n_elements; | ||
| } | ||
|  | ||
| /// Copy every filter table element, in iteration order, into a new `Vec`. | ||
| pub fn table(&self) -> Vec<u64> { | ||
| self.table.iter().collect::<Vec<u64>>() | ||
| } | ||
|  | ||
| /// Copy the succinct block data backing the filter table into a new `Vec`, | ||
| /// one entry per block, from block 0 up to `block_len()`. | ||
| pub fn table_succinct_blocks(&self) -> Vec<u64> { | ||
| (0..self.table.block_len()) | ||
| .map(|block_idx| self.table.get_block(block_idx)) | ||
| .collect() | ||
| } | ||
| } | ||
|  | ||
| impl<T, R, B> Filter<T> for CuckooFilter<T, R, B> | ||
|  | @@ -949,4 +1030,42 @@ mod tests { | |
| let cf = CuckooFilter::<NotSend, _>::with_params(ChaChaRng::from_seed([0; 32]), 2, 16, 8); | ||
| assert_send(&cf); | ||
| } | ||
|  | ||
| #[test] | ||
| fn succinct_table_save_load() { | ||
| // Shared parameters so the restored filter matches the original one. | ||
| let (bucketsize, n_buckets, l_fingerprint) = (2, 16, 8); | ||
|  | ||
| // Populate a reference filter with two elements. | ||
| let mut original = CuckooFilter::with_params( | ||
| ChaChaRng::from_seed([0; 32]), | ||
| bucketsize, | ||
| n_buckets, | ||
| l_fingerprint, | ||
| ); | ||
| assert!(original.insert(&10).unwrap()); | ||
| assert!(original.insert(&51).unwrap()); | ||
| assert_eq!(original.len(), 2); | ||
|  | ||
| // Export the raw blocks and rebuild a second filter from them. | ||
| let n_elements = original.len(); | ||
| let blocks = original.table_succinct_blocks(); | ||
| let restored = CuckooFilter::with_existing_filter( | ||
| ChaChaRng::from_seed([0; 32]), | ||
| bucketsize, | ||
| n_buckets, | ||
| l_fingerprint, | ||
| n_elements, | ||
| blocks, | ||
| ); | ||
|  | ||
| // The restored filter must answer queries like the original. | ||
| assert!(restored.query(&10)); | ||
| assert!(restored.query(&51)); | ||
| assert!(!restored.query(&33)); | ||
| assert_eq!(restored.len(), 2); | ||
| } | ||
|  | ||
| #[test] | ||
| fn table_save_load() { | ||
| // Shared parameters so both filters have identically shaped tables. | ||
| let (bucketsize, n_buckets, l_fingerprint) = (2, 16, 8); | ||
|  | ||
| // Populate a reference filter with two elements. | ||
| let mut original = CuckooFilter::with_params( | ||
| ChaChaRng::from_seed([0; 32]), | ||
| bucketsize, | ||
| n_buckets, | ||
| l_fingerprint, | ||
| ); | ||
| assert!(original.insert(&10).unwrap()); | ||
| assert!(original.insert(&51).unwrap()); | ||
| assert_eq!(original.len(), 2); | ||
|  | ||
| // Load the exported per-slot values into a fresh filter. | ||
| let mut restored = CuckooFilter::with_params( | ||
| ChaChaRng::from_seed([0; 32]), | ||
| bucketsize, | ||
| n_buckets, | ||
| l_fingerprint, | ||
| ); | ||
| let n_elements = original.len(); | ||
| restored.load_table(original.table(), n_elements); | ||
|  | ||
| // The restored filter must answer queries like the original. | ||
| assert!(restored.query(&10)); | ||
| assert!(restored.query(&51)); | ||
| assert!(!restored.query(&33)); | ||
| assert_eq!(restored.len(), 2); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this check that the iterator has the correct number of elements for the given parameters?