Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,24 @@ jobs:
- checkout: self
submodules: recursive

- script: echo "##vso[task.prependpath]$CONDA/bin"
displayName: Add conda to PATH
- script: |
mkdir -p ~/miniforge3
curl -L https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -o ~/miniforge3/miniforge.sh
bash ~/miniforge3/miniforge.sh -b -u -p ~/miniforge3
rm -rf ~/miniforge3/miniforge.sh
~/miniforge3/bin/conda init bash
~/miniforge3/bin/conda init zsh
export CONDA=$(realpath ~/miniforge3/bin)
echo "##vso[task.prependpath]$CONDA"
displayName: Install conda

- script: conda create --yes --quiet --name btllib_CI
displayName: Create Anaconda environment

- script: |
source activate btllib_CI
conda install --yes -c conda-forge mamba
mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja cmake openmp
mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja 'cmake<4' openmp
pip install gcovr
displayName: Install dependencies

Expand Down Expand Up @@ -93,7 +101,7 @@ jobs:

- script: |
source activate btllib_CI
mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja cmake openmp gcovr
mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja 'cmake<4' openmp gcovr
displayName: 'Install required software'

- script: |
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ <h2><a class="anchor" id="autotoc_md4"></a>
</ul>
<p>The following are all the available <code>ninja</code> commands which can be run within <code>build</code> directory:</p><ul>
<li><code>ninja clang-format</code> formats the whitespace in code (requires clang-format 8+).</li>
<li><code>ninja wrap</code> wraps C++ code for Python (requires SWIG 4.0+).</li>
<li><code>ninja wrap</code> wraps C++ code for Python (requires SWIG 4.0 or newer, but older than 4.3).</li>
<li><code>ninja clang-tidy</code> runs clang-tidy on C++ code and makes sure it passes (requires clang-tidy 8+).</li>
<li><code>ninja</code> builds the tests and wrapper libraries / makes sure they compile.</li>
<li><code>ninja test</code> runs the tests.</li>
Expand Down
80 changes: 41 additions & 39 deletions docs/mi__bloom__filter_8hpp_source.html
Original file line number Diff line number Diff line change
Expand Up @@ -223,46 +223,48 @@
<div class="line"><a id="l00240" name="l00240"></a><span class="lineno"> 240</span> </div>
<div class="line"><a id="l00242" name="l00242"></a><span class="lineno"> 242</span> std::vector&lt;size_t&gt; get_id_occurence_count(<span class="keyword">const</span> <span class="keywordtype">bool</span>&amp; include_saturated);</div>
<div class="line"><a id="l00243" name="l00243"></a><span class="lineno"> 243</span> </div>
<div class="line"><a id="l00246" name="l00246"></a><span class="lineno"> 246</span> <span class="keyword">static</span> <span class="keywordtype">size_t</span> calc_optimal_size(<span class="keywordtype">size_t</span> entries,</div>
<div class="line"><a id="l00247" name="l00247"></a><span class="lineno"> 247</span> <span class="keywordtype">unsigned</span> hash_num,</div>
<div class="line"><a id="l00248" name="l00248"></a><span class="lineno"> 248</span> <span class="keywordtype">double</span> occupancy);</div>
<div class="line"><a id="l00249" name="l00249"></a><span class="lineno"> 249</span> </div>
<div class="line"><a id="l00250" name="l00250"></a><span class="lineno"> 250</span><span class="keyword">private</span>:</div>
<div class="line"><a id="l00251" name="l00251"></a><span class="lineno"> 251</span> MIBloomFilter(<span class="keyword">const</span> std::shared_ptr&lt;MIBloomFilterInitializer&gt;&amp; mibfi);</div>
<div class="line"><a id="l00252" name="l00252"></a><span class="lineno"> 252</span> <span class="keyword">static</span> <span class="keywordtype">void</span> save(<span class="keyword">const</span> std::string&amp; path,</div>
<div class="line"><a id="l00253" name="l00253"></a><span class="lineno"> 253</span> <span class="keyword">const</span> cpptoml::table&amp; table,</div>
<div class="line"><a id="l00254" name="l00254"></a><span class="lineno"> 254</span> <span class="keyword">const</span> <span class="keywordtype">char</span>* data,</div>
<div class="line"><a id="l00255" name="l00255"></a><span class="lineno"> 255</span> <span class="keywordtype">size_t</span> n);</div>
<div class="line"><a id="l00256" name="l00256"></a><span class="lineno"> 256</span> std::vector&lt;uint64_t&gt; get_rank_pos(<span class="keyword">const</span> uint64_t* hashes) <span class="keyword">const</span>;</div>
<div class="line"><a id="l00257" name="l00257"></a><span class="lineno"> 257</span> uint64_t get_rank_pos(<span class="keyword">const</span> uint64_t hash)<span class="keyword"> const</span></div>
<div class="line"><a id="l00258" name="l00258"></a><span class="lineno"> 258</span><span class="keyword"> </span>{</div>
<div class="line"><a id="l00259" name="l00259"></a><span class="lineno"> 259</span> <span class="keywordflow">return</span> bv_rank_support(hash % il_bit_vector.size());</div>
<div class="line"><a id="l00260" name="l00260"></a><span class="lineno"> 260</span> }</div>
<div class="line"><a id="l00261" name="l00261"></a><span class="lineno"> 261</span> std::vector&lt;T&gt; get_data(<span class="keyword">const</span> std::vector&lt;uint64_t&gt;&amp; rank_pos) <span class="keyword">const</span>;</div>
<div class="line"><a id="l00262" name="l00262"></a><span class="lineno"> 262</span> T get_data(<span class="keyword">const</span> uint64_t&amp; rank)<span class="keyword"> const </span>{ <span class="keywordflow">return</span> id_array[rank]; }</div>
<div class="line"><a id="l00263" name="l00263"></a><span class="lineno"> 263</span> <span class="keywordtype">void</span> set_data(<span class="keyword">const</span> uint64_t&amp; pos, <span class="keyword">const</span> T&amp; <span class="keywordtype">id</span>);</div>
<div class="line"><a id="l00264" name="l00264"></a><span class="lineno"> 264</span> <span class="keywordtype">void</span> set_saturated(<span class="keyword">const</span> uint64_t* hashes);</div>
<div class="line"><a id="l00265" name="l00265"></a><span class="lineno"> 265</span> </div>
<div class="line"><a id="l00266" name="l00266"></a><span class="lineno"> 266</span> <span class="keywordtype">size_t</span> id_array_size = 0;</div>
<div class="line"><a id="l00267" name="l00267"></a><span class="lineno"> 267</span> <span class="keywordtype">size_t</span> bv_size = 0;</div>
<div class="line"><a id="l00268" name="l00268"></a><span class="lineno"> 268</span> <span class="keywordtype">unsigned</span> kmer_size = 0;</div>
<div class="line"><a id="l00269" name="l00269"></a><span class="lineno"> 269</span> <span class="keywordtype">unsigned</span> hash_num = 0;</div>
<div class="line"><a id="l00270" name="l00270"></a><span class="lineno"> 270</span> std::string hash_fn;</div>
<div class="line"><a id="l00271" name="l00271"></a><span class="lineno"> 271</span> </div>
<div class="line"><a id="l00272" name="l00272"></a><span class="lineno"> 272</span> sdsl::bit_vector bit_vector;</div>
<div class="line"><a id="l00273" name="l00273"></a><span class="lineno"> 273</span> sdsl::bit_vector_il&lt;BLOCKSIZE&gt; il_bit_vector;</div>
<div class="line"><a id="l00274" name="l00274"></a><span class="lineno"> 274</span> sdsl::rank_support_il&lt;1&gt; bv_rank_support;</div>
<div class="line"><a id="l00275" name="l00275"></a><span class="lineno"> 275</span> std::unique_ptr&lt;std::atomic&lt;uint16_t&gt;[]&gt; counts_array;</div>
<div class="line"><a id="l00276" name="l00276"></a><span class="lineno"> 276</span> std::unique_ptr&lt;std::atomic&lt;T&gt;[]&gt; id_array;</div>
<div class="line"><a id="l00277" name="l00277"></a><span class="lineno"> 277</span> </div>
<div class="line"><a id="l00278" name="l00278"></a><span class="lineno"> 278</span> <span class="keywordtype">bool</span> bv_insertion_completed = <span class="keyword">false</span>, id_insertion_completed = <span class="keyword">false</span>;</div>
<div class="line"><a id="l00279" name="l00279"></a><span class="lineno"> 279</span>};</div>
<div class="line"><a id="l00245" name="l00245"></a><span class="lineno"> 245</span> <span class="keywordtype">void</span> set_k(<span class="keywordtype">unsigned</span> k) { kmer_size = k; }</div>
<div class="line"><a id="l00246" name="l00246"></a><span class="lineno"> 246</span> </div>
<div class="line"><a id="l00249" name="l00249"></a><span class="lineno"> 249</span> <span class="keyword">static</span> <span class="keywordtype">size_t</span> calc_optimal_size(<span class="keywordtype">size_t</span> entries,</div>
<div class="line"><a id="l00250" name="l00250"></a><span class="lineno"> 250</span> <span class="keywordtype">unsigned</span> hash_num,</div>
<div class="line"><a id="l00251" name="l00251"></a><span class="lineno"> 251</span> <span class="keywordtype">double</span> occupancy);</div>
<div class="line"><a id="l00252" name="l00252"></a><span class="lineno"> 252</span> </div>
<div class="line"><a id="l00253" name="l00253"></a><span class="lineno"> 253</span><span class="keyword">private</span>:</div>
<div class="line"><a id="l00254" name="l00254"></a><span class="lineno"> 254</span> MIBloomFilter(<span class="keyword">const</span> std::shared_ptr&lt;MIBloomFilterInitializer&gt;&amp; mibfi);</div>
<div class="line"><a id="l00255" name="l00255"></a><span class="lineno"> 255</span> <span class="keyword">static</span> <span class="keywordtype">void</span> save(<span class="keyword">const</span> std::string&amp; path,</div>
<div class="line"><a id="l00256" name="l00256"></a><span class="lineno"> 256</span> <span class="keyword">const</span> cpptoml::table&amp; table,</div>
<div class="line"><a id="l00257" name="l00257"></a><span class="lineno"> 257</span> <span class="keyword">const</span> <span class="keywordtype">char</span>* data,</div>
<div class="line"><a id="l00258" name="l00258"></a><span class="lineno"> 258</span> <span class="keywordtype">size_t</span> n);</div>
<div class="line"><a id="l00259" name="l00259"></a><span class="lineno"> 259</span> std::vector&lt;uint64_t&gt; get_rank_pos(<span class="keyword">const</span> uint64_t* hashes) <span class="keyword">const</span>;</div>
<div class="line"><a id="l00260" name="l00260"></a><span class="lineno"> 260</span> uint64_t get_rank_pos(<span class="keyword">const</span> uint64_t hash)<span class="keyword"> const</span></div>
<div class="line"><a id="l00261" name="l00261"></a><span class="lineno"> 261</span><span class="keyword"> </span>{</div>
<div class="line"><a id="l00262" name="l00262"></a><span class="lineno"> 262</span> <span class="keywordflow">return</span> bv_rank_support(hash % il_bit_vector.size());</div>
<div class="line"><a id="l00263" name="l00263"></a><span class="lineno"> 263</span> }</div>
<div class="line"><a id="l00264" name="l00264"></a><span class="lineno"> 264</span> std::vector&lt;T&gt; get_data(<span class="keyword">const</span> std::vector&lt;uint64_t&gt;&amp; rank_pos) <span class="keyword">const</span>;</div>
<div class="line"><a id="l00265" name="l00265"></a><span class="lineno"> 265</span> T get_data(<span class="keyword">const</span> uint64_t&amp; rank)<span class="keyword"> const </span>{ <span class="keywordflow">return</span> id_array[rank]; }</div>
<div class="line"><a id="l00266" name="l00266"></a><span class="lineno"> 266</span> <span class="keywordtype">void</span> set_data(<span class="keyword">const</span> uint64_t&amp; pos, <span class="keyword">const</span> T&amp; <span class="keywordtype">id</span>);</div>
<div class="line"><a id="l00267" name="l00267"></a><span class="lineno"> 267</span> <span class="keywordtype">void</span> set_saturated(<span class="keyword">const</span> uint64_t* hashes);</div>
<div class="line"><a id="l00268" name="l00268"></a><span class="lineno"> 268</span> </div>
<div class="line"><a id="l00269" name="l00269"></a><span class="lineno"> 269</span> <span class="keywordtype">size_t</span> id_array_size = 0;</div>
<div class="line"><a id="l00270" name="l00270"></a><span class="lineno"> 270</span> <span class="keywordtype">size_t</span> bv_size = 0;</div>
<div class="line"><a id="l00271" name="l00271"></a><span class="lineno"> 271</span> <span class="keywordtype">unsigned</span> kmer_size = 0;</div>
<div class="line"><a id="l00272" name="l00272"></a><span class="lineno"> 272</span> <span class="keywordtype">unsigned</span> hash_num = 0;</div>
<div class="line"><a id="l00273" name="l00273"></a><span class="lineno"> 273</span> std::string hash_fn;</div>
<div class="line"><a id="l00274" name="l00274"></a><span class="lineno"> 274</span> </div>
<div class="line"><a id="l00275" name="l00275"></a><span class="lineno"> 275</span> sdsl::bit_vector bit_vector;</div>
<div class="line"><a id="l00276" name="l00276"></a><span class="lineno"> 276</span> sdsl::bit_vector_il&lt;BLOCKSIZE&gt; il_bit_vector;</div>
<div class="line"><a id="l00277" name="l00277"></a><span class="lineno"> 277</span> sdsl::rank_support_il&lt;1&gt; bv_rank_support;</div>
<div class="line"><a id="l00278" name="l00278"></a><span class="lineno"> 278</span> std::unique_ptr&lt;std::atomic&lt;uint16_t&gt;[]&gt; counts_array;</div>
<div class="line"><a id="l00279" name="l00279"></a><span class="lineno"> 279</span> std::unique_ptr&lt;std::atomic&lt;T&gt;[]&gt; id_array;</div>
<div class="line"><a id="l00280" name="l00280"></a><span class="lineno"> 280</span> </div>
<div class="line"><a id="l00281" name="l00281"></a><span class="lineno"> 281</span>} <span class="comment">// namespace btllib</span></div>
<div class="line"><a id="l00282" name="l00282"></a><span class="lineno"> 282</span> </div>
<div class="line"><a id="l00283" name="l00283"></a><span class="lineno"> 283</span><span class="preprocessor">#include &quot;mi_bloom_filter-inl.hpp&quot;</span></div>
<div class="line"><a id="l00284" name="l00284"></a><span class="lineno"> 284</span> </div>
<div class="line"><a id="l00285" name="l00285"></a><span class="lineno"> 285</span><span class="preprocessor">#endif</span></div>
<div class="line"><a id="l00281" name="l00281"></a><span class="lineno"> 281</span> <span class="keywordtype">bool</span> bv_insertion_completed = <span class="keyword">false</span>, id_insertion_completed = <span class="keyword">false</span>;</div>
<div class="line"><a id="l00282" name="l00282"></a><span class="lineno"> 282</span>};</div>
<div class="line"><a id="l00283" name="l00283"></a><span class="lineno"> 283</span> </div>
<div class="line"><a id="l00284" name="l00284"></a><span class="lineno"> 284</span>} <span class="comment">// namespace btllib</span></div>
<div class="line"><a id="l00285" name="l00285"></a><span class="lineno"> 285</span> </div>
<div class="line"><a id="l00286" name="l00286"></a><span class="lineno"> 286</span><span class="preprocessor">#include &quot;mi_bloom_filter-inl.hpp&quot;</span></div>
<div class="line"><a id="l00287" name="l00287"></a><span class="lineno"> 287</span> </div>
<div class="line"><a id="l00288" name="l00288"></a><span class="lineno"> 288</span><span class="preprocessor">#endif</span></div>
<div class="ttc" id="anamespacebtllib_html"><div class="ttname"><a href="namespacebtllib.html">btllib</a></div><div class="ttdef"><b>Definition</b> aahash.hpp:12</div></div>
</div><!-- fragment --></div><!-- contents -->
<!-- start footer part -->
Expand Down
3 changes: 3 additions & 0 deletions include/btllib/mi_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ class MIBloomFilter
/** Returns the occurrence count for each ID in the miBF */
std::vector<size_t> get_id_occurence_count(const bool& include_saturated);

/** Sets the k-mer size stored in this filter to `k`. */
void set_k(unsigned k)
{
  kmer_size = k;
}

/** Returns a filter size large enough to maintain the specified occupancy
*/
static size_t calc_optimal_size(size_t entries,
Expand Down
5 changes: 5 additions & 0 deletions tests/mi_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ main()
TEST_ASSERT(mi_bf_1.bv_contains({ 100, 200, 300 }));
TEST_ASSERT(!mi_bf_1.bv_contains({ 1, 20, 100 }));

// Round-trip the k-mer size through set_k() and get_k().
unsigned kmer_size = 10;
mi_bf_1.set_k(kmer_size);
unsigned set_kmer_size = mi_bf_1.get_k();
// Must compare with `==`: a single `=` would assign kmer_size and make the
// assertion pass for any non-zero value without checking get_k().
TEST_ASSERT(set_kmer_size == kmer_size);

uint8_t ID_1 = 12;
mi_bf_1.insert_id({ 1, 10, 100 }, ID_1);

Expand Down
5 changes: 5 additions & 0 deletions tests/python/test_mi_bloom_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def setUp(self):
def set_up_mi_bf_1(self):

self.mi_bf_1 = btllib.MIBloomFilter8(1024 * 1024, 3, "ntHash")
self.mi_bf_1.set_k(10)

for h in self.test_hashes_1:
self.mi_bf_1.insert_bv(h)
Expand Down Expand Up @@ -89,6 +90,10 @@ def test_mibloomfilter_id_occurence(self):
self.assertEqual(len(self.test_hashes_1[0]),
self.mi_bf_1.get_id_occurence_count(include_saturated)[expected_id])

def test_mibloomfilter_set_k(self):
    """get_k() returns the k-mer size that set_up_mi_bf_1 passes to set_k()."""
    # Named distinctly on purpose: reusing test_mibloomfilter_id_occurence
    # would replace the earlier test of that name in the class, so it would
    # never run.
    self.set_up_mi_bf_1()
    self.assertEqual(10, self.mi_bf_1.get_k())

def test_mibloomfilter_random_sampling(self):

self.set_up_mi_bf_2()
Expand Down
Loading
Loading