diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 522fabc6..5f6ccc69 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -11,8 +11,16 @@ jobs:
- checkout: self
submodules: recursive
- - script: echo "##vso[task.prependpath]$CONDA/bin"
- displayName: Add conda to PATH
+ - script: |
+ set -e; mkdir -p ~/miniforge3  # set -e: abort the step on the first failing command
+ curl -fL https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -o ~/miniforge3/miniforge.sh  # -f: HTTP errors fail instead of saving an error page
+ bash ~/miniforge3/miniforge.sh -b -u -p ~/miniforge3
+ rm -rf ~/miniforge3/miniforge.sh
+ ~/miniforge3/bin/conda init bash
+ ~/miniforge3/bin/conda init zsh
+ export CONDA=$(realpath ~/miniforge3/bin)  # CONDA here is the bin directory itself
+ echo "##vso[task.prependpath]$CONDA"
+ displayName: Install conda
- script: conda create --yes --quiet --name btllib_CI
displayName: Create Anaconda environment
@@ -20,7 +28,7 @@ jobs:
- script: |
source activate btllib_CI
conda install --yes -c conda-forge mamba
- mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja cmake openmp
+ mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja 'cmake<4' openmp
pip install gcovr
displayName: Install dependencies
@@ -93,7 +101,7 @@ jobs:
- script: |
source activate btllib_CI
- mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja cmake openmp gcovr
+ mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja 'cmake<4' openmp gcovr
displayName: 'Install required software'
- script: |
diff --git a/docs/index.html b/docs/index.html
index 651ffc0f..675dc6b5 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -181,7 +181,7 @@
ninja clang-format formats the whitespace in code (requires clang-format 8+).
-ninja wrap wraps C++ code for Python (requires SWIG 4.0+).
+ninja wrap wraps C++ code for Python (requires SWIG ≥4.0 and <4.3).
ninja clang-tidy runs clang-tidy on C++ code and makes sure it passes (requires clang-tidy 8+).
ninja builds the tests and wrapper libraries / makes sure they compile.
ninja test runs the tests.
diff --git a/docs/mi__bloom__filter_8hpp_source.html b/docs/mi__bloom__filter_8hpp_source.html
index 3c2f07fe..e4a2fc78 100644
--- a/docs/mi__bloom__filter_8hpp_source.html
+++ b/docs/mi__bloom__filter_8hpp_source.html
@@ -223,46 +223,48 @@
242 std::vector<size_t> get_id_occurence_count(
const bool& include_saturated);
- 246 static size_t calc_optimal_size(
size_t entries,
-
-
-
-
- 251 MIBloomFilter(
const std::shared_ptr<MIBloomFilterInitializer>& mibfi);
- 252 static void save(
const std::string& path,
- 253 const cpptoml::table& table,
-
-
- 256 std::vector<uint64_t> get_rank_pos(
const uint64_t* hashes)
const;
- 257 uint64_t get_rank_pos(
const uint64_t hash)
const
-
- 259 return bv_rank_support(hash % il_bit_vector.size());
-
- 261 std::vector<T> get_data(
const std::vector<uint64_t>& rank_pos)
const;
- 262 T get_data(
const uint64_t& rank)
const {
return id_array[rank]; }
- 263 void set_data(
const uint64_t& pos,
const T&
id);
- 264 void set_saturated(
const uint64_t* hashes);
-
- 266 size_t id_array_size = 0;
-
- 268 unsigned kmer_size = 0;
- 269 unsigned hash_num = 0;
-
-
- 272 sdsl::bit_vector bit_vector;
- 273 sdsl::bit_vector_il<BLOCKSIZE> il_bit_vector;
- 274 sdsl::rank_support_il<1> bv_rank_support;
- 275 std::unique_ptr<std::atomic<uint16_t>[]> counts_array;
- 276 std::unique_ptr<std::atomic<T>[]> id_array;
-
- 278 bool bv_insertion_completed =
false, id_insertion_completed =
false;
-
+ 245 void set_k(
unsigned k) { kmer_size = k; }
+
+ 249 static size_t calc_optimal_size(
size_t entries,
+
+
+
+
+ 254 MIBloomFilter(
const std::shared_ptr<MIBloomFilterInitializer>& mibfi);
+ 255 static void save(
const std::string& path,
+ 256 const cpptoml::table& table,
+
+
+ 259 std::vector<uint64_t> get_rank_pos(
const uint64_t* hashes)
const;
+ 260 uint64_t get_rank_pos(
const uint64_t hash)
const
+
+ 262 return bv_rank_support(hash % il_bit_vector.size());
+
+ 264 std::vector<T> get_data(
const std::vector<uint64_t>& rank_pos)
const;
+ 265 T get_data(
const uint64_t& rank)
const {
return id_array[rank]; }
+ 266 void set_data(
const uint64_t& pos,
const T&
id);
+ 267 void set_saturated(
const uint64_t* hashes);
+
+ 269 size_t id_array_size = 0;
+
+ 271 unsigned kmer_size = 0;
+ 272 unsigned hash_num = 0;
+
+
+ 275 sdsl::bit_vector bit_vector;
+ 276 sdsl::bit_vector_il<BLOCKSIZE> il_bit_vector;
+ 277 sdsl::rank_support_il<1> bv_rank_support;
+ 278 std::unique_ptr<std::atomic<uint16_t>[]> counts_array;
+ 279 std::unique_ptr<std::atomic<T>[]> id_array;
-
-
- 283#include "mi_bloom_filter-inl.hpp"
-
-
+ 281 bool bv_insertion_completed =
false, id_insertion_completed =
false;
+
+
+
+
+ 286#include "mi_bloom_filter-inl.hpp"
+
+
diff --git a/include/btllib/mi_bloom_filter.hpp b/include/btllib/mi_bloom_filter.hpp
index da2417bd..bea47e76 100644
--- a/include/btllib/mi_bloom_filter.hpp
+++ b/include/btllib/mi_bloom_filter.hpp
@@ -241,6 +241,9 @@ class MIBloomFilter
/** Returns the occurence count for each ID in the miBF */
std::vector get_id_occurence_count(const bool& include_saturated);
+ /** Sets the stored k-mer size (kmer_size) to k. */
+ void set_k(unsigned k) { kmer_size = k; }
+
/** Returns an a filter size large enough to maintain an occupancy specified
*/
static size_t calc_optimal_size(size_t entries,
diff --git a/tests/mi_bloom_filter.cpp b/tests/mi_bloom_filter.cpp
index 383d747d..a0f3111c 100644
--- a/tests/mi_bloom_filter.cpp
+++ b/tests/mi_bloom_filter.cpp
@@ -22,6 +22,11 @@ main()
TEST_ASSERT(mi_bf_1.bv_contains({ 100, 200, 300 }));
TEST_ASSERT(!mi_bf_1.bv_contains({ 1, 20, 100 }));
+ unsigned kmer_size = 10; // value round-tripped through set_k()/get_k()
+ mi_bf_1.set_k(kmer_size);
+ unsigned set_kmer_size = mi_bf_1.get_k();
+ TEST_ASSERT(set_kmer_size == kmer_size); // compare with ==; a single = assigns and is always true for non-zero
+
uint8_t ID_1 = 12;
mi_bf_1.insert_id({ 1, 10, 100 }, ID_1);
diff --git a/tests/python/test_mi_bloom_filter.py b/tests/python/test_mi_bloom_filter.py
index 8d5c40be..a02bbaee 100644
--- a/tests/python/test_mi_bloom_filter.py
+++ b/tests/python/test_mi_bloom_filter.py
@@ -25,6 +25,7 @@ def setUp(self):
def set_up_mi_bf_1(self):
self.mi_bf_1 = btllib.MIBloomFilter8(1024 * 1024, 3, "ntHash")
+ self.mi_bf_1.set_k(10)
for h in self.test_hashes_1:
self.mi_bf_1.insert_bv(h)
@@ -89,6 +90,10 @@ def test_mibloomfilter_id_occurence(self):
self.assertEqual(len(self.test_hashes_1[0]),
self.mi_bf_1.get_id_occurence_count(include_saturated)[expected_id])
+ def test_mibloomfilter_id_occurence(self):
+ self.set_up_mi_bf_1()
+ self.assertEqual(10, self.mi_bf_1.get_k())
+
def test_mibloomfilter_random_sampling(self):
self.set_up_mi_bf_2()
diff --git a/wrappers/python/btllib_wrap.cxx b/wrappers/python/btllib_wrap.cxx
index a2722021..a2c9efe6 100644
--- a/wrappers/python/btllib_wrap.cxx
+++ b/wrappers/python/btllib_wrap.cxx
@@ -56460,6 +56460,36 @@ SWIGINTERN PyObject *_wrap_MIBloomFilter8_get_id_occurence_count(PyObject *self,
}
+SWIGINTERN PyObject *_wrap_MIBloomFilter8_set_k(PyObject *self, PyObject *args) { /* Python entry point that calls set_k on a btllib::MIBloomFilter< uint8_t > */
+ PyObject *resultobj = 0;
+ btllib::MIBloomFilter< uint8_t > *arg1 = (btllib::MIBloomFilter< uint8_t > *) 0 ;
+ unsigned int arg2 ;
+ void *argp1 = 0 ;
+ int res1 = 0 ;
+ unsigned int val2 ;
+ int ecode2 = 0 ;
+ PyObject *swig_obj[2] ;
+
+ if (!args) SWIG_fail;
+ swig_obj[0] = args; /* args itself is used as the single argument object */
+ res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__MIBloomFilterT_uint8_t_t, 0 | 0 ); /* convert self into the wrapped object pointer (argp1) */
+ if (!SWIG_IsOK(res1)) {
+ SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "MIBloomFilter8_set_k" "', argument " "1"" of type '" "btllib::MIBloomFilter< uint8_t > *""'");
+ }
+ arg1 = reinterpret_cast< btllib::MIBloomFilter< uint8_t > * >(argp1);
+ ecode2 = SWIG_AsVal_unsigned_SS_int(swig_obj[0], &val2); /* convert the Python argument to unsigned int (val2) */
+ if (!SWIG_IsOK(ecode2)) {
+ SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "MIBloomFilter8_set_k" "', argument " "2"" of type '" "unsigned int""'");
+ }
+ arg2 = static_cast< unsigned int >(val2);
+ (arg1)->set_k(arg2); /* forward to the C++ setter */
+ resultobj = SWIG_Py_Void(); /* set_k returns void, so the result is SWIG_Py_Void() */
+ return resultobj;
+fail:
+ return NULL; /* error path: NULL is returned to the caller */
+}
+
+
SWIGINTERN PyObject *_wrap_MIBloomFilter8_calc_optimal_size(PyObject *self, PyObject *args) {
PyObject *resultobj = 0;
size_t arg1 ;
@@ -57554,6 +57584,36 @@ SWIGINTERN PyObject *_wrap_MIBloomFilter16_get_id_occurence_count(PyObject *self
}
+SWIGINTERN PyObject *_wrap_MIBloomFilter16_set_k(PyObject *self, PyObject *args) { /* Python entry point that calls set_k on a btllib::MIBloomFilter< uint16_t > */
+ PyObject *resultobj = 0;
+ btllib::MIBloomFilter< uint16_t > *arg1 = (btllib::MIBloomFilter< uint16_t > *) 0 ;
+ unsigned int arg2 ;
+ void *argp1 = 0 ;
+ int res1 = 0 ;
+ unsigned int val2 ;
+ int ecode2 = 0 ;
+ PyObject *swig_obj[2] ;
+
+ if (!args) SWIG_fail;
+ swig_obj[0] = args; /* args itself is used as the single argument object */
+ res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__MIBloomFilterT_uint16_t_t, 0 | 0 ); /* convert self into the wrapped object pointer (argp1) */
+ if (!SWIG_IsOK(res1)) {
+ SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "MIBloomFilter16_set_k" "', argument " "1"" of type '" "btllib::MIBloomFilter< uint16_t > *""'");
+ }
+ arg1 = reinterpret_cast< btllib::MIBloomFilter< uint16_t > * >(argp1);
+ ecode2 = SWIG_AsVal_unsigned_SS_int(swig_obj[0], &val2); /* convert the Python argument to unsigned int (val2) */
+ if (!SWIG_IsOK(ecode2)) {
+ SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "MIBloomFilter16_set_k" "', argument " "2"" of type '" "unsigned int""'");
+ }
+ arg2 = static_cast< unsigned int >(val2);
+ (arg1)->set_k(arg2); /* forward to the C++ setter */
+ resultobj = SWIG_Py_Void(); /* set_k returns void, so the result is SWIG_Py_Void() */
+ return resultobj;
+fail:
+ return NULL; /* error path: NULL is returned to the caller */
+}
+
+
SWIGINTERN PyObject *_wrap_MIBloomFilter16_calc_optimal_size(PyObject *self, PyObject *args) {
PyObject *resultobj = 0;
size_t arg1 ;
@@ -58648,6 +58708,36 @@ SWIGINTERN PyObject *_wrap_MIBloomFilter32_get_id_occurence_count(PyObject *self
}
+SWIGINTERN PyObject *_wrap_MIBloomFilter32_set_k(PyObject *self, PyObject *args) { /* Python entry point that calls set_k on a btllib::MIBloomFilter< uint32_t > */
+ PyObject *resultobj = 0;
+ btllib::MIBloomFilter< uint32_t > *arg1 = (btllib::MIBloomFilter< uint32_t > *) 0 ;
+ unsigned int arg2 ;
+ void *argp1 = 0 ;
+ int res1 = 0 ;
+ unsigned int val2 ;
+ int ecode2 = 0 ;
+ PyObject *swig_obj[2] ;
+
+ if (!args) SWIG_fail;
+ swig_obj[0] = args; /* args itself is used as the single argument object */
+ res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__MIBloomFilterT_uint32_t_t, 0 | 0 ); /* convert self into the wrapped object pointer (argp1) */
+ if (!SWIG_IsOK(res1)) {
+ SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "MIBloomFilter32_set_k" "', argument " "1"" of type '" "btllib::MIBloomFilter< uint32_t > *""'");
+ }
+ arg1 = reinterpret_cast< btllib::MIBloomFilter< uint32_t > * >(argp1);
+ ecode2 = SWIG_AsVal_unsigned_SS_int(swig_obj[0], &val2); /* convert the Python argument to unsigned int (val2) */
+ if (!SWIG_IsOK(ecode2)) {
+ SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "MIBloomFilter32_set_k" "', argument " "2"" of type '" "unsigned int""'");
+ }
+ arg2 = static_cast< unsigned int >(val2);
+ (arg1)->set_k(arg2); /* forward to the C++ setter */
+ resultobj = SWIG_Py_Void(); /* set_k returns void, so the result is SWIG_Py_Void() */
+ return resultobj;
+fail:
+ return NULL; /* error path: NULL is returned to the caller */
+}
+
+
SWIGINTERN PyObject *_wrap_MIBloomFilter32_calc_optimal_size(PyObject *self, PyObject *args) {
PyObject *resultobj = 0;
size_t arg1 ;
@@ -69855,6 +69945,7 @@ SWIGINTERN PyMethodDef SwigPyBuiltin__btllib__MIBloomFilterT_uint8_t_t_methods[]
{ "get_k", _wrap_MIBloomFilter8_get_k, METH_NOARGS, "" },
{ "get_hash_fn", _wrap_MIBloomFilter8_get_hash_fn, METH_NOARGS, "" },
{ "get_id_occurence_count", _wrap_MIBloomFilter8_get_id_occurence_count, METH_O, "" },
+ { "set_k", _wrap_MIBloomFilter8_set_k, METH_O, "" },
{ "calc_optimal_size", (PyCFunction)(void(*)(void))_wrap_MIBloomFilter8_calc_optimal_size, METH_STATIC|METH_VARARGS, "" },
{ NULL, NULL, 0, NULL } /* Sentinel */
};
@@ -70104,6 +70195,7 @@ SWIGINTERN PyMethodDef SwigPyBuiltin__btllib__MIBloomFilterT_uint16_t_t_methods[
{ "get_k", _wrap_MIBloomFilter16_get_k, METH_NOARGS, "" },
{ "get_hash_fn", _wrap_MIBloomFilter16_get_hash_fn, METH_NOARGS, "" },
{ "get_id_occurence_count", _wrap_MIBloomFilter16_get_id_occurence_count, METH_O, "" },
+ { "set_k", _wrap_MIBloomFilter16_set_k, METH_O, "" },
{ "calc_optimal_size", (PyCFunction)(void(*)(void))_wrap_MIBloomFilter16_calc_optimal_size, METH_STATIC|METH_VARARGS, "" },
{ NULL, NULL, 0, NULL } /* Sentinel */
};
@@ -70353,6 +70445,7 @@ SWIGINTERN PyMethodDef SwigPyBuiltin__btllib__MIBloomFilterT_uint32_t_t_methods[
{ "get_k", _wrap_MIBloomFilter32_get_k, METH_NOARGS, "" },
{ "get_hash_fn", _wrap_MIBloomFilter32_get_hash_fn, METH_NOARGS, "" },
{ "get_id_occurence_count", _wrap_MIBloomFilter32_get_id_occurence_count, METH_O, "" },
+ { "set_k", _wrap_MIBloomFilter32_set_k, METH_O, "" },
{ "calc_optimal_size", (PyCFunction)(void(*)(void))_wrap_MIBloomFilter32_calc_optimal_size, METH_STATIC|METH_VARARGS, "" },
{ NULL, NULL, 0, NULL } /* Sentinel */
};