16
16
#include < thread>
17
17
#include < VecSim/algorithms/hnsw/hnsw_single.h>
18
18
#include < VecSim/algorithms/brute_force/brute_force_single.h>
19
+ #include " tiered_index_mock.h"
19
20
20
21
namespace py = pybind11;
22
+ using namespace tiered_index_mock ;
21
23
22
24
// Helper function that iterates over query results and wraps them in a Python numpy object -
23
25
// a tuple of two 2D arrays: (labels, distances)
@@ -174,6 +176,13 @@ class PyVecSimIndex {
174
176
175
177
// Returns whatever VecSimIndex_IndexSize() reports for the wrapped index.
size_t indexSize() {
    auto *raw_index = index.get();
    return VecSimIndex_IndexSize(raw_index);
}
176
178
179
+ size_t indexMemory () { return this ->index ->getAllocationSize (); }
180
+
181
+ double getGetDistanceFrom (size_t id, const py::object &input) {
182
+ py::array query (input);
183
+ return this ->index ->getDistanceFrom (id, (const char *)query.data (0 ));
184
+ }
185
+
177
186
PyBatchIterator createBatchIterator (const py::object &input, VecSimQueryParams *query_params) {
178
187
py::array query (input);
179
188
return PyBatchIterator (
@@ -360,6 +369,94 @@ class PyHNSWLibIndex : public PyVecSimIndex {
360
369
}
361
370
};
362
371
372
+ class PyTIEREDIndex : public PyVecSimIndex {
373
+
374
+ protected:
375
+ JobQueue jobQueue; // External queue that holds the jobs.
376
+ IndexExtCtx jobQueueCtx; // External context to be sent to the submit callback.
377
+ SubmitCB submitCb; // A callback that submits an array of jobs into a given jobQueue.
378
+ size_t memoryCtx; // External context that stores the index memory consumption.
379
+ UpdateMemoryCB UpdateMemCb; // A callback that updates the memoryCtx
380
+ // with a given memory (number).
381
+ size_t flatBufferLimit; // Maximum size allowed for the flat buffer. If flat buffer is full, use
382
+ // in-place insertion.
383
+ bool run_thread;
384
+ std::bitset<MAX_POOL_SIZE> executions_status;
385
+
386
+ TieredIndexParams TieredIndexParams_Init () {
387
+ TieredIndexParams ret = {
388
+ .jobQueue = &this ->jobQueue ,
389
+ .jobQueueCtx = &this ->jobQueueCtx ,
390
+ .submitCb = this ->submitCb ,
391
+ .memoryCtx = &this ->memoryCtx ,
392
+ .UpdateMemCb = this ->UpdateMemCb ,
393
+ .flatBufferLimit = this ->flatBufferLimit ,
394
+ };
395
+
396
+ return ret;
397
+ }
398
+
399
+ public:
400
+ explicit PyTIEREDIndex (size_t BufferLimit = 20000000 )
401
+ : submitCb(submit_callback), memoryCtx(0 ), UpdateMemCb(update_mem_callback), flatBufferLimit(BufferLimit),
402
+ run_thread(true ) {
403
+
404
+ for (size_t i = 0 ; i < THREAD_POOL_SIZE; i++) {
405
+ ThreadParams params (run_thread, executions_status, i, jobQueue);
406
+ thread_pool.emplace_back (thread_main_loop, params);
407
+ }
408
+ }
409
+
410
+ virtual ~PyTIEREDIndex () = 0 ;
411
+
412
+ void WaitForIndex (size_t waiting_duration = 10 ) {
413
+ bool keep_wating = true ;
414
+ while (keep_wating) {
415
+ std::this_thread::sleep_for (std::chrono::milliseconds (waiting_duration));
416
+ std::unique_lock<std::mutex> lock (queue_guard);
417
+ if (jobQueue.empty ()) {
418
+ while (true ) {
419
+ if (executions_status.count () == 0 ) {
420
+ keep_wating = false ;
421
+ break ;
422
+ }
423
+ std::this_thread::sleep_for (std::chrono::milliseconds (waiting_duration));
424
+ }
425
+ }
426
+ }
427
+ }
428
+
429
+
430
+ static size_t GetThreadsNum () { return THREAD_POOL_SIZE; }
431
+
432
+ size_t getBufferLimit () {return flatBufferLimit; }
433
+ };
434
+
435
+ PyTIEREDIndex::~PyTIEREDIndex () { thread_pool_terminate (jobQueue, run_thread); }
436
+ class PyTIERED_HNSWIndex : public PyTIEREDIndex {
437
+ public:
438
+ explicit PyTIERED_HNSWIndex (const HNSWParams &hnsw_params,
439
+ const TieredHNSWParams &tiered_hnsw_params) {
440
+
441
+ // Create primaryIndexParams and specific params for hnsw tiered index.
442
+ VecSimParams primary_index_params = {.algo = VecSimAlgo_HNSWLIB, .hnswParams = hnsw_params};
443
+
444
+ // create TieredIndexParams
445
+ TieredIndexParams tiered_params = TieredIndexParams_Init ();
446
+
447
+ tiered_params.primaryIndexParams = &primary_index_params;
448
+ tiered_params.specificParams .tieredHnswParams = tiered_hnsw_params;
449
+
450
+ // create VecSimParams for TieredIndexParams
451
+ VecSimParams params = {.algo = VecSimAlgo_TIERED, .tieredParams = tiered_params};
452
+
453
+ this ->index = std::shared_ptr<VecSimIndex>(VecSimIndex_New (¶ms), VecSimIndex_Free);
454
+ // Set the created tiered index in the index external context.
455
+ this ->jobQueueCtx .index_strong_ref = this ->index ;
456
+ }
457
+ size_t HNSWLabelCount () { return this ->index ->info ().hnswInfo .indexLabelCount ; }
458
+ };
459
+
363
460
class PyBFIndex : public PyVecSimIndex {
364
461
public:
365
462
explicit PyBFIndex (const BFParams &bf_params) {
@@ -413,6 +510,10 @@ PYBIND11_MODULE(VecSim, m) {
413
510
.def_readwrite (" initialCapacity" , &BFParams::initialCapacity)
414
511
.def_readwrite (" blockSize" , &BFParams::blockSize);
415
512
513
+ py::class_<TieredHNSWParams>(m, " TieredHNSWParams" )
514
+ .def (py::init ())
515
+ .def_readwrite (" swapJobThreshold" , &TieredHNSWParams::swapJobThreshold);
516
+
416
517
py::class_<VecSimParams>(m, " VecSimParams" )
417
518
.def (py::init ())
418
519
.def_readwrite (" algo" , &VecSimParams::algo)
@@ -439,8 +540,11 @@ PYBIND11_MODULE(VecSim, m) {
439
540
.def (" range_query" , &PyVecSimIndex::range, py::arg (" vector" ), py::arg (" radius" ),
440
541
py::arg (" query_param" ) = nullptr )
441
542
.def (" index_size" , &PyVecSimIndex::indexSize)
543
+ .def (" index_memory" , &PyVecSimIndex::indexMemory)
442
544
.def (" create_batch_iterator" , &PyVecSimIndex::createBatchIterator, py::arg (" query_blob" ),
443
545
py::arg (" query_param" ) = nullptr )
546
+ .def (" get_distance_from" , &PyVecSimIndex::getGetDistanceFrom, py::arg (" label" ),
547
+ py::arg (" blob" ))
444
548
.def (" get_vector" , &PyVecSimIndex::getVector);
445
549
446
550
py::class_<PyHNSWLibIndex, PyVecSimIndex>(m, " HNSWIndex" )
@@ -460,6 +564,19 @@ PYBIND11_MODULE(VecSim, m) {
460
564
.def (" range_parallel" , &PyHNSWLibIndex::searchRangeParallel, py::arg (" queries" ),
461
565
py::arg (" radius" ), py::arg (" query_param" ) = nullptr , py::arg (" num_threads" ) = -1 );
462
566
567
+ py::class_<PyTIEREDIndex, PyVecSimIndex>(m, " TIEREDIndex" )
568
+ .def (" wait_for_index" , &PyTIERED_HNSWIndex::WaitForIndex, py::arg (" waiting_duration" ) = 10 )
569
+ .def (" get_buffer_limit" , &PyTIERED_HNSWIndex::getBufferLimit)
570
+ .def_static (" get_threads_num" , &PyTIEREDIndex::GetThreadsNum);
571
+
572
+ py::class_<PyTIERED_HNSWIndex, PyTIEREDIndex>(m, " TIERED_HNSWIndex" )
573
+ .def (
574
+ py::init ([](const HNSWParams &hnsw_params, const TieredHNSWParams &tiered_hnsw_params) {
575
+ return new PyTIERED_HNSWIndex (hnsw_params, tiered_hnsw_params);
576
+ }),
577
+ py::arg (" hnsw_params" ), py::arg (" tiered_hnsw_params" ))
578
+ .def (" hnsw_label_count" , &PyTIERED_HNSWIndex::HNSWLabelCount);
579
+
463
580
py::class_<PyBFIndex, PyVecSimIndex>(m, " BFIndex" )
464
581
.def (py::init ([](const BFParams ¶ms) { return new PyBFIndex (params); }),
465
582
py::arg (" params" ));
0 commit comments