Skip to content

Commit fc07800

Browse files
authored
[8.2] [MOD-10926] Expose SVS Marked Deleted Vector Count via Statistics API (#797)
[MOD-10926] Expose SVS Marked Deleted Vector Count via Statistics API (#796). SVSIndex: rename changes_num -> num_marked_deleted to emphasize that it only counts deletions. SVSIndexBase: move num_marked_deleted to SVSIndexBase so it can be fetched simply, without a complex cast; add getNumMarkedDeleted to the API. Add a virtual getNumMarkedDeleted to the tiered index, to be called by VecSimTieredIndex::statisticInfo. HNSW tiered implementation: calls hnsw->VecSimIndexTombstone::getNumMarkedDeleted (hnsw inherits VecSimIndexTombstone). SVS implementation: returns SVSIndexBase::getNumMarkedDeleted. (cherry picked from commit 1c0ec77)
1 parent 1d696c4 commit fc07800

File tree

6 files changed

+116
-22
lines changed

6 files changed

+116
-22
lines changed

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
201201

202202
int addVector(const void *blob, labelType label) override;
203203
int deleteVector(labelType label) override;
204+
// Returns the number of marked-deleted vectors by forwarding to the backend HNSW index's
// getNumMarkedDeleted().
size_t getNumMarkedDeleted() const override {
205+
return this->getHNSWIndex()->getNumMarkedDeleted();
206+
}
204207
size_t indexSize() const override;
205208
size_t indexCapacity() const override;
206209
double getDistanceFrom_Unsafe(labelType label, const void *blob) const override;

src/VecSim/algorithms/svs/svs.h

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct SVSIndexBase
3535
: public SVSSerializer
3636
#endif
3737
{
38-
38+
SVSIndexBase() : num_marked_deleted{0} {};
3939
virtual ~SVSIndexBase() = default;
4040
virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0;
4141
virtual int deleteVectors(const labelType *labels, size_t n) = 0;
@@ -44,9 +44,14 @@ struct SVSIndexBase
4444
virtual void setNumThreads(size_t numThreads) = 0;
4545
virtual size_t getThreadPoolCapacity() const = 0;
4646
virtual bool isCompressed() const = 0;
47+
// Returns the current value of the num_marked_deleted counter.
size_t getNumMarkedDeleted() const { return num_marked_deleted; }
4748
#ifdef BUILD_TESTS
4849
virtual svs::logging::logger_ptr getLogger() const = 0;
4950
#endif
51+
protected:
52+
// Counter of marked-deleted vectors; used to initiate reindexing once it exceeds a threshold
53+
// markIndexUpdate() manages this counter
54+
size_t num_marked_deleted;
5055
};
5156

5257
template <typename MetricType, typename DataType, bool isMulti, size_t QuantBits,
@@ -72,10 +77,6 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
7277

7378
bool forcePreprocessing;
7479

75-
// Index severe changes counter to initiate reindexing if number of changes exceed threshold
76-
// markIndexUpdated() manages this counter
77-
size_t changes_num;
78-
7980
// Index build parameters
8081
svs::index::vamana::VamanaBuildParameters buildParams;
8182

@@ -273,26 +274,27 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
273274
return deleted_num;
274275
}
275276

276-
// Count severe index changes (currently deletions only) and consolidate index if needed
277+
// Count deletions and consolidate index if needed
277278
void markIndexUpdate(size_t n = 1) {
278279
if (!impl_)
279280
return;
280281

281282
// SVS index instance should not be empty
282283
if (indexSize() == 0) {
283284
this->impl_.reset();
284-
changes_num = 0;
285+
num_marked_deleted = 0;
285286
return;
286287
}
287288

288-
changes_num += n;
289+
num_marked_deleted += n;
289290
// consolidate index if the number of marked-deleted vectors is bigger than 50% of index size
290291
const float consolidation_threshold = .5f;
291292
// indexSize() should not be 0 see above lines
292293
assert(indexSize() > 0);
293-
if (static_cast<float>(changes_num) / indexSize() > consolidation_threshold) {
294+
// Note: if this function is called after deleteVectorsImpl, indexSize is already updated
295+
if (static_cast<float>(num_marked_deleted) / indexSize() > consolidation_threshold) {
294296
impl_->consolidate();
295-
changes_num = 0;
297+
num_marked_deleted = 0;
296298
}
297299
}
298300

@@ -312,7 +314,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
312314
SVSIndex(const SVSParams &params, const AbstractIndexInitParams &abstractInitParams,
313315
const index_component_t &components, bool force_preprocessing)
314316
: Base{abstractInitParams, components}, forcePreprocessing{force_preprocessing},
315-
changes_num{0}, buildParams{svs_details::makeVamanaBuildParameters(params)},
317+
buildParams{svs_details::makeVamanaBuildParameters(params)},
316318
search_window_size{svs_details::getOrDefault(params.search_window_size,
317319
SVS_VAMANA_DEFAULT_SEARCH_WINDOW_SIZE)},
318320
search_buffer_capacity{
@@ -373,7 +375,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
373375
.pruneTo = this->buildParams.prune_to,
374376
.useSearchHistory = this->buildParams.use_full_search_history,
375377
.numThreads = this->getNumThreads(),
376-
.numberOfMarkedDeletedNodes = this->changes_num,
378+
.numberOfMarkedDeletedNodes = this->num_marked_deleted,
377379
.searchWindowSize = this->search_window_size,
378380
.searchBufferCapacity = this->search_buffer_capacity,
379381
.leanvecDim = this->leanvec_dim,
@@ -673,7 +675,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
673675
// https://intel.github.io/ScalableVectorSearch/python/dynamic.html#svs.DynamicVamana.compact
674676
impl_->compact();
675677
}
676-
changes_num = 0;
678+
num_marked_deleted = 0;
677679
}
678680

679681
#ifdef BUILD_TESTS

src/VecSim/algorithms/svs/svs_tiered.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,9 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
726726
assert(ret <= 2 && "unexpected deleteVector result");
727727
return ret;
728728
}
729+
// Returns the number of marked-deleted vectors by forwarding to the backend SVS index's
// getNumMarkedDeleted().
size_t getNumMarkedDeleted() const override {
730+
return this->GetSVSIndex()->getNumMarkedDeleted();
731+
}
729732

730733
size_t indexSize() const override {
731734
std::shared_lock<std::shared_mutex> flat_lock(this->flatIndexGuard);

src/VecSim/vec_sim_tiered_index.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ class VecSimTieredIndex : public VecSimIndexInterface {
139139
return this->allocator->getAllocationSize() + this->backendIndex->getAllocationSize() +
140140
this->frontendIndex->getAllocationSize();
141141
}
142-
142+
// Number of marked-deleted vectors reported by this tiered index. Pure virtual: each concrete
// tiered index must implement it. statisticInfo() uses it to fill numberOfMarkedDeleted.
virtual size_t getNumMarkedDeleted() const = 0;
143143
size_t indexLabelCount() const override;
144144
VecSimIndexStatsInfo statisticInfo() const override;
145145
virtual VecSimIndexDebugInfo debugInfo() const override;
@@ -319,14 +319,9 @@ template <typename DataType, typename DistType>
319319
VecSimIndexStatsInfo VecSimTieredIndex<DataType, DistType>::statisticInfo() const {
320320
auto stats = VecSimIndexStatsInfo{
321321
.memory = this->getAllocationSize(),
322-
.numberOfMarkedDeleted = 0, // Default value if cast fails
322+
.numberOfMarkedDeleted = this->getNumMarkedDeleted(),
323323
};
324324

325-
// If backend implements VecSimIndexTombstone, get number of marked deleted
326-
if (auto tombstone = dynamic_cast<VecSimIndexTombstone *>(this->backendIndex)) {
327-
stats.numberOfMarkedDeleted = tombstone->getNumMarkedDeleted();
328-
}
329-
330325
return stats;
331326
}
332327

tests/unit/test_svs.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ TYPED_TEST(SVSTest, svs_vector_update_test) {
182182
// Delete the last vector.
183183
VecSimIndex_DeleteVector(index, 1);
184184
EXPECT_EQ(VecSimIndex_IndexSize(index), 0);
185+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
185186

186187
VecSimIndex_Free(index);
187188
}
@@ -261,9 +262,19 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) {
261262
runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);
262263

263264
// Delete almost all vectors
265+
// First delete a small number of vectors so that consolidation is not triggered.
266+
const size_t first_batch_deletion = 10;
267+
ASSERT_EQ(svs_index->deleteVectors(ids.data(), first_batch_deletion), first_batch_deletion);
268+
ASSERT_EQ(VecSimIndex_IndexSize(index), n - first_batch_deletion);
269+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), first_batch_deletion);
270+
271+
// Now delete enough vectors to trigger consolidation.
264272
const size_t keep_num = 1;
265-
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
273+
ASSERT_EQ(svs_index->deleteVectors(ids.data() + first_batch_deletion,
274+
n - keep_num - first_batch_deletion),
275+
n - keep_num - first_batch_deletion);
266276
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
277+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
267278

268279
VecSimIndex_Free(index);
269280
}
@@ -3012,7 +3023,7 @@ TYPED_TEST(SVSTest, logging_runtime_params) {
30123023
for (size_t i = 0; i < 10; i++) {
30133024
index->addVector(v[i].data(), ids[i]);
30143025
}
3015-
3026+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 10);
30163027
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
30173028

30183029
float query[] = {50, 50, 50, 50};

tests/unit/test_svs_tiered.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,79 @@ TYPED_TEST(SVSTieredIndexTest, deleteVector) {
738738
}
739739
}
740740

741+
TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) {
742+
// Create TieredSVS index instance with a mock queue.
743+
size_t dim = 4;
744+
constexpr size_t n = 10;
745+
constexpr size_t transfer_trigger = n;
746+
SVSParams params = {.type = TypeParam::get_index_type(),
747+
.dim = dim,
748+
.metric = VecSimMetric_L2,
749+
.num_threads = 1};
750+
VecSimParams svs_params = CreateParams(params);
751+
auto mock_thread_pool = tieredIndexMock();
752+
753+
auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, transfer_trigger,
754+
transfer_trigger);
755+
ASSERT_INDEX(tiered_index);
756+
757+
for (size_t i = 0; i < n; i++) {
758+
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, i);
759+
}
760+
// Vectors are still in the flat buffer.
761+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), n);
762+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
763+
// Override a vector while in the flat buffer
764+
765+
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 0);
766+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), n);
767+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
768+
769+
// Move vectors to the backend
770+
mock_thread_pool.thread_iteration();
771+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
772+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
773+
ASSERT_EQ(tiered_index->indexSize(), n);
774+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
775+
776+
// Override a vector while in the backend
777+
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 1);
778+
ASSERT_EQ(tiered_index->indexSize(), n);
779+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
780+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);
781+
782+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
783+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 1);
784+
785+
// Delete the overridden vector
786+
VecSimIndex_DeleteVector(tiered_index, 1);
787+
ASSERT_EQ(tiered_index->indexSize(), n - 1);
788+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
789+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);
790+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
791+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
792+
793+
// Delete another arbitrary vector
794+
VecSimIndex_DeleteVector(tiered_index, 0);
795+
ASSERT_EQ(tiered_index->indexSize(), n - 2);
796+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 2);
797+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 2);
798+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 2);
799+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
800+
801+
// Empty Index
802+
for (size_t i = 2; i < n; i++) {
803+
VecSimIndex_DeleteVector(tiered_index, i);
804+
}
805+
806+
// Consolidation should be triggered and the marked-deleted count should be reset to zero.
807+
ASSERT_EQ(tiered_index->indexSize(), 0);
808+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
809+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
810+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 0);
811+
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
812+
}
813+
741814
TYPED_TEST(SVSTieredIndexTest, manageIndexOwnership) {
742815

743816
// Create TieredSVS index instance with a mock queue.
@@ -2211,6 +2284,8 @@ TYPED_TEST(SVSTieredIndexTest, writeInPlaceMode) {
22112284
// Validate that the vector is removed in place.
22122285
tiered_index->deleteVector(vec_label);
22132286
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 1);
2287+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
2288+
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
22142289
}
22152290

22162291
TYPED_TEST(SVSTieredIndexTest, switchWriteModes) {
@@ -2375,6 +2450,9 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCAPI) {
23752450
for (size_t i = 0; i < threshold; i++) {
23762451
tiered_index->deleteVector(i);
23772452
}
2453+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), threshold);
2454+
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, threshold);
2455+
23782456
// Launch the BG threads loop that takes jobs from the queue and executes them.
23792457
mock_thread_pool.init_threads();
23802458
mock_thread_pool.thread_pool_join();
@@ -2391,6 +2469,8 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCAPI) {
23912469
auto size_after_gc = tiered_index->getAllocationSize();
23922470
// Expect that the size of the index was reduced.
23932471
ASSERT_LT(size_after_gc, size_before_gc);
2472+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
2473+
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
23942474
}
23952475

23962476
TYPED_TEST(SVSTieredIndexTestBasic, switchDeleteModes) {

0 commit comments

Comments
 (0)