@@ -23,6 +23,8 @@ template <typename DataType, typename DistType>
2323class BF_BatchIterator : public VecSimBatchIterator {
2424protected:
2525 const BruteForceIndex<DataType, DistType> *index;
26+ size_t index_label_count; // number of labels in the index when calculating the scores,
27+ // after which the iterator no longer accesses the index.
2628 vecsim_stl::vector<pair<DistType, labelType>> scores; // vector of scores for every label.
2729 size_t scores_valid_start_pos; // the first index in the scores vector that contains a vector
2830 // that hasn't been returned already.
@@ -56,13 +58,15 @@ template <typename DataType, typename DistType>
5658VecSimQueryResult_List
5759BF_BatchIterator<DataType, DistType>::searchByHeuristics(size_t n_res,
5860 VecSimQueryResult_Order order) { // Picks heap-based or select-based search for the next n_res results.
59- if ((this ->index -> indexLabelCount () - this ->getResultsCount ()) / 1000 > n_res) {
61+ if ((this ->index_label_count - this ->getResultsCount ()) / 1000 > n_res) { // integer division: true only when label count minus getResultsCount() is at least 1000 * (n_res + 1)
6062 // Heap based search always returns the results ordered by score
6163 return this ->heapBasedSearch (n_res);
6264 }
6365 VecSimQueryResult_List rl = this ->selectBasedSearch (n_res); // otherwise use select-based search; its output is ordered below per 'order'
6466 if (order == BY_SCORE) {
6567 sort_results_by_score (rl);
68+ } else if (order == BY_SCORE_THEN_ID) {
69+ sort_results_by_score_then_id (rl);
6670 } // any other 'order' value: rl is returned without being sorted here
6771 return rl;
6872}
@@ -167,17 +171,17 @@ BF_BatchIterator<DataType, DistType>::BF_BatchIterator(
167171 void *query_vector, const BruteForceIndex<DataType, DistType> *bf_index,
168172 VecSimQueryParams *queryParams, std::shared_ptr<VecSimAllocator> allocator)
169173 : VecSimBatchIterator(query_vector, queryParams ? queryParams->timeoutCtx : nullptr , allocator),
170- index (bf_index), scores(allocator), scores_valid_start_pos(0 ) {}
174+ index (bf_index), index_label_count(index->indexLabelCount ()), scores(allocator),
175+ scores_valid_start_pos(0 ) {}
171176
172177template <typename DataType, typename DistType>
173178VecSimQueryResult_List
174179BF_BatchIterator<DataType, DistType>::getNextResults(size_t n_res, VecSimQueryResult_Order order) {
175- assert ((order == BY_ID || order == BY_SCORE) &&
176- " Possible order values are only 'BY_ID' or 'BY_SCORE'" );
177180 // Only in the first iteration we need to compute all the scores
178181 if (this ->scores .empty ()) {
179182 assert (getResultsCount () == 0 );
180183
184+ // The only time we access the index. This function also updates the iterator's label count.
181185 auto rc = calculateScores ();
182186
183187 if (VecSim_OK != rc) {
@@ -198,8 +202,8 @@ BF_BatchIterator<DataType, DistType>::getNextResults(size_t n_res, VecSimQueryRe
198202
199203template <typename DataType, typename DistType>
200204bool BF_BatchIterator<DataType, DistType>::isDepleted() { // Reports whether getResultsCount() has reached the iterator's stored label count.
201- assert (this ->getResultsCount () <= this ->index -> indexLabelCount () );
202- bool depleted = this ->getResultsCount () == this ->index -> indexLabelCount () ;
205+ assert (this ->getResultsCount () <= this ->index_label_count ); // invariant: the results count never exceeds the stored label count
206+ bool depleted = this ->getResultsCount () == this ->index_label_count ; // compares against the iterator's own copy, not the live index
203207 return depleted;
204208}
205209
0 commit comments