@@ -35,6 +35,8 @@ typedef unsigned int linklistsizeint;
3535
// Priority queue of (distance, internal node id) pairs, used for the working heaps of the
// graph search (see processCandidate / searchLayer).
// NOTE(review): ordering is presumably largest-distance-on-top, per the max_priority_queue name;
// confirm against vecsim_stl.
3636template <typename dist_t >
3737using candidatesMaxHeap = vecsim_stl::max_priority_queue<pair<dist_t , tableint>>;
// Same container, but the second member of each pair is an external label (labeltype) rather
// than an internal id; this is the type returned by searchKnn.
38+ template <typename dist_t >
39+ using candidatesLabelsMaxHeap = vecsim_stl::max_priority_queue<pair<dist_t , labeltype>>;
3840
3941template <typename dist_t >
4042class HierarchicalNSW : public VecsimBaseObject {
@@ -110,8 +112,17 @@ class HierarchicalNSW : public VecsimBaseObject {
110112 size_t Mcurmax, tableint *node_neighbors,
111113 const vecsim_stl::set<tableint> &orig_neighbors, tableint *removed_links,
112114 size_t *removed_links_num);
115+ inline dist_t processCandidate (tableint curNodeId, const void *data_point, size_t layer,
116+ size_t ef, tag_t visited_tag,
117+ candidatesMaxHeap<dist_t > &top_candidates,
118+ candidatesMaxHeap<dist_t > &candidates_set,
119+ dist_t lowerBound) const ;
113120 candidatesMaxHeap<dist_t > searchLayer (tableint ep_id, const void *data_point, size_t layer,
114121 size_t ef) const ;
122+ candidatesLabelsMaxHeap<dist_t > searchBottomLayer_WithTimeout (tableint ep_id,
123+ const void *data_point, size_t ef,
124+ size_t k, void *timeoutCtx,
125+ VecSimQueryResult_Code *rc) const ;
115126 void getNeighborsByHeuristic2 (candidatesMaxHeap<dist_t > &top_candidates, size_t M);
116127 tableint mutuallyConnectNewElement (tableint cur_c, candidatesMaxHeap<dist_t > &top_candidates,
117128 size_t level);
@@ -144,9 +155,10 @@ class HierarchicalNSW : public VecsimBaseObject {
144155 bool removePoint (labeltype label);
145156 void addPoint (const void *data_point, labeltype label);
146157 dist_t getDistanceByLabelFromPoint (labeltype label, const void *data_point);
147- tableint searchBottomLayerEP (const void *query_data) const ;
148- vecsim_stl::max_priority_queue<pair<dist_t , labeltype>> searchKnn (const void *query_data,
149- size_t k) const ;
158+ tableint searchBottomLayerEP (const void *query_data, void *timeoutCtx,
159+ VecSimQueryResult_Code *rc) const ;
160+ vecsim_stl::max_priority_queue<pair<dist_t , labeltype>>
161+ searchKnn (const void *query_data, size_t k, void *timeoutCtx, VecSimQueryResult_Code *rc) const ;
150162};
151163
152164/* *
@@ -337,6 +349,57 @@ void HierarchicalNSW<dist_t>::removeExtraLinks(linklistsizeint *node_ll,
337349 *removed_links_num = removed_idx;
338350}
339351
352+ template <typename dist_t >
353+ dist_t HierarchicalNSW<dist_t >::processCandidate(tableint curNodeId, const void *data_point,
354+ size_t layer, size_t ef, tag_t visited_tag,
355+ candidatesMaxHeap<dist_t > &top_candidates,
356+ candidatesMaxHeap<dist_t > &candidate_set,
357+ dist_t lowerBound) const {
358+
359+ #ifdef ENABLE_PARALLELIZATION
360+ std::unique_lock<std::mutex> lock (link_list_locks_[curNodeId]);
361+ #endif
362+ linklistsizeint *node_ll = get_linklist_at_level (curNodeId, layer);
363+ size_t links_num = getListCount (node_ll);
364+ auto *node_links = (tableint *)(node_ll + 1 );
365+ #ifdef USE_SSE
366+ _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_ll + 1 )), _MM_HINT_T0);
367+ _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_ll + 1 ) + 64 ),
368+ _MM_HINT_T0);
369+ _mm_prefetch (getDataByInternalId (*node_links), _MM_HINT_T0);
370+ _mm_prefetch (getDataByInternalId (*(node_links + 1 )), _MM_HINT_T0);
371+ #endif
372+
373+ for (size_t j = 0 ; j < links_num; j++) {
374+ tableint candidate_id = *(node_links + j);
375+ #ifdef USE_SSE
376+ _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_links + j + 1 )),
377+ _MM_HINT_T0);
378+ _mm_prefetch (getDataByInternalId (*(node_links + j + 1 )), _MM_HINT_T0);
379+ #endif
380+ if (this ->visited_nodes_handler ->getNodeTag (candidate_id) == visited_tag)
381+ continue ;
382+ this ->visited_nodes_handler ->tagNode (candidate_id, visited_tag);
383+ char *currObj1 = (getDataByInternalId (candidate_id));
384+
385+ dist_t dist1 = fstdistfunc_ (data_point, currObj1, dist_func_param_);
386+ if (top_candidates.size () < ef || lowerBound > dist1) {
387+ candidate_set.emplace (-dist1, candidate_id);
388+ #ifdef USE_SSE
389+ _mm_prefetch (getDataByInternalId (candidate_set.top ().second ), _MM_HINT_T0);
390+ #endif
391+ top_candidates.emplace (dist1, candidate_id);
392+
393+ if (top_candidates.size () > ef)
394+ top_candidates.pop ();
395+
396+ if (!top_candidates.empty ())
397+ lowerBound = top_candidates.top ().first ;
398+ }
399+ }
400+ return lowerBound;
401+ }
402+
340403template <typename dist_t >
341404candidatesMaxHeap<dist_t > HierarchicalNSW<dist_t >::searchLayer(tableint ep_id,
342405 const void *data_point, size_t layer,
@@ -366,50 +429,10 @@ candidatesMaxHeap<dist_t> HierarchicalNSW<dist_t>::searchLayer(tableint ep_id,
366429 }
367430 candidate_set.pop ();
368431
369- tableint curNodeNum = curr_el_pair.second ;
370- #ifdef ENABLE_PARALLELIZATION
371- std::unique_lock<std::mutex> lock (link_list_locks_[curNodeNum]);
372- #endif
373- linklistsizeint *node_ll = get_linklist_at_level (curNodeNum, layer);
374- size_t links_num = getListCount (node_ll);
375- auto *node_links = (tableint *)(node_ll + 1 );
376- #ifdef USE_SSE
377- _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_ll + 1 )),
378- _MM_HINT_T0);
379- _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_ll + 1 ) + 64 ),
380- _MM_HINT_T0);
381- _mm_prefetch (getDataByInternalId (*node_links), _MM_HINT_T0);
382- _mm_prefetch (getDataByInternalId (*(node_links + 1 )), _MM_HINT_T0);
383- #endif
384-
385- for (size_t j = 0 ; j < links_num; j++) {
386- tableint candidate_id = *(node_links + j);
387- #ifdef USE_SSE
388- _mm_prefetch ((char *)(visited_nodes_handler->getElementsTags () + *(node_links + j + 1 )),
389- _MM_HINT_T0);
390- _mm_prefetch (getDataByInternalId (*(node_links + j + 1 )), _MM_HINT_T0);
391- #endif
392- if (this ->visited_nodes_handler ->getNodeTag (candidate_id) == visited_tag)
393- continue ;
394- this ->visited_nodes_handler ->tagNode (candidate_id, visited_tag);
395- char *currObj1 = (getDataByInternalId (candidate_id));
396-
397- dist_t dist1 = fstdistfunc_ (data_point, currObj1, dist_func_param_);
398- if (top_candidates.size () < ef || lowerBound > dist1) {
399- candidate_set.emplace (-dist1, candidate_id);
400- #ifdef USE_SSE
401- _mm_prefetch (getDataByInternalId (candidate_set.top ().second ), _MM_HINT_T0);
402- #endif
403- top_candidates.emplace (dist1, candidate_id);
404-
405- if (top_candidates.size () > ef)
406- top_candidates.pop ();
407-
408- if (!top_candidates.empty ())
409- lowerBound = top_candidates.top ().first ;
410- }
411- }
432+ lowerBound = processCandidate (curr_el_pair.second , data_point, layer, ef, visited_tag,
433+ top_candidates, candidate_set, lowerBound);
412434 }
435+
413436#ifdef ENABLE_PARALLELIZATION
414437 visited_nodes_handler_pool->returnVisitedNodesHandlerToPool (this ->visited_nodes_handler );
415438#endif
@@ -1034,7 +1057,8 @@ void HierarchicalNSW<dist_t>::addPoint(const void *data_point, const labeltype l
10341057}
10351058
10361059template <typename dist_t >
1037- tableint HierarchicalNSW<dist_t >::searchBottomLayerEP(const void *query_data) const {
1060+ tableint HierarchicalNSW<dist_t >::searchBottomLayerEP(const void *query_data, void *timeoutCtx,
1061+ VecSimQueryResult_Code *rc) const {
10381062
10391063 if (cur_element_count == 0 ) {
10401064 return entrypoint_node_;
@@ -1045,6 +1069,10 @@ tableint HierarchicalNSW<dist_t>::searchBottomLayerEP(const void *query_data) co
10451069 for (size_t level = maxlevel_; level > 0 ; level--) {
10461070 bool changed = true ;
10471071 while (changed) {
1072+ if (__builtin_expect (VecSimIndex::timeoutCallback (timeoutCtx), 0 )) {
1073+ *rc = VecSim_QueryResult_TimedOut;
1074+ return HNSW_INVALID_ID;
1075+ }
10481076 changed = false ;
10491077 linklistsizeint *node_ll = get_linklist (currObj, level);
10501078 unsigned short links_count = getListCount (node_ll);
@@ -1064,30 +1092,81 @@ tableint HierarchicalNSW<dist_t>::searchBottomLayerEP(const void *query_data) co
10641092 }
10651093 }
10661094 }
1095+ *rc = VecSim_QueryResult_OK;
10671096 return currObj;
10681097}
10691098
10701099template <typename dist_t >
1071- vecsim_stl::max_priority_queue<pair<dist_t , labeltype>>
1072- HierarchicalNSW<dist_t >::searchKnn(const void *query_data, size_t k) const {
1100+ candidatesLabelsMaxHeap<dist_t >
1101+ HierarchicalNSW<dist_t >::searchBottomLayer_WithTimeout(tableint ep_id, const void *data_point,
1102+ size_t ef, size_t k, void *timeoutCtx,
1103+ VecSimQueryResult_Code *rc) const {
1104+ candidatesLabelsMaxHeap<dist_t > results (this ->allocator );
1105+
1106+ #ifdef ENABLE_PARALLELIZATION
1107+ this ->visited_nodes_handler =
1108+ this ->visited_nodes_handler_pool ->getAvailableVisitedNodesHandler ();
1109+ #endif
10731110
1074- vecsim_stl::max_priority_queue<std::pair<dist_t , labeltype>> result (this ->allocator );
1075- if (cur_element_count == 0 )
1076- return result;
1111+ tag_t visited_tag = this ->visited_nodes_handler ->getFreshTag ();
10771112
1078- tableint bottom_layer_ep = searchBottomLayerEP (query_data);
1079- vecsim_stl::max_priority_queue<pair<dist_t , tableint>> top_candidates =
1080- searchLayer (bottom_layer_ep, query_data, 0 , std::max (ef_, k));
1113+ candidatesMaxHeap<dist_t > top_candidates (this ->allocator );
1114+ candidatesMaxHeap<dist_t > candidate_set (this ->allocator );
1115+
1116+ dist_t dist = fstdistfunc_ (data_point, getDataByInternalId (ep_id), dist_func_param_);
1117+ dist_t lowerBound = dist;
1118+ top_candidates.emplace (dist, ep_id);
1119+ candidate_set.emplace (-dist, ep_id);
1120+
1121+ this ->visited_nodes_handler ->tagNode (ep_id, visited_tag);
1122+
1123+ while (!candidate_set.empty ()) {
1124+ std::pair<dist_t , tableint> curr_el_pair = candidate_set.top ();
1125+ if ((-curr_el_pair.first ) > lowerBound) {
1126+ break ;
1127+ }
1128+ if (__builtin_expect (VecSimIndex::timeoutCallback (timeoutCtx), 0 )) {
1129+ *rc = VecSim_QueryResult_TimedOut;
1130+ return results;
1131+ }
1132+ candidate_set.pop ();
10811133
1134+ lowerBound = processCandidate (curr_el_pair.second , data_point, 0 , ef, visited_tag,
1135+ top_candidates, candidate_set, lowerBound);
1136+ }
1137+ #ifdef ENABLE_PARALLELIZATION
1138+ visited_nodes_handler_pool->returnVisitedNodesHandlerToPool (this ->visited_nodes_handler );
1139+ #endif
10821140 while (top_candidates.size () > k) {
10831141 top_candidates.pop ();
10841142 }
10851143 while (top_candidates.size () > 0 ) {
1086- std::pair< dist_t , tableint> rez = top_candidates.top ();
1087- result. push (std::pair< dist_t , labeltype>(rez .first , getExternalLabel (rez .second ) ));
1144+ auto &res = top_candidates.top ();
1145+ results. emplace (res .first , getExternalLabel (res .second ));
10881146 top_candidates.pop ();
10891147 }
1090- return result;
1148+ *rc = VecSim_QueryResult_OK;
1149+ return results;
1150+ }
1151+
1152+ template <typename dist_t >
1153+ candidatesLabelsMaxHeap<dist_t >
1154+ HierarchicalNSW<dist_t >::searchKnn(const void *query_data, size_t k, void *timeoutCtx,
1155+ VecSimQueryResult_Code *rc) const {
1156+
1157+ if (cur_element_count == 0 ) {
1158+ *rc = VecSim_QueryResult_OK;
1159+ return candidatesLabelsMaxHeap<dist_t >(this ->allocator );
1160+ }
1161+
1162+ tableint bottom_layer_ep = searchBottomLayerEP (query_data, timeoutCtx, rc);
1163+ if (VecSim_OK != *rc) {
1164+ return candidatesLabelsMaxHeap<dist_t >(this ->allocator );
1165+ }
1166+ candidatesLabelsMaxHeap<dist_t > results = searchBottomLayer_WithTimeout (
1167+ bottom_layer_ep, query_data, std::max (ef_, k), k, timeoutCtx, rc);
1168+
1169+ return results;
10911170}
10921171
10931172} // namespace hnswlib
0 commit comments