@@ -797,7 +797,7 @@ def get_serializer_class(self):
797797
798798 return ConceptListSerializer
799799
800- def filter_queryset (self , _ = None ): # pylint:disable=too-many-locals,too-many-statements
800+ def filter_queryset (self , _ = None ): # pylint: disable=too-many-locals
801801 rows = self .request .data .get ('rows' )
802802 target_repo_url = self .request .data .get ('target_repo_url' )
803803 target_repo_params = self .request .data .get ('target_repo' )
@@ -808,8 +808,8 @@ def filter_queryset(self, _=None): # pylint:disable=too-many-locals,too-many-st
808808 map_config = self .request .data .get ('map_config' , [])
809809 filters = self .request .data .get ('filter' , {})
810810 include_retired = self .request .query_params .get (INCLUDE_RETIRED_PARAM ) in get_truthy_values ()
811- num_candidates = min (to_int (self .request .query_params .get ('numCandidates' , 0 ), 2000 ), 2000 )
812- k_nearest = min (to_int (self .request .query_params .get ('kNearest' , 0 ), 50 ), 50 )
811+ num_candidates = min (to_int (self .request .query_params .get ('numCandidates' , 0 ), 3000 ), 3000 )
812+ k_nearest = min (to_int (self .request .query_params .get ('kNearest' , 0 ), 100 ), 100 )
813813 offset = max (to_int (self .request .GET .get ('offset' ), 0 ), 0 )
814814 limit = max (to_int (self .request .GET .get ('limit' ), 0 ), 0 ) or self .default_limit
815815 page = max (to_int (self .request .GET .get ('page' ), 1 ), 1 )
@@ -823,57 +823,82 @@ def filter_queryset(self, _=None): # pylint:disable=too-many-locals,too-many-st
823823 locale_filter = filters .pop ('locale' , None ) if is_semantic else get (filters , 'locale' , None )
824824 faceted_criterion = self .get_faceted_criterion (False , filters , minimum_should_match = 1 ) if filters else None
825825 apply_for_name_locale = locale_filter and isinstance (locale_filter , str ) and len (locale_filter .split (',' )) == 1
826+ encoder_model = self .request .GET .get ('encoder_model' , None )
827+ reranker = self .request .GET .get ('reranker' , None ) in get_truthy_values () # enables reranker
828+ reranker = reranker and self .request .user .is_mapper_cross_encoder_group
829+ score_to_sort = 'search_rerank_score' if reranker else 'search_normalized_score'
826830 results = []
827- import time
828831 for row in rows :
829- start_time = time .time ()
830832 search = ConceptFuzzySearch .search (
831833 row , target_repo_url , repo_params , include_retired ,
832834 is_semantic , num_candidates , k_nearest , map_config , faceted_criterion , locale_filter
833835 )
834- print ("Search Query" , time .time () - start_time )
835- start_time = time .time ()
836836 search = search .params (track_total_hits = False , request_cache = True )
837837 es_search = CustomESSearch (search [start :end ], ConceptDocument )
838- es_search . to_queryset ( False , True , False )
839- print ( "Search to Queryset" , time . time () - start_time )
838+ name = row . get ( 'name' ) or row . get ( 'Name' ) if reranker else None
839+ es_search . to_queryset ( False , True , False , name , encoder_model )
840840 result = {'row' : row , 'results' : [], 'map_config' : map_config , 'filter' : filters }
841- start_time = time .time ()
842841 for concept in es_search .queryset :
843842 concept ._highlight = es_search .highlights .get (concept .id , {}) # pylint:disable=protected-access
844843 score_info = es_search .scores .get (concept .id , {})
845- score = get (score_info , 'raw' ) or None
846- normalized_score = get (score_info , 'normalized' ) or None
847- concept ._score = score # pylint:disable=protected-access
848- concept ._normalized_score = normalized_score # pylint:disable=protected-access
849- if limit > 1 :
850- concept ._match_type = 'low' # pylint:disable=protected-access
851- score_to_check = normalized_score if normalized_score is not None else score
852- if concept ._highlight .get ('name' , None ) or (is_semantic and score_to_check >= score_threshold ): # pylint:disable=protected-access
853- concept ._match_type = 'very_high' # pylint:disable=protected-access
854- elif concept ._highlight .get ('synonyms' , None ): # pylint:disable=protected-access
855- concept ._match_type = 'high' # pylint:disable=protected-access
856- elif concept ._highlight : # pylint:disable=protected-access
857- concept ._match_type = 'medium' # pylint:disable=protected-access
858- else :
859- concept ._match_type = 'very_high' # pylint:disable=protected-access
844+ normalized_score = get (score_info , 'normalized' ) or 0
845+ self .apply_score (concept , is_semantic , score_info , score_threshold , reranker , limit )
860846 if not best_match or concept ._match_type in ['medium' , 'high' , 'very_high' ]: # pylint:disable=protected-access
861847 if apply_for_name_locale :
862848 concept ._requested_locale = locale_filter # pylint:disable=protected-access
863849 serializer = ConceptDetailSerializer if self .is_verbose () else ConceptMinimalSerializer
864850 data = serializer (concept , context = {'request' : self .request }).data
865851 data ['search_meta' ]['search_normalized_score' ] = normalized_score * 100
866852 result ['results' ].append (data )
867- print ("Queryset to Serializer" , time .time () - start_time )
868- start_time = time .time ()
869853 if 'results' in result :
870854 result ['results' ] = sorted (
871- result ['results' ], key = lambda res : get (res , 'search_meta.search_normalized_score ' ), reverse = True )
855+ result ['results' ], key = lambda res : get (res , f 'search_meta.{ score_to_sort } ' ), reverse = True )
872856 results .append (result )
873- print ("Sorting" , time .time () - start_time )
874857
875858 return results
876859
@staticmethod
def apply_score(concept, is_semantic, scores, score_threshold, reranker, limit):  # pylint: disable=too-many-arguments,too-many-branches
    """Attach search scores to ``concept`` and classify its match quality.

    Sets ``concept._score`` and ``concept._normalized_score`` (and
    ``concept._rerank_score`` when ``reranker`` is truthy) from ``scores``,
    then sets ``concept._match_type`` to one of ``'very_high'``, ``'high'``,
    ``'medium'`` or ``'low'``.

    Args:
        concept: object that already carries a ``_highlight`` mapping; it is
            mutated in place.
        is_semantic: truthy when the search was semantic, which enables the
            score-based classification paths.
        scores: mapping read for the ``'raw'``, ``'normalized'`` and
            ``'rerank'`` keys; missing or falsy values are treated as 0.
        score_threshold: minimum score for a semantic (non-reranked) hit to
            be classified ``'very_high'`` without a name highlight.
        reranker: truthy when reranking is enabled for this request.
        limit: requested page size; when it is not greater than 1 the concept
            is always classified ``'very_high'``.

    Returns:
        None. The result is written onto ``concept``.
    """
    score = get(scores, 'raw') or 0
    normalized_score = get(scores, 'normalized') or 0
    rerank_score = get(scores, 'rerank') or 0

    concept._score = score  # pylint:disable=protected-access
    concept._normalized_score = normalized_score  # pylint:disable=protected-access
    if reranker:
        concept._rerank_score = rerank_score  # pylint:disable=protected-access
    highlight = concept._highlight  # pylint:disable=protected-access

    if limit > 1:
        match_type = 'low'
        if is_semantic and reranker:
            # NOTE(review): the reranked path classifies on the normalized
            # score, not on rerank_score -- confirm this is intentional.
            if normalized_score >= 0.9:
                match_type = 'very_high'
            elif normalized_score >= 0.65:
                match_type = 'high'
            elif normalized_score >= 0.5:
                match_type = 'medium'
        else:
            # Fall back to the raw score when no normalized score is available.
            # normalized_score is already coerced to 0 above, so an
            # ``is not None`` test could never reach the fallback.
            score_to_check = normalized_score or score
            # The threshold comparison is evaluated lazily and only for
            # semantic searches; lexical searches classify on highlights alone.
            if highlight.get('name', None) or (is_semantic and score_to_check >= score_threshold):
                match_type = 'very_high'
            elif highlight.get('synonyms', None):
                match_type = 'high'
            elif highlight:
                match_type = 'medium'
    else:
        # A single requested result is reported as the best possible match.
        match_type = 'very_high'

    concept._match_type = match_type  # pylint:disable=protected-access
901+
877902 @staticmethod
878903 def get_repo_params (is_semantic , target_repo_params , target_repo_url ):
879904 repo = ConceptFuzzySearch .get_target_repo (target_repo_url )
0 commit comments