diff --git a/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java new file mode 100644 index 00000000000..e84c80519df --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java @@ -0,0 +1,620 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.component; + +import static java.lang.Math.max; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQueryComponent class extends 
QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList initParams; + private final Map combiners = new HashMap<>(); + private int maxCombinerQueries; + private static final String RESPONSE_PER_QUERY_KEY = "response_per_query"; + + @Override + public void init(NamedList args) { + super.init(args); + this.initParams = args; + this.maxCombinerQueries = CombinerParams.DEFAULT_MAX_COMBINER_QUERIES; + } + + @Override + public void inform(SolrCore core) { + for (Map.Entry initEntry : initParams) { + if ("combiners".equals(initEntry.getKey()) + && initEntry.getValue() instanceof NamedList all) { + for (int i = 0; i < all.size(); i++) { + String name = all.getName(i); + NamedList combinerConfig = (NamedList) all.getVal(i); + String className = (String) combinerConfig.get("class"); + QueryAndResponseCombiner combiner = + core.getResourceLoader().newInstance(className, QueryAndResponseCombiner.class); + combiner.init(combinerConfig); + combiners.compute( + name, + (k, existingCombiner) -> { + if (existingCombiner == null) { + return combiner; + } + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Found more than one combiner with same name"); + }); + } + } + } + Object maxQueries = initParams.get("maxCombinerQueries"); + if (maxQueries != null) { + this.maxCombinerQueries = Integer.parseInt(maxQueries.toString()); + } + combiners.computeIfAbsent( + CombinerParams.RECIPROCAL_RANK_FUSION, + key -> { + ReciprocalRankFusion reciprocalRankFusion = new ReciprocalRankFusion(); + reciprocalRankFusion.init(initParams); + return reciprocalRankFusion; + }); + } + + @Override + protected boolean isForceDistributed() { + return true; + } + + /** + * 
Overrides the prepare method to handle combined queries. + * + * @param rb the ResponseBuilder to prepare + * @throws IOException if an I/O error occurs during preparation + */ + @Override + public void prepare(ResponseBuilder rb) throws IOException { + if (rb instanceof CombinedQueryResponseBuilder crb) { + SolrParams params = crb.req.getParams(); + if (params.get(CursorMarkParams.CURSOR_MARK_PARAM) != null + || params.getBool(GroupParams.GROUP, false)) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Unsupported functionality for Combined Queries."); + } + String[] queriesToCombineKeys = params.getParams(CombinerParams.COMBINER_QUERY); + if (queriesToCombineKeys.length > maxCombinerQueries) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Too many queries to combine: limit is " + maxCombinerQueries); + } + for (String queryKey : queriesToCombineKeys) { + final var unparsedQuery = params.get(queryKey); + ResponseBuilder rbNew = new ResponseBuilder(rb.req, new SolrQueryResponse(), rb.components); + rbNew.setQueryString(unparsedQuery); + super.prepare(rbNew); + crb.setFilters(rbNew.getFilters()); + crb.responseBuilders.add(rbNew); + } + } + super.prepare(rb); + } + + /** + * Overrides the process method to handle CombinedQueryResponseBuilder instances. This method + * processes the responses from multiple queries, combines them using the specified + * QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in the + * CombinedQueryResponseBuilder. 
+ * + * @param rb the ResponseBuilder object to process + * @throws IOException if an I/O error occurs during processing + */ + @Override + @SuppressWarnings("unchecked") + public void process(ResponseBuilder rb) throws IOException { + if (rb instanceof CombinedQueryResponseBuilder crb) { + boolean partialResults = false; + boolean segmentTerminatedEarly = false; + Boolean setMaxHitsTerminatedEarly = null; + List queryResults = new ArrayList<>(); + int rbIndex = 0; + boolean shouldReturn = false; + // TODO: to be parallelized + for (ResponseBuilder thisRb : crb.responseBuilders) { + // Just a placeholder for future implementation for Cursors + thisRb.setCursorMark(crb.getCursorMark()); + super.process(thisRb); + int purpose = + thisRb + .req + .getParams() + .getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS); + if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) { + shouldReturn = true; + continue; + } + DocListAndSet docListAndSet = thisRb.getResults(); + QueryResult queryResult = new QueryResult(); + queryResult.setDocListAndSet(docListAndSet); + queryResults.add(queryResult); + partialResults |= queryResult.isPartialResults(); + if (queryResult.getSegmentTerminatedEarly() != null) { + segmentTerminatedEarly |= queryResult.getSegmentTerminatedEarly(); + } + if (queryResult.getMaxHitsTerminatedEarly() != null) { + if (setMaxHitsTerminatedEarly == null) { + setMaxHitsTerminatedEarly = queryResult.getMaxHitsTerminatedEarly(); + } + setMaxHitsTerminatedEarly |= queryResult.getMaxHitsTerminatedEarly(); + } + doFieldSortValues(thisRb, crb.req.getSearcher()); + NamedList sortValues = + (NamedList) thisRb.rsp.getValues().get("sort_values"); + crb.rsp.add(String.format(Locale.ROOT, "sort_values_%s", rbIndex), sortValues); + ResultContext ctx = new BasicResultContext(thisRb); + if (crb.rsp.getValues().get(RESPONSE_PER_QUERY_KEY) == null) { + crb.rsp.add(RESPONSE_PER_QUERY_KEY, new ArrayList<>(List.of(ctx))); + } else { + ((List) 
crb.rsp.getValues().get(RESPONSE_PER_QUERY_KEY)).add(ctx); + } + rbIndex++; + } + if (shouldReturn) { + return; + } + prepareCombinedResponseBuilder( + crb, queryResults, partialResults, segmentTerminatedEarly, setMaxHitsTerminatedEarly); + if (crb.mergeFieldHandler != null) { + crb.mergeFieldHandler.handleMergeFields(crb, crb.req.getSearcher()); + } else { + doFieldSortValues(rb, crb.req.getSearcher()); + } + doPrefetch(crb); + } else { + super.process(rb); + } + } + + private void prepareCombinedResponseBuilder( + CombinedQueryResponseBuilder crb, + List queryResults, + boolean partialResults, + boolean segmentTerminatedEarly, + Boolean setMaxHitsTerminatedEarly) { + QueryResult combinedQueryResult = QueryAndResponseCombiner.simpleCombine(queryResults); + combinedQueryResult.setPartialResults(partialResults); + combinedQueryResult.setSegmentTerminatedEarly(segmentTerminatedEarly); + combinedQueryResult.setMaxHitsTerminatedEarly(setMaxHitsTerminatedEarly); + crb.setResult(combinedQueryResult); + ResultContext ctx = new BasicResultContext(crb); + crb.rsp.addResponse(ctx); + crb.rsp.addToLog( + "hits", + crb.getResults() == null || crb.getResults().docList == null + ? 
0 + : crb.getResults().docList.matches()); + if (!crb.req.getParams().getBool(ShardParams.IS_SHARD, false)) { + // for non-distributed request and future cursor improvement + if (null != crb.getNextCursorMark()) { + crb.rsp.add( + CursorMarkParams.CURSOR_MARK_NEXT, + crb.responseBuilders.getFirst().getNextCursorMark().getSerializedTotem()); + } + } + } + + @Override + protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { + SortSpec ss = rb.getSortSpec(); + + Set scoreDependentFields; + if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) == 0) { + scoreDependentFields = + rb.rsp.getReturnFields().getScoreDependentReturnFields().keySet().stream() + .filter(field -> !field.equals(SolrReturnFields.SCORE)) + .collect(Collectors.toSet()); + } else { + scoreDependentFields = Collections.emptySet(); + } + + IndexSchema schema = rb.req.getSchema(); + SchemaField uniqueKeyField = schema.getUniqueKeyField(); + + NamedList shardInfo = null; + if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { + shardInfo = new SimpleOrderedMap<>(); + rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo); + } + + long numFound = 0; + boolean hitCountIsExact = true; + boolean thereArePartialResults = false; + Boolean segmentTerminatedEarly = null; + boolean maxHitsTerminatedEarly = false; + long approximateTotalHits = 0; + Map> shardDocMap = new HashMap<>(); + String[] queriesToCombineKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY); + // TODO: to be parallelized outer loop + for (int queryIndex = 0; queryIndex < queriesToCombineKeys.length; queryIndex++) { + int failedShardCount = 0; + long queryNumFound = 0; + Map uniqueDoc = new HashMap<>(); + for (ShardResponse srsp : sreq.responses) { + SolrDocumentList docs = null; + NamedList responseHeader; + + if (SolrResponseUtil.getSubsectionFromShardResponse(rb, srsp, RESPONSE_PER_QUERY_KEY, false) + instanceof List docList + && docList.get(queryIndex) instanceof SolrDocumentList solrDocumentList) { + 
docs = Objects.requireNonNull(solrDocumentList); + queryNumFound += docs.getNumFound(); + hitCountIsExact = hitCountIsExact && Boolean.FALSE.equals(docs.getNumFoundExact()); + } + failedShardCount += + addShardInfo( + shardInfo, failedShardCount, srsp, rb, queriesToCombineKeys[queryIndex], docs); + if (srsp.getException() != null) { + thereArePartialResults = true; + continue; + } + + responseHeader = + Objects.requireNonNull( + (NamedList) + SolrResponseUtil.getSubsectionFromShardResponse( + rb, srsp, "responseHeader", false)); + + final boolean thisResponseIsPartial; + thisResponseIsPartial = + Boolean.TRUE.equals( + responseHeader.getBooleanArg( + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY)); + thereArePartialResults |= thisResponseIsPartial; + + if (!Boolean.TRUE.equals(segmentTerminatedEarly)) { + final Object ste = + responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY); + if (ste instanceof Boolean steFlag) { + segmentTerminatedEarly = steFlag; + } + } + + if (!maxHitsTerminatedEarly + && Boolean.TRUE.equals( + responseHeader.get( + SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY))) { + maxHitsTerminatedEarly = true; + } + + Object ath = + responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY); + if (ath == null) { + approximateTotalHits += queryNumFound; + } else { + approximateTotalHits += ((Number) ath).longValue(); + } + + @SuppressWarnings("unchecked") + NamedList> sortFieldValues = + (NamedList>) + SolrResponseUtil.getSubsectionFromShardResponse( + rb, srsp, String.format(Locale.ROOT, "sort_values_%s", queryIndex), true); + if (null == sortFieldValues) { + sortFieldValues = new NamedList<>(); + } + + boolean needsUnmarshalling = ss.includesNonScoreOrDocField(); + if (thisResponseIsPartial && sortFieldValues.size() == 0 && needsUnmarshalling) { + continue; + } + NamedList> unmarshalledSortFieldValues = + needsUnmarshalling + ? 
unmarshalSortValues(ss, sortFieldValues, schema) + : new NamedList<>(); + // go through every doc in this response, construct a ShardDoc, and + // put it in the uniqueDoc to dedup + for (int i = 0; i < docs.size(); i++) { + SolrDocument doc = docs.get(i); + Object id = doc.getFieldValue(uniqueKeyField.getName()); + ShardDoc shardDoc = new ShardDoc(); + shardDoc.id = id; + shardDoc.orderInShard = i; + shardDoc.shard = srsp.getShard(); + Object scoreObj = doc.getFieldValue(SolrReturnFields.SCORE); + if (scoreObj != null) { + if (scoreObj instanceof String scoreStr) { + shardDoc.score = Float.parseFloat(scoreStr); + } else { + shardDoc.score = ((Number) scoreObj).floatValue(); + } + } + if (!scoreDependentFields.isEmpty()) { + shardDoc.scoreDependentFields = doc.getSubsetOfFields(scoreDependentFields); + } + shardDoc.sortFieldValues = unmarshalledSortFieldValues; + ShardDoc prevShard = uniqueDoc.put(id, shardDoc); + if (prevShard != null) { + queryNumFound--; + } + } // end for-each-doc-in-response + } // end for-each-response + shardDocMap.put(queriesToCombineKeys[queryIndex], uniqueDoc.values().stream().toList()); + numFound = max(numFound, queryNumFound); + } + + SolrDocumentList responseDocs = new SolrDocumentList(); + rb.rsp.addToLog("hits", numFound); + + responseDocs.setNumFound(numFound); + responseDocs.setNumFoundExact(hitCountIsExact); + responseDocs.setStart(ss.getOffset()); + + rb.resultIds = createShardResult(rb, shardDocMap, responseDocs); + rb.setResponseDocs(responseDocs); + + populateNextCursorMarkFromMergedShards(rb); + + postMergeIds( + rb, + thereArePartialResults, + segmentTerminatedEarly, + maxHitsTerminatedEarly, + approximateTotalHits); + } + + private static void postMergeIds( + ResponseBuilder rb, + boolean thereArePartialResults, + Boolean segmentTerminatedEarly, + boolean maxHitsTerminatedEarly, + long approximateTotalHits) { + if (thereArePartialResults) { + rb.rsp + .getResponseHeader() + .asShallowMap() + 
.put(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY, Boolean.TRUE); + } + if (segmentTerminatedEarly != null) { + final Object existingSegmentTerminatedEarly = + rb.rsp + .getResponseHeader() + .get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY); + if (existingSegmentTerminatedEarly == null) { + rb.rsp + .getResponseHeader() + .add( + SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, + segmentTerminatedEarly); + } else if (!Boolean.TRUE.equals(existingSegmentTerminatedEarly) && segmentTerminatedEarly) { + rb.rsp + .getResponseHeader() + .remove(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY); + rb.rsp + .getResponseHeader() + .add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, true); + } + } + if (maxHitsTerminatedEarly) { + rb.rsp + .getResponseHeader() + .add(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY, Boolean.TRUE); + if (approximateTotalHits > 0) { + rb.rsp + .getResponseHeader() + .add( + SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY, approximateTotalHits); + } + } + } + + private int addShardInfo( + NamedList shardInfo, + int failedShardCount, + ShardResponse srsp, + ResponseBuilder rb, + String queryKey, + SolrDocumentList docs) { + if (shardInfo != null) { + SimpleOrderedMap nl = new SimpleOrderedMap<>(); + NamedList responseHeader; + if (srsp.getException() != null) { + Throwable t = srsp.getException(); + if (t instanceof SolrServerException && t.getCause() != null) { + t = t.getCause(); + } + nl.add("error", t.toString()); + if (!rb.req.getCore().getCoreContainer().hideStackTrace()) { + StringWriter trace = new StringWriter(); + t.printStackTrace(new PrintWriter(trace)); + nl.add("trace", trace.toString()); + } + if (!StrUtils.isNullOrEmpty(srsp.getShardAddress())) { + nl.add("shardAddress", srsp.getShardAddress()); + } + } else { + responseHeader = + (NamedList) + SolrResponseUtil.getSubsectionFromShardResponse(rb, srsp, "responseHeader", false); + 
if (responseHeader == null) { + return failedShardCount; + } + final Object rhste = + responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY); + if (rhste != null) { + nl.add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, rhste); + } + final Object rhmhte = + responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY); + if (rhmhte != null) { + nl.add(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY, rhmhte); + } + final Object rhath = + responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY); + if (rhath != null) { + nl.add(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY, rhath); + } + if (docs == null) { + return failedShardCount; + } + nl.add("numFound", docs.getNumFound()); + nl.add("numFoundExact", docs.getNumFoundExact()); + nl.add("maxScore", docs.getMaxScore()); + nl.add("shardAddress", srsp.getShardAddress()); + } + if (srsp.getSolrResponse() != null) { + nl.add("time", srsp.getSolrResponse().getElapsedTime()); + } + // This ought to be better, but at least this ensures no duplicate keys in JSON result + String shard = srsp.getShard() + "_" + queryKey; + if (StrUtils.isNullOrEmpty(shard)) { + failedShardCount += 1; + shard = "unknown_shard_" + queryKey + "_" + failedShardCount; + } + nl.add("queryKey", queryKey); + shardInfo.add(shard, nl); + } + return failedShardCount; + } + + /** + * Combines and sorts documents from multiple shards to create the final result set. This method + * uses a combiner strategy to merge shard responses, then sorts the resulting documents using a + * priority queue based on the request's sort specification. It handles pagination (offset and + * count) and calculates the maximum score for the response. + * + * @param rb The ResponseBuilder containing the request and context, such as sort specifications. + * @param shardDocMap A map from shard addresses to the list of documents returned by each shard. 
+ * @param responseDocs The final response document list, which will be populated with null + * placeholders and have its max score set. + * @return A map from document IDs to the corresponding ShardDoc objects for the documents in the + * final sorted page of results. + */ + protected Map createShardResult( + ResponseBuilder rb, Map> shardDocMap, SolrDocumentList responseDocs) { + String algorithm = + rb.req.getParams().get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER); + QueryAndResponseCombiner combinerStrategy = + QueryAndResponseCombiner.getImplementation(algorithm, combiners); + List combinedShardDocs = combinerStrategy.combine(shardDocMap, rb.req.getParams()); + if (rb.isDebugResults()) { + String[] queryKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY); + NamedList explanations = + combinerStrategy.getExplanations( + queryKeys, shardDocMap, combinedShardDocs, rb.req.getParams()); + rb.addDebugInfo("combinerExplanations", explanations); + } + Map shardDocIdMap = new HashMap<>(); + shardDocMap.forEach( + (shardKey, shardDocs) -> + shardDocs.forEach(shardDoc -> shardDocIdMap.put(shardDoc.id.toString(), shardDoc))); + Map resultIds = new HashMap<>(); + float maxScore = 0.0f; + Sort sort = rb.getSortSpec().getSort(); + SortField[] sortFields; + if (sort != null) { + sortFields = sort.getSort(); + } else { + sortFields = new SortField[] {SortField.FIELD_SCORE}; + } + final ShardFieldSortedHitQueue queue = + new ShardFieldSortedHitQueue( + sortFields, + rb.getSortSpec().getOffset() + rb.getSortSpec().getCount(), + rb.req.getSearcher()) { + @Override + protected boolean lessThan(ShardDoc docA, ShardDoc docB) { + int c = 0; + for (int i = 0; i < comparators.length && c == 0; i++) { + c = + (fields[i].getReverse()) + ? 
comparators[i].compare(docB, docA) + : comparators[i].compare(docA, docB); + } + + if (c == 0) { + c = docA.id.toString().compareTo(docB.id.toString()); + } + return c < 0; + } + }; + combinedShardDocs.forEach(queue::insertWithOverflow); + int resultSize = queue.size() - rb.getSortSpec().getOffset(); + resultSize = max(0, resultSize); + for (int i = resultSize - 1; i >= 0; i--) { + ShardDoc shardDoc = queue.pop(); + shardDoc.positionInResponse = i; + maxScore = max(maxScore, shardDoc.score); + if (Float.isNaN(shardDocIdMap.get(shardDoc.id.toString()).score)) { + shardDoc.score = Float.NaN; + } + resultIds.put(shardDoc.id.toString(), shardDoc); + } + responseDocs.setMaxScore(maxScore); + for (int i = 0; i < resultSize; i++) responseDocs.add(null); + return resultIds; + } + + @Override + public String getDescription() { + return "Combined Query Component to support multiple query execution"; + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryResponseBuilder.java new file mode 100644 index 00000000000..ad506675e24 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/CombinedQueryResponseBuilder.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.util.ArrayList; +import java.util.List; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +/** + * The CombinedQueryResponseBuilder class extends the ResponseBuilder class and is responsible for + * building a combined response for multiple SearchComponent objects. It orchestrates the process of + * constructing the SolrQueryResponse by aggregating results from various components. + */ +public class CombinedQueryResponseBuilder extends ResponseBuilder { + + public final List responseBuilders = new ArrayList<>(); + + /** + * Constructs a CombinedQueryResponseBuilder instance. + * + * @param req the SolrQueryRequest object containing the query parameters and context. + * @param rsp the SolrQueryResponse object to which the combined results will be added. + * @param components a list of SearchComponent objects that will be used to build the response. + */ + public CombinedQueryResponseBuilder( + SolrQueryRequest req, SolrQueryResponse rsp, List components) { + super(req, rsp, components); + } + + /** + * Propagates all the properties from parent ResponseBuilder to the all the children which are + * being set later after the CombinedQueryComponent is prepared. 
+ */ + public final void propagate() { + responseBuilders.forEach( + thisRb -> { + thisRb.setNeedDocSet(isNeedDocSet()); + thisRb.setNeedDocList(isNeedDocList()); + thisRb.doFacets = doFacets; + thisRb.doHighlights = doHighlights; + thisRb.doExpand = doExpand; + thisRb.doTerms = doTerms; + thisRb.doStats = doStats; + thisRb.setDistribStatsDisabled(isDistribStatsDisabled()); + }); + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/CombinedQuerySearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/CombinedQuerySearchHandler.java new file mode 100644 index 00000000000..834aa95ed20 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/CombinedQuerySearchHandler.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.component; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.List; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.facet.FacetModule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQuerySearchHandler class extends the SearchHandler and provides custom behavior for + * handling combined queries. It overrides methods to create a response builder based on the {@link + * CombinerParams#COMBINER} parameter and to define the default components included in the search + * configuration. + */ +public class CombinedQuerySearchHandler extends SearchHandler { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + /** + * Overrides the default response builder creation method. This method checks if the {@link + * CombinerParams#COMBINER} parameter is set to true in the request. If it is, it returns an + * instance of {@link CombinedQueryResponseBuilder}, otherwise, it returns an instance of {@link + * ResponseBuilder}. + * + * @param req the SolrQueryRequest object + * @param rsp the SolrQueryResponse object + * @param components the list of SearchComponent objects + * @return the appropriate ResponseBuilder instance based on the CombinerParams.COMBINER parameter + */ + @Override + protected ResponseBuilder newResponseBuilder( + SolrQueryRequest req, SolrQueryResponse rsp, List components) { + if (req.getParams().getBool(CombinerParams.COMBINER, false)) { + return new CombinedQueryResponseBuilder(req, rsp, components); + } + return super.newResponseBuilder(req, rsp, components); + } + + /** + * Overrides the default components and returns a list of component names that are included in the + * default configuration. 
+ * + * @return a list of component names + */ + @Override + @SuppressWarnings("unchecked") + protected List getDefaultComponents() { + List names = new ArrayList<>(9); + names.add(CombinedQueryComponent.COMPONENT_NAME); + names.add(FacetComponent.COMPONENT_NAME); + names.add(FacetModule.COMPONENT_NAME); + names.add(MoreLikeThisComponent.COMPONENT_NAME); + names.add(HighlightComponent.COMPONENT_NAME); + names.add(StatsComponent.COMPONENT_NAME); + names.add(DebugComponent.COMPONENT_NAME); + names.add(ExpandComponent.COMPONENT_NAME); + names.add(TermsComponent.COMPONENT_NAME); + return names; + } + + @Override + protected void postPrepareComponents(ResponseBuilder rb) { + super.postPrepareComponents(rb); + // propagate the CombinedQueryResponseBuilder's state to all subBuilders after prepare + if (rb instanceof CombinedQueryResponseBuilder crb) { + crb.propagate(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java index b58da55859a..503dc4dccea 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java @@ -22,6 +22,7 @@ import java.lang.invoke.MethodHandles; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; @@ -31,6 +32,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import net.jcip.annotations.NotThreadSafe; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.impl.Http2SolrClient; @@ -404,6 +406,9 @@ public void cancelAll() { @Override public void prepDistributed(ResponseBuilder rb) { + if (rb.isForcedDistrib()) { + 
forceDistributed(rb); + } final SolrQueryRequest req = rb.req; final SolrParams params = req.getParams(); final String shards = params.get(ShardParams.SHARDS); @@ -500,6 +505,28 @@ public void prepDistributed(ResponseBuilder rb) { } } + private static void forceDistributed(ResponseBuilder rb) { + SolrQueryRequest req = rb.req; + ModifiableSolrParams solrParams = new ModifiableSolrParams(req.getParams()); + solrParams.set("shortCircuit", false); + req.setParams(solrParams); + if (req.getHttpSolrCall() != null + && StringUtils.isEmpty(req.getParams().get(ShardParams.SHARDS))) { + String scheme = req.getHttpSolrCall().getReq().getScheme(); + String host = req.getHttpSolrCall().getReq().getServerName(); + int port = req.getHttpSolrCall().getReq().getServerPort(); + String context = req.getHttpSolrCall().getReq().getContextPath(); + String core = req.getCore().getName(); + String localShardUrl = + String.format(Locale.ROOT, "%s://%s:%d%s/%s", scheme, host, port, context, core); + solrParams.set(ShardParams.SHARDS, localShardUrl); + req.setParams(solrParams); + return; + } + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Force Distributed cannot be enabled"); + } + private static String createSliceShardsStr(final List shardUrls) { return String.join("|", shardUrls); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 42909359883..6f14d142dd5 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -905,6 +905,67 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded) return true; } + protected abstract static class ShardDocQueue { + public abstract boolean push(ShardDoc shardDoc); + + public abstract Map resultIds(int offset); + } + ; + + protected ShardDocQueue newShardDocQueue( + SolrIndexSearcher 
searcher, SortField[] sortFields, Integer size) { + return new ShardDocQueue() { + + // id to shard mapping, to eliminate any accidental dups + private final HashMap uniqueDoc = new HashMap<>(); + + private final ShardFieldSortedHitQueue queue = + new ShardFieldSortedHitQueue(sortFields, size, searcher); + + @Override + public boolean push(ShardDoc shardDoc) { + final String prevShard = uniqueDoc.put(shardDoc.id, shardDoc.shard); + if (prevShard != null) { + // duplicate detected + + // For now, just always use the first encountered since we can't currently + // remove the previous one added to the priority queue. If we switched + // to the Java5 PriorityQueue, this would be easier. + return false; + // make which duplicate is used deterministic based on shard + // if (prevShard.compareTo(shardDoc.shard) >= 0) { + // TODO: remove previous from priority queue + // return false; + // } + } + + queue.insertWithOverflow(shardDoc); + return true; + } + + @Override + public Map resultIds(int offset) { + final Map resultIds = new HashMap<>(); + + // The queue now has 0 -> queuesize docs, where queuesize <= start + rows + // So we want to pop the last documents off the queue to get + // the docs offset -> queuesize + int resultSize = queue.size() - offset; + resultSize = Math.max(0, resultSize); // there may not be any docs in range + + for (int i = resultSize - 1; i >= 0; i--) { + ShardDoc shardDoc = queue.pop(); + shardDoc.positionInResponse = i; + // Need the toString() for correlation with other lists that must + // be strings (like keys in highlighting, explain, etc) + resultIds.put(shardDoc.id.toString(), shardDoc); + } + + return resultIds; + } + }; + } + protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { List mergeStrategies = rb.getMergeStrategies(); if (mergeStrategies != null) { @@ -947,14 +1008,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { IndexSchema schema = rb.req.getSchema(); SchemaField uniqueKeyField = 
schema.getUniqueKeyField(); - // id to shard mapping, to eliminate any accidental dups - HashMap uniqueDoc = new HashMap<>(); - // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - final ShardFieldSortedHitQueue queue = - new ShardFieldSortedHitQueue( - sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + final ShardDocQueue shardDocQueue = + newShardDocQueue(rb.req.getSearcher(), sortFields, ss.getOffset() + ss.getCount()); NamedList shardInfo = null; if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { @@ -1125,23 +1182,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { for (int i = 0; i < docs.size(); i++) { SolrDocument doc = docs.get(i); Object id = doc.getFieldValue(uniqueKeyField.getName()); - - String prevShard = uniqueDoc.put(id, srsp.getShard()); - if (prevShard != null) { - // duplicate detected - numFound--; - - // For now, just always use the first encountered since we can't currently - // remove the previous one added to the priority queue. If we switched - // to the Java5 PriorityQueue, this would be easier. 
- continue; - // make which duplicate is used deterministic based on shard - // if (prevShard.compareTo(srsp.shard) >= 0) { - // TODO: remove previous from priority queue - // continue; - // } - } - ShardDoc shardDoc = new ShardDoc(); shardDoc.id = id; shardDoc.shard = srsp.getShard(); @@ -1160,42 +1200,18 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { shardDoc.sortFieldValues = unmarshalledSortFieldValues; - queue.insertWithOverflow(shardDoc); + if (!shardDocQueue.push(shardDoc)) { + numFound--; + } } // end for-each-doc-in-response } // end for-each-response - // The queue now has 0 -> queuesize docs, where queuesize <= start + rows - // So we want to pop the last documents off the queue to get - // the docs offset -> queuesize - int resultSize = queue.size() - ss.getOffset(); - resultSize = Math.max(0, resultSize); // there may not be any docs in range - - Map resultIds = new HashMap<>(); - for (int i = resultSize - 1; i >= 0; i--) { - ShardDoc shardDoc = queue.pop(); - shardDoc.positionInResponse = i; - // Need the toString() for correlation with other lists that must - // be strings (like keys in highlighting, explain, etc) - resultIds.put(shardDoc.id.toString(), shardDoc); - } - // Add hits for distributed requests // https://issues.apache.org/jira/browse/SOLR-3518 rb.rsp.addToLog("hits", numFound); - SolrDocumentList responseDocs = new SolrDocumentList(); - if (maxScore != null) responseDocs.setMaxScore(maxScore); - responseDocs.setNumFound(numFound); - responseDocs.setNumFoundExact(hitCountIsExact); - responseDocs.setStart(ss.getOffset()); - // size appropriately - for (int i = 0; i < resultSize; i++) responseDocs.add(null); - - // save these results in a private area so we can access them - // again when retrieving stored fields. - // TODO: use ResponseBuilder (w/ comments) or the request context? 
- rb.resultIds = resultIds; - rb.setResponseDocs(responseDocs); + setResultIdsAndResponseDocs( + rb, shardDocQueue, maxScore, numFound, hitCountIsExact, ss.getOffset()); populateNextCursorMarkFromMergedShards(rb); @@ -1241,6 +1257,30 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { } } + protected void setResultIdsAndResponseDocs( + ResponseBuilder rb, + ShardDocQueue shardDocQueue, + Float maxScore, + long numFound, + boolean hitCountIsExact, + int offset) { + final Map resultIds = shardDocQueue.resultIds(offset); + + final SolrDocumentList responseDocs = new SolrDocumentList(); + if (maxScore != null) responseDocs.setMaxScore(maxScore); + responseDocs.setNumFound(numFound); + responseDocs.setNumFoundExact(hitCountIsExact); + responseDocs.setStart(offset); + // size appropriately + for (int i = 0; i < resultIds.size(); i++) responseDocs.add(null); + + // save these results in a private area so we can access them + // again when retrieving stored fields. + // TODO: use ResponseBuilder (w/ comments) or the request context? 
+ rb.resultIds = resultIds; + rb.setResponseDocs(responseDocs); + } + /** * Inspects the state of the {@link ResponseBuilder} and populates the next {@link * ResponseBuilder#setNextCursorMark} as appropriate based on the merged sort values from diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java index 20b620c3b26..2ba0709f3fb 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java @@ -141,6 +141,15 @@ public ResponseBuilder( public List outgoing; // requests to be sent public List finished; // requests that have received responses from all shards public String shortCircuitedURL; + private boolean forcedDistrib = false; + + public boolean isForcedDistrib() { + return forcedDistrib; + } + + public void setForcedDistrib(boolean forcedDistrib) { + this.forcedDistrib = forcedDistrib; + } /** This function will return true if this was a distributed search request. */ public boolean isDistributed() { diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java index 913078c27ee..e2d58fb947e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java @@ -93,6 +93,16 @@ public String getName() { @Override public abstract String getDescription(); + /** + * A component can force solr to run in distributed mode to prevent extra development cost of an + * optimized single-shard algorithm. 
+ * + * @return {@code true} if this component requires the request to run in distributed mode + */ + protected boolean isForceDistributed() { + return false; + } + @Override public Category getCategory() { return Category.OTHER; diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 1352164883f..dc3836db26d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -472,23 +472,13 @@ private void processComponents( RTimerTree timer, List components) throws IOException { + updateForcedDistributed(req, rb, components); // creates a ShardHandler object only if it's needed final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb); if (!prepareComponents(req, rb, timer, components)) return; - { // Once all of our components have been prepared, check if this request involves a SortSpec. - // If it does, and if our request includes a cursorMark param, then parse & init the - // CursorMark state (This must happen after the prepare() of all components, because any - // component may have modified the SortSpec) - final SortSpec spec = rb.getSortSpec(); - final String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM); - if (null != spec && null != cursorStr) { - final CursorMark cursorMark = new CursorMark(rb.req.getSchema(), spec); - cursorMark.parseSerializedTotem(cursorStr); - rb.setCursorMark(cursorMark); - } - } + postPrepareComponents(rb); if (!rb.isDistrib) { // a normal non-distributed request @@ -708,6 +698,25 @@ private void processComponents( } } + /** + * Operations to be performed post prepare for all components. + * + * @param rb the ResponseBuilder containing the request and context, such as sort specifications. + */ + protected void postPrepareComponents(ResponseBuilder rb) { + // Once all of our components have been prepared, check if this request involves a SortSpec. 
+ // If it does, and if our request includes a cursorMark param, then parse & init the + // CursorMark state (This must happen after the prepare() of all components, because any + // component may have modified the SortSpec) + final SortSpec spec = rb.getSortSpec(); + final String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM); + if (null != spec && null != cursorStr) { + final CursorMark cursorMark = new CursorMark(rb.req.getSchema(), spec); + cursorMark.parseSerializedTotem(cursorStr); + rb.setCursorMark(cursorMark); + } + } + private static boolean prepareComponents( SolrQueryRequest req, ResponseBuilder rb, RTimerTree timer, List components) throws IOException { @@ -737,6 +746,19 @@ private static boolean prepareComponents( return true; } + private static void updateForcedDistributed( + SolrQueryRequest req, ResponseBuilder rb, List components) { + if (!rb.isDistrib && !req.getParams().getBool(ShardParams.IS_SHARD, false)) { + for (SearchComponent component : components) { + if (component.isForceDistributed()) { + rb.isDistrib = true; + rb.setForcedDistrib(true); + return; + } + } + } + } + protected String stageToString(int stage) { // This should probably be a enum, but that change should be its own ticket. switch (stage) { diff --git a/solr/core/src/java/org/apache/solr/search/combine/QueryAndResponseCombiner.java b/solr/core/src/java/org/apache/solr/search/combine/QueryAndResponseCombiner.java new file mode 100644 index 00000000000..aea13a04f43 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/combine/QueryAndResponseCombiner.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.combine; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.TotalHits; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.DocSlice; +import org.apache.solr.search.QueryResult; +import org.apache.solr.util.plugin.NamedListInitializedPlugin; + +/** + * The QueryAndResponseCombiner class is an abstract base class for combining query results and + * shard documents. It provides a framework for different algorithms to be implemented for merging + * ranked lists and shard documents. + */ +public abstract class QueryAndResponseCombiner implements NamedListInitializedPlugin { + /** + * Combines shard documents corresponding to multiple queries based on the provided map. 
+ * + * @param queriesDocMap a map where keys represent combiner query keys and values are lists of + * ShardDocs corresponding to each key + * @param solrParams params to be used when provided at query time + * @return a combined list of ShardDocs from all queries + */ + public abstract List combine( + Map> queriesDocMap, SolrParams solrParams); + + /** + * Combines the given query results into one result holding the union of their documents. + * + * @param queryResults the query results to be combined + * @return the combined query result, whose max score is the highest score among its documents + */ + public static QueryResult simpleCombine(List<QueryResult> queryResults) { + QueryResult combinedQueryResults = new QueryResult(); + DocSet combinedDocSet = null; + Map<Integer, Float> uniqueDocIds = new HashMap<>(); + long totalMatches = 0; + for (QueryResult queryResult : queryResults) { + DocIterator docs = queryResult.getDocList().iterator(); + totalMatches = Math.max(totalMatches, queryResult.getDocList().matches()); + while (docs.hasNext()) { + uniqueDocIds.put(docs.nextDoc(), queryResult.getDocList().hasScores() ? docs.score() : 0f); + } + if (combinedDocSet == null) { + combinedDocSet = queryResult.getDocSet(); + } else if (queryResult.getDocSet() != null) { + combinedDocSet = combinedDocSet.union(queryResult.getDocSet()); + } + } + int combinedResultsLength = uniqueDocIds.size(); + int[] combinedResultsDocIds = new int[combinedResultsLength]; + float[] combinedResultScores = new float[combinedResultsLength]; + + int i = 0; + for (Map.Entry<Integer, Float> scoredDoc : uniqueDocIds.entrySet()) { + combinedResultsDocIds[i] = scoredDoc.getKey(); + combinedResultScores[i] = scoredDoc.getValue(); + i++; + } + DocSlice combinedResultSlice = + new DocSlice( + 0, + combinedResultsLength, + combinedResultsDocIds, + combinedResultScores, + Math.max(combinedResultsLength, totalMatches), + (float)
uniqueDocIds.values().stream().mapToDouble(s -> s).max().orElse(0), + TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO); + combinedQueryResults.setDocList(combinedResultSlice); + combinedQueryResults.setDocSet(combinedDocSet); + return combinedQueryResults; + } + + /** + * Retrieves a list of explanations for the given queries and results. + * + * @param queryKeys the keys associated with the queries + * @param queriesDocMap a map where keys represent combiner query keys and values are lists of + * ShardDocs corresponding to each key + * @param combinedQueriesDocs a list of ShardDocs after combiner operation + * @param solrParams params to be used when provided at query time + * @return a SimpleOrderedMap of explanations for the given queries and results + */ + public abstract SimpleOrderedMap getExplanations( + String[] queryKeys, + Map> queriesDocMap, + List combinedQueriesDocs, + SolrParams solrParams); + + /** + * Retrieves an implementation of the QueryAndResponseCombiner based on the specified algorithm. + * + * @param algorithm the combiner algorithm + * @param combiners The already initialised map of QueryAndResponseCombiner + * @return an instance of QueryAndResponseCombiner corresponding to the specified algorithm. + * @throws SolrException if an unknown combiner algorithm is specified. 
+ */ + public static QueryAndResponseCombiner getImplementation( + String algorithm, Map combiners) { + if (combiners.get(algorithm) != null) { + return combiners.get(algorithm); + } + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " + algorithm); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java b/solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java new file mode 100644 index 00000000000..55bc2c9424b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.combine; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import org.apache.lucene.search.Explanation; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardDoc; + +/** + * This class implements a query and response combiner that uses the Reciprocal Rank Fusion (RRF) + * algorithm to combine multiple ranked lists into a single ranked list. + */ +public class ReciprocalRankFusion extends QueryAndResponseCombiner { + + private int k; + + public ReciprocalRankFusion() { + this.k = CombinerParams.DEFAULT_COMBINER_RRF_K; + } + + @Override + public void init(NamedList args) { + Object kParam = args.get("k"); + if (kParam != null) { + this.k = Integer.parseInt(kParam.toString()); + } + } + + public int getK() { + return k; + } + + /** + * Merges per-query ranked results using Reciprocal Rank Fusion (RRF). + * + *

Each query doc list is assumed to be ordered by descending relevance. For a document at rank + * r in the list, the contribution is {@code 1 / (k + r)} where {@code k} is read from {@link + * CombinerParams#COMBINER_RRF_K} or falls back to {@code this.k}. Contributions for the same + * document ID across multiple queries (if found) are summed, and documents are returned sorted by + * the fused score (descending). + * + * @param queriesDocMap per-query ranked results; + * @param solrParams parameters; optional {@link CombinerParams#COMBINER_RRF_K} overrides k. + * @return one {@link ShardDoc} per unique document ID, ordered by fused score. + */ + @Override + public List combine(Map> queriesDocMap, SolrParams solrParams) { + int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k); + HashMap docIdToScore = new HashMap<>(); + Map docIdToShardDoc = new HashMap<>(); + List finalShardDocList = new ArrayList<>(); + for (Map.Entry> shardDocEntry : queriesDocMap.entrySet()) { + List shardDocList = shardDocEntry.getValue(); + int ranking = 1; + while (ranking <= shardDocList.size()) { + String docId = shardDocList.get(ranking - 1).id.toString(); + docIdToShardDoc.put(docId, shardDocList.get(ranking - 1)); + float rrfScore = 1f / (kVal + ranking); + docIdToScore.compute(docId, (id, score) -> (score == null) ? 
rrfScore : score + rrfScore); + ranking++; + } + } + List> sortedByScoreDescending = + docIdToScore.entrySet().stream() + .sorted( + Comparator.comparing(Map.Entry::getValue, Comparator.reverseOrder()) + .thenComparing(Map.Entry::getKey)) + .toList(); + for (Map.Entry scoredDoc : sortedByScoreDescending) { + String docId = scoredDoc.getKey(); + Float score = scoredDoc.getValue(); + ShardDoc shardDoc = docIdToShardDoc.get(docId); + shardDoc.score = score; + finalShardDocList.add(shardDoc); + } + return finalShardDocList; + } + + private Map getRanks( + Collection> shardDocs, List combinedShardDocs) { + Map docIdToRanks; + docIdToRanks = new HashMap<>(); + for (ShardDoc shardDoc : combinedShardDocs) { + docIdToRanks.put(shardDoc.id.toString(), new Integer[shardDocs.size()]); + } + int docIdx = 0; + for (List shardDocList : shardDocs) { + int rank = 1; + for (ShardDoc shardDoc : shardDocList) { + String docId = shardDoc.id.toString(); + docIdToRanks.get(docId)[docIdx] = rank; + rank++; + } + docIdx++; + } + return docIdToRanks; + } + + @Override + public SimpleOrderedMap getExplanations( + String[] queryKeys, + Map> queriesDocMap, + List combinedQueriesDocs, + SolrParams solrParams) { + int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k); + SimpleOrderedMap docIdsExplanations = new SimpleOrderedMap<>(); + Map docIdToRanks = getRanks(queriesDocMap.values(), combinedQueriesDocs); + for (ShardDoc shardDoc : combinedQueriesDocs) { + String docId = shardDoc.id.toString(); + Integer[] rankPerQuery = docIdToRanks.get(docId); + Explanation fullDocIdExplanation = + Explanation.match( + shardDoc.score, getReciprocalRankFusionExplain(queryKeys, rankPerQuery, kVal)); + docIdsExplanations.add(docId, fullDocIdExplanation); + } + return docIdsExplanations; + } + + private String getReciprocalRankFusionExplain( + String[] queryKeys, Integer[] rankPerQuery, int kVal) { + StringBuilder reciprocalRankFusionExplain = new StringBuilder(); + StringJoiner scoreComponents = 
new StringJoiner(" + "); + for (Integer rank : rankPerQuery) { + if (rank != null) { + scoreComponents.add("1/(" + kVal + "+" + rank + ")"); + } + } + reciprocalRankFusionExplain.append(scoreComponents); + reciprocalRankFusionExplain.append(" because its ranks were: "); + StringJoiner rankComponents = new StringJoiner(", "); + for (int i = 0; i < queryKeys.length; i++) { + Integer rank = rankPerQuery[i]; + if (rank == null) { + rankComponents.add("not in the results for query(" + queryKeys[i] + ")"); + } else { + rankComponents.add(rank + " for query(" + queryKeys[i] + ")"); + } + } + reciprocalRankFusionExplain.append(rankComponents); + return reciprocalRankFusionExplain.toString(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/combine/package-info.java b/solr/core/src/java/org/apache/solr/search/combine/package-info.java new file mode 100644 index 00000000000..4c1225e6e21 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/combine/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This contains the classes to combine the scores from search index results as well as from across + * shards. 
Multiple combiner algorithm implementations can be added. + */ +package org.apache.solr.search.combine; diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-combined-query.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-combined-query.xml new file mode 100644 index 00000000000..38d234167c5 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-combined-query.xml @@ -0,0 +1,165 @@ + + + + + + + + + + ${solr.data.dir:} + + + + ${tests.luceneMatchVersion:LATEST} + + + + + + + ${solr.autoCommit.maxTime:-1} + + + + ${solr.ulog.dir:} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + ${solr.max.booleanClauses:1024} + + + + + + + + + + + 10 + + 2000 + + + + + + + + + + + + + + 2 + + + org.apache.solr.search.combine.TestCombiner + 30 + test + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + text + + + + diff --git a/solr/core/src/test/org/apache/solr/handler/component/CombinedQueryComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/CombinedQueryComponentTest.java new file mode 100644 index 00000000000..0a64a527935 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/CombinedQueryComponentTest.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * The CombinedQueryComponentTest class is an integration test suite for the CombinedQueryComponent + * in Solr. It verifies the functionality of the component by performing few basic queries in single + * sharded mode and validating the responses including limitations and combiner plugin. + */ +public class CombinedQueryComponentTest extends BaseDistributedSearchTestCase { + + private static final int NUM_DOCS = 10; + private static final String vectorField = "vector"; + + public CombinedQueryComponentTest() { + super(); + fixShardCount(1); + } + + /** + * Sets up the test class by initializing the core and setting system properties. This method is + * executed before all test methods in the class. 
+ * + * @throws Exception if any exception occurs during initialization + */ + @BeforeClass + public static void setUpClass() throws Exception { + initCore("solrconfig-combined-query.xml", "schema-vector-catchall.xml"); + System.setProperty("validateAfterInactivity", "200"); + System.setProperty("solr.httpclient.retries", "0"); + System.setProperty("distribUpdateSoTimeout", "5000"); + } + + /** + * Prepares Solr input documents for indexing, including adding sample data and vector fields. + * This method populates the Solr index with test data, including text, title, and vector fields. + * The vector fields are used to calculate cosine distance for testing purposes. + * + * @throws Exception if any error occurs during the indexing process. + */ + private synchronized void prepareIndexDocs() throws Exception { + List docs = new ArrayList<>(); + for (int i = 1; i <= NUM_DOCS; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", Integer.toString(i)); + doc.addField("text", "test text for doc " + i); + doc.addField("title", "title test for doc " + i); + doc.addField("mod3_idv", (i % 3)); + docs.add(doc); + } + // cosine distance vector1= 1.0 + docs.get(0).addField(vectorField, Arrays.asList(1f, 2f, 3f, 4f)); + // cosine distance vector1= 0.998 + docs.get(1).addField(vectorField, Arrays.asList(1.5f, 2.5f, 3.5f, 4.5f)); + // cosine distance vector1= 0.992 + docs.get(2).addField(vectorField, Arrays.asList(7.5f, 15.5f, 17.5f, 22.5f)); + // cosine distance vector1= 0.999 + docs.get(3).addField(vectorField, Arrays.asList(1.4f, 2.4f, 3.4f, 4.4f)); + // cosine distance vector1= 0.862 + docs.get(4).addField(vectorField, Arrays.asList(30f, 22f, 35f, 20f)); + // cosine distance vector1= 0.756 + docs.get(5).addField(vectorField, Arrays.asList(40f, 1f, 1f, 200f)); + // cosine distance vector1= 0.970 + docs.get(6).addField(vectorField, Arrays.asList(5f, 10f, 20f, 40f)); + // cosine distance vector1= 0.515 + docs.get(7).addField(vectorField, 
Arrays.asList(120f, 60f, 30f, 15f));
+    // cosine distance vector1= 0.554
+    docs.get(8).addField(vectorField, Arrays.asList(200f, 50f, 100f, 25f));
+    // cosine distance vector1= 0.997
+    docs.get(9).addField(vectorField, Arrays.asList(1.8f, 2.5f, 3.7f, 4.9f));
+    // Start from an empty index, then add and commit the prepared documents.
+    del("*:*");
+    for (SolrInputDocument doc : docs) {
+      indexDoc(doc);
+    }
+    commit();
+  }
+
+  /**
+   * Sends one lexical query through the combiner on the {@code /search} handler with a limit of 5
+   * and expects five documents in the response.
+   */
+  public void testSingleLexicalQuery() throws Exception {
+    prepareIndexDocs();
+    QueryResponse rsp =
+        query(
+            CommonParams.JSON,
+            "{\"queries\":"
+                + "{\"lexical1\":{\"lucene\":{\"query\":\"title:title test for doc 5\"}}},"
+                + "\"limit\":5,"
+                + "\"fields\":[\"id\",\"score\",\"title\"],"
+                + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\"]}}",
+            CommonParams.QT,
+            "/search");
+    assertEquals(5, rsp.getResults().size());
+  }
+
+  /**
+   * Combines two lexical queries with {@code debug=results} and expects a total of 10 matches plus
+   * a {@code combinerExplanations} entry in the debug map.
+   */
+  public void testMultipleLexicalQueryWithDebug() throws Exception {
+    prepareIndexDocs();
+    QueryResponse rsp =
+        query(
+            CommonParams.JSON,
+            "{\"queries\":"
+                + "{\"lexical1\":{\"lucene\":{\"query\":\"title:title test for doc 1\"}},"
+                + "\"lexical2\":{\"lucene\":{\"query\":\"text:test text for doc 2\"}}},"
+                + "\"limit\":5,"
+                + "\"fields\":[\"id\",\"score\",\"title\"],"
+                + "\"params\":{\"combiner\":true,\"debug\":[\"results\"],\"combiner.query\":[\"lexical1\",\"lexical2\"],"
+                + "\"rid\": \"test-1\"}}",
+            CommonParams.QT,
+            "/search");
+    assertEquals(10, rsp.getResults().getNumFound());
+    assertTrue(rsp.getDebugMap().containsKey("combinerExplanations"));
+  }
+
+  /** Combines two queries for which the test expects no matches and checks the result is empty. */
+  @Test
+  public void testNoResults() throws Exception {
+    prepareIndexDocs();
+    QueryResponse rsp =
+        query(
+            CommonParams.JSON,
+            "{\"queries\":"
+                + "{\"lexical1\":{\"lucene\":{\"query\":\"title:Solr is the blazing-fast, open source search platform\"}},"
+                + "\"lexical2\":{\"lucene\":{\"query\":\"text:Solr powers the search\"}}},"
+                + "\"limit\":5,"
+                + "\"fields\":[\"id\",\"score\",\"title\"],"
+                + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}",
+            CommonParams.QT,
+            "/search");
+    assertEquals(0, rsp.getResults().size());
+  }
+
+  /**
+   * Sends three queries in {@code combiner.query} and expects a {@link SolrException} whose message
+   * reports the configured limit of 2 queries.
+   */
+  @Test
+  public void testMaxQueriesLimit() throws Exception {
+    prepareIndexDocs();
+    RuntimeException exceptionThrown =
+        expectThrows(
+            SolrException.class,
+            () ->
+                query(
+                    CommonParams.JSON,
+                    "{\"queries\":"
+                        + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^=2 OR 3^=1)\"}},"
+                        + "\"vector\":{\"knn\":{ \"f\": \"vector\", \"topK\": 5, \"query\": \"[1.0, 2.0, 3.0, 4.0]\"}},"
+                        + "\"lexical2\":{\"lucene\":{\"query\":\"text:test text for doc 2\"}}},"
+                        + "\"limit\":5,"
+                        + "\"fields\":[\"id\",\"score\",\"title\"],"
+                        + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"vector\", \"lexical2\"]}}",
+                    CommonParams.QT,
+                    "/search"));
+    assertTrue(exceptionThrown.getMessage().contains("Too many queries to combine: limit is 2"));
+  }
+
+  /**
+   * Selects the combiner registered as {@code test} via {@code combiner.algorithm} and checks that
+   * its explanation appears under {@code combinerExplanations/combinerDetails} in the debug output.
+   */
+  @Test
+  public void testCombinerPlugin() throws Exception {
+    prepareIndexDocs();
+    QueryResponse rsp =
+        query(
+            CommonParams.JSON,
+            "{\"queries\":"
+                + "{\"lexical1\":{\"lucene\":{\"query\":\"title:title test for doc 1\"}},"
+                + "\"lexical2\":{\"lucene\":{\"query\":\"text:test text for doc 2\"}}},"
+                + "\"limit\":5,"
+                + "\"fields\":[\"id\",\"score\",\"title\"],"
+                + "\"params\":{\"combiner\":true,\"combiner.algorithm\":test,\"combiner.query\""
+                + ":[\"lexical1\",\"lexical2\"],\"debug\":[\"results\"]}}",
+            CommonParams.QT,
+            "/search");
+    assertEquals(10, rsp.getResults().getNumFound());
+    // The expected value is the explanation as it appears in the response: class name, value and
+    // description followed by a newline.
+    assertEquals(
+        "org.apache.lucene.search.Explanation:30 = this is test combiner\n",
+        ((SimpleOrderedMap) rsp.getDebugMap().get("combinerExplanations"))
+            .get("combinerDetails"));
+  }
+
+  /**
+   * Tests that using unsupported features with Combined Queries throws the expected exception.
+   *
+   * <p>This test case verifies that requests for Combined Queries that include either the
+   * 'cursorMark' or the 'group' parameter fail with a {@link SolrException} whose message contains
+   * "Unsupported functionality for Combined Queries.".
+   */
+  @Test
+  public void testNonEnabledFeature() throws Exception {
+    prepareIndexDocs();
+    String combinedQueryStr =
+        "{\"queries\":"
+            + "{\"lexical1\":{\"lucene\":{\"query\":\"title:title test for doc 1\"}},"
+            + "\"lexical2\":{\"lucene\":{\"query\":\"text:test text for doc 2\"}}},"
+            + "\"sort\":\"id asc\","
+            + "\"fields\":[\"id\",\"score\",\"title\"],"
+            + "\"params\":{\"combiner\":true,\"combiner.algorithm\":test,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}";
+
+    // Same payload, first with a cursor mark ...
+    RuntimeException exceptionThrown =
+        expectThrows(
+            SolrException.class,
+            () ->
+                query(
+                    CommonParams.JSON,
+                    combinedQueryStr,
+                    CommonParams.QT,
+                    "/search",
+                    "cursorMark",
+                    CURSOR_MARK_START));
+    assertTrue(
+        exceptionThrown.getMessage().contains("Unsupported functionality for Combined Queries."));
+    // ... then with grouping enabled.
+    exceptionThrown =
+        expectThrows(
+            SolrException.class,
+            () ->
+                query(
+                    CommonParams.JSON,
+                    combinedQueryStr,
+                    CommonParams.QT,
+                    "/search",
+                    "group",
+                    "true"));
+    assertTrue(
+        exceptionThrown.getMessage().contains("Unsupported functionality for Combined Queries."));
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/handler/component/CombinedQuerySearchHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/component/CombinedQuerySearchHandlerTest.java
new file mode 100644
index 00000000000..ef8de683a95
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/handler/component/CombinedQuerySearchHandlerTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import jakarta.servlet.http.HttpServletRequest; +import java.io.IOException; +import java.util.ArrayList; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.junit.BeforeClass; +import org.junit.Test; + +/** The type Combined query search handler test. */ +public class CombinedQuerySearchHandlerTest extends SolrTestCaseJ4 { + + private CoreContainer mockCoreContainer; + private HttpServletRequest httpServletRequest; + + /** + * Before tests. + * + * @throws Exception the exception + */ + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig.xml", "schema.xml"); + } + + /** Test combined component init in search components list. */ + @Test + public void testCombinedComponentInit() { + SolrCore core = h.getCore(); + + try (CombinedQuerySearchHandler handler = new CombinedQuerySearchHandler()) { + handler.init(new NamedList<>()); + handler.inform(core); + assertEquals(9, handler.getComponents().size()); + assertEquals( + core.getSearchComponent(CombinedQueryComponent.COMPONENT_NAME), + handler.getComponents().getFirst()); + } catch (IOException e) { + fail("Exception when closing CombinedQuerySearchHandler"); + } + } + + /** Test combined response buildr type create dynamically. 
*/ + @Test + public void testCombinedResponseBuilder() { + SolrQueryRequest request = req("q", "testQuery"); + try (CombinedQuerySearchHandler handler = new CombinedQuerySearchHandler()) { + assertFalse( + handler.newResponseBuilder(request, new SolrQueryResponse(), new ArrayList<>()) + instanceof CombinedQueryResponseBuilder); + request = req("q", "testQuery", CombinerParams.COMBINER, "true"); + assertTrue( + handler.newResponseBuilder(request, new SolrQueryResponse(), new ArrayList<>()) + instanceof CombinedQueryResponseBuilder); + } catch (IOException e) { + fail("Exception when closing CombinedQuerySearchHandler"); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedCombinedQueryComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedCombinedQueryComponentTest.java new file mode 100644 index 00000000000..dceffc4a69b --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedCombinedQueryComponentTest.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.handler.component;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.solr.BaseDistributedSearchTestCase;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.CommonParams;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * The DistributedCombinedQueryComponentTest class is a JUnit test suite that evaluates the
+ * functionality of the CombinedQueryComponent in a Solr distributed search environment. It focuses
+ * on testing the integration of combiner queries with different configurations.
+ */
+public class DistributedCombinedQueryComponentTest extends BaseDistributedSearchTestCase {
+
+  private static final int NUM_DOCS = 10;
+  private static final String vectorField = "vector";
+
+  /**
+   * Sets up the test class by initializing the core and setting system properties. This method is
+   * executed before all test methods in the class.
+   *
+   * @throws Exception if any exception occurs during initialization
+   */
+  @BeforeClass
+  public static void setUpClass() throws Exception {
+    initCore("solrconfig-combined-query.xml", "schema-vector-catchall.xml");
+    System.setProperty("validateAfterInactivity", "200");
+    System.setProperty("solr.httpclient.retries", "0");
+    System.setProperty("distribUpdateSoTimeout", "5000");
+  }
+
+  /**
+   * Prepares Solr input documents for indexing, including adding sample data and vector fields.
+   * This method fixes the shard count to 2, clears the index, and indexes {@code NUM_DOCS}
+   * documents carrying {@code id}, {@code text}, {@code title}, {@code mod3_idv} (id modulo 3) and
+   * a four-dimensional vector field.
+   *
+   * @throws Exception if any error occurs during the indexing process.
+   */
+  private synchronized void prepareIndexDocs() throws Exception {
+    List docs = new ArrayList<>();
+    fixShardCount(2);
+    for (int i = 1; i <= NUM_DOCS; i++) {
+      SolrInputDocument doc = new SolrInputDocument();
+      doc.addField("id", Integer.toString(i));
+      doc.addField("text", "test text for doc " + i);
+      doc.addField("title", "title test for doc " + i);
+      doc.addField("mod3_idv", (i % 3));
+      docs.add(doc);
+    }
+    // cosine distance vector1= 1.0
+    docs.get(0).addField(vectorField, Arrays.asList(1f, 2f, 3f, 4f));
+    // cosine distance vector1= 0.998
+    docs.get(1).addField(vectorField, Arrays.asList(1.5f, 2.5f, 3.5f, 4.5f));
+    // cosine distance vector1= 0.992
+    docs.get(2).addField(vectorField, Arrays.asList(7.5f, 15.5f, 17.5f, 22.5f));
+    // cosine distance vector1= 0.999
+    docs.get(3).addField(vectorField, Arrays.asList(1.4f, 2.4f, 3.4f, 4.4f));
+    // cosine distance vector1= 0.862
+    docs.get(4).addField(vectorField, Arrays.asList(30f, 22f, 35f, 20f));
+    // cosine distance vector1= 0.756
+    docs.get(5).addField(vectorField, Arrays.asList(40f, 1f, 1f, 200f));
+    // cosine distance vector1= 0.970
+    docs.get(6).addField(vectorField, Arrays.asList(5f, 10f, 20f, 40f));
+    // cosine distance vector1= 0.515
+    docs.get(7).addField(vectorField, Arrays.asList(120f, 60f, 30f, 15f));
+    // cosine distance vector1= 0.554
+    docs.get(8).addField(vectorField, Arrays.asList(200f, 50f, 100f, 25f));
+    // cosine distance vector1= 0.997
+    docs.get(9).addField(vectorField, Arrays.asList(1.8f, 2.5f, 3.7f, 4.9f));
+    del("*:*");
+    // Order the clients by the port of their base URL; non-HTTP clients compare as equal.
+    // NOTE(review): presumably this makes the document-to-shard assignment reproducible so the
+    // expected result orders below stay stable - confirm against indexDoc in the base class.
+    clients.sort(
+        (client1, client2) -> {
+          try {
+            if (client2 instanceof HttpSolrClient httpClient2
+                && client1 instanceof HttpSolrClient httpClient1)
+              return new URI(httpClient1.getBaseURL()).getPort()
+                  - new URI(httpClient2.getBaseURL()).getPort();
+          } catch (URISyntaxException e) {
+            throw new RuntimeException("Unable to get URI from SolrClient", e);
+          }
+          return 0;
+        });
+    for (SolrInputDocument doc : docs) {
+      indexDoc(doc);
+    }
+    commit();
+  }
+
+  /**
+   * Tests a single lexical query ({@code id:2^10}) sent through the combiner: exactly one
+   * document, with id 2, is expected.
+   *
+   * @throws Exception if any exception occurs during the test execution
+   */
+  @Test
+  public void testSingleLexicalQuery() throws Exception {
+    prepareIndexDocs();
+    QueryResponse rsp =
+        query(
+            CommonParams.JSON,
+            "{\"queries\":"
+                + "{\"lexical1\":{\"lucene\":{\"query\":\"id:2^10\"}}},"
+                + "\"limit\":5,"
+                + "\"fields\":[\"id\",\"score\",\"title\"],"
+                + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\"]}}",
+            CommonParams.QT,
+            "/search");
+    assertEquals(1, rsp.getResults().size());
+    assertFieldValues(rsp.getResults(), id, "2");
+  }
+
+  /**
+   * Returns the shards parameter value. When {@code deadServers} is null the inherited {@code
+   * shards} string is returned unchanged; otherwise {@code shardsArr} is sorted in place and
+   * joined with commas.
+   */
+  @Override
+  protected String getShardsString() {
+    if (deadServers == null) return shards;
+    Arrays.sort(shardsArr);
+    StringBuilder sb = new StringBuilder();
+    for (String shard : shardsArr) {
+      if (!sb.isEmpty()) sb.append(',');
+      sb.append(shard);
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Tests multiple lexical queries using the distributed solr client: two boosted id queries are
+   * combined and the top five documents are expected in a fixed order.
+   *
+   * @throws Exception if any error occurs during the test execution
+   */
+  @Test
+  public void testMultipleLexicalQuery() throws Exception {
+    prepareIndexDocs();
+    String jsonQuery =
+        "{\"queries\":"
+            + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}},"
+            + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}},"
+            + "\"limit\":5,"
+            + "\"fields\":[\"id\",\"score\",\"title\"],"
+            + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}";
+    QueryResponse rsp = query(CommonParams.JSON, jsonQuery, CommonParams.QT, "/search");
+    assertEquals(5, rsp.getResults().size());
+    assertFieldValues(rsp.getResults(), id, "5", "4", "2", "3", "7");
+  }
+
+  /**
+   * Test multiple query execution with sort.
+ * + * @throws Exception the exception + */ + @Test + public void testMultipleQueryWithSort() throws Exception { + prepareIndexDocs(); + String jsonQuery = + "{\"queries\":" + + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}}," + + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}}," + + "\"limit\":5,\"sort\":\"mod3_idv desc\"" + + "\"fields\":[\"id\",\"score\",\"title\"]," + + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}"; + QueryResponse rsp = query(CommonParams.JSON, jsonQuery, CommonParams.QT, "/search"); + assertEquals(5, rsp.getResults().size()); + assertFieldValues(rsp.getResults(), id, "5", "2", "7", "4", "10"); + } + + /** + * Tests the hybrid query functionality of the system with various setting of pagination. + * + * @throws Exception if any unexpected error occurs during the test execution. + */ + @Test + public void testHybridQueryWithPaginationPre() throws Exception { + prepareIndexDocs(); + // lexical => 2,3 + // vector => 1,4,2,10,3,6 + QueryResponse rsp = + query( + CommonParams.JSON, + "{\"queries\":" + + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}}," + + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}}," + + "\"fields\":[\"id\",\"score\",\"title\"]," + + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}", + CommonParams.QT, + "/search"); + assertFieldValues(rsp.getResults(), id, "5", "4", "2", "3", "7", "6", "10"); + rsp = + query( + CommonParams.JSON, + "{\"queries\":" + + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}}," + + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}}," + + "\"limit\":4," + + "\"fields\":[\"id\",\"score\",\"title\"]," + + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}", + CommonParams.QT, + "/search"); + assertFieldValues(rsp.getResults(), id, "5", "4", "2", "3"); + rsp 
= + query( + CommonParams.JSON, + "{\"queries\":" + + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}}," + + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}}," + + "\"limit\":4,\"offset\":3," + + "\"fields\":[\"id\",\"score\",\"title\"]," + + "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical1\",\"lexical2\"]}}", + CommonParams.QT, + "/search"); + assertEquals(4, rsp.getResults().size()); + assertFieldValues(rsp.getResults(), id, "3", "7", "6", "10"); + } + + /** + * Tests the single query functionality with faceting only. + * + * @throws Exception if any unexpected error occurs during the test execution. + */ + @Test + public void testVectorQueryWithFaceting() throws Exception { + prepareIndexDocs(); + String jsonQuery = + "{\"queries\":" + + "{\"lexical\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}}}," + + "\"limit\":3,\"offset\":1" + + "\"fields\":[\"id\",\"score\",\"title\"]," + + "\"params\":{\"combiner\":true,\"facet\":true,\"facet.field\":\"mod3_idv\"," + + "\"combiner.query\":[\"lexical\"]}}"; + QueryResponse rsp = query(CommonParams.JSON, jsonQuery, CommonParams.QT, "/search"); + assertEquals(3, rsp.getResults().size()); + assertEquals(4, rsp.getResults().getNumFound()); + assertEquals("[0 (2), 2 (2)]", rsp.getFacetFields().getFirst().getValues().toString()); + } + + /** + * Tests the combined query feature with faceting and highlighting. + * + * @throws Exception if any unexpected error occurs during the test execution. 
+   */
+  @Test
+  public void testQueriesWithFacetAndHighlights() throws Exception {
+    prepareIndexDocs();
+    String jsonQuery =
+        "{\"queries\":"
+            + "{\"lexical1\":{\"lucene\":{\"query\":\"id:(2^2 OR 3^1 OR 6^2 OR 5^1)\"}},"
+            + "\"lexical2\":{\"lucene\":{\"query\":\"id:(4^2 OR 5^1 OR 7^3 OR 10^2)\"}}},"
+            + "\"limit\":4,"
+            + "\"fields\":[\"id\",\"score\",\"title\"],"
+            + "\"params\":{\"combiner\":true,\"facet\":true,\"facet.field\":\"mod3_idv\","
+            + "\"combiner.query\":[\"lexical1\",\"lexical2\"], \"hl\": true,"
+            + "\"hl.fl\": \"title\",\"hl.q\":\"test doc\"}}";
+    QueryResponse rsp = query(CommonParams.JSON, jsonQuery, CommonParams.QT, "/search");
+    // Four documents on the page, in fused order.
+    assertEquals(4, rsp.getResults().size());
+    assertFieldValues(rsp.getResults(), id, "5", "4", "2", "3");
+    // Facet counts are asserted over seven documents in total (3 + 2 + 2), not just the page.
+    assertEquals("mod3_idv", rsp.getFacetFields().getFirst().getName());
+    assertEquals("[1 (3), 0 (2), 2 (2)]", rsp.getFacetFields().getFirst().getValues().toString());
+    // One highlighting entry per returned document.
+    assertEquals(4, rsp.getHighlighting().size());
+    // NOTE(review): the expected snippets below contain no highlight markup; they may have lost
+    // their tags in transit - confirm against the original test.
+    assertEquals(
+        "title test for doc 2",
+        rsp.getHighlighting().get("2").get("title").getFirst());
+    assertEquals(
+        "title test for doc 5",
+        rsp.getHighlighting().get("5").get("title").getFirst());
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/combine/QueryAndResponseCombinerTest.java b/solr/core/src/test/org/apache/solr/search/combine/QueryAndResponseCombinerTest.java
new file mode 100644
index 00000000000..89e91a01ac6
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/combine/QueryAndResponseCombinerTest.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.combine;
+
+import java.util.List;
+import org.apache.lucene.search.TotalHits;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.search.DocSlice;
+import org.apache.solr.search.QueryResult;
+import org.apache.solr.search.SortedIntDocSet;
+import org.junit.Test;
+
+/** Tests for the static {@code simpleCombine} helper of {@link QueryAndResponseCombiner}. */
+public class QueryAndResponseCombinerTest extends SolrTestCaseJ4 {
+
+  /**
+   * Builds two hand-made query results used as combiner input: the first lists docs 1 and 2 with
+   * doc set {1, 2, 3}, the second lists doc 0 with doc set {0, 1}.
+   *
+   * @return the two results, in that order
+   */
+  public static List getQueryResults() {
+    QueryResult r1 = new QueryResult();
+    // NOTE(review): the score array has three entries for a two-document slice - confirm whether
+    // the middle 0 is intentional.
+    r1.setDocList(
+        new DocSlice(
+            0,
+            2,
+            new int[] {1, 2},
+            new float[] {0.67f, 0, 0.62f},
+            3,
+            0.67f,
+            TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO));
+    r1.setDocSet(new SortedIntDocSet(new int[] {1, 2, 3}, 3));
+    QueryResult r2 = new QueryResult();
+    r2.setDocList(
+        new DocSlice(
+            0,
+            1,
+            new int[] {0},
+            new float[] {0.87f},
+            2,
+            0.87f,
+            TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO));
+    r2.setDocSet(new SortedIntDocSet(new int[] {0, 1}, 2));
+    return List.of(r1, r2);
+  }
+
+  /**
+   * Combines the two fixtures and expects a doc list of three entries (2 + 1) and a doc set of
+   * four entries (the distinct ids 0, 1, 2 and 3).
+   */
+  @Test
+  public void simpleCombine() {
+    QueryResult queryResult = QueryAndResponseCombiner.simpleCombine(getQueryResults());
+    assertEquals(3, queryResult.getDocList().size());
+    assertEquals(4, queryResult.getDocSet().size());
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/combine/ReciprocalRankFusionTest.java b/solr/core/src/test/org/apache/solr/search/combine/ReciprocalRankFusionTest.java
new file mode 100644
index 00000000000..7717692246a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/combine/ReciprocalRankFusionTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache
Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.combine;
+
+import static org.apache.solr.common.params.CombinerParams.RECIPROCAL_RANK_FUSION;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CombinerParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.component.ShardDoc;
+import org.apache.solr.search.QueryResult;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * The ReciprocalRankFusionTest class is a unit test suite for the {@link ReciprocalRankFusion}
+ * class. It verifies the correctness of the fusion algorithm and its supporting methods.
+ */
+public class ReciprocalRankFusionTest extends SolrTestCaseJ4 {
+
+  public static ReciprocalRankFusion reciprocalRankFusion;
+
+  /**
+   * Initializes the test environment by setting up the {@link ReciprocalRankFusion} instance with
+   * {@code k} set to 20.
+   */
+  @BeforeClass
+  public static void beforeClass() {
+    NamedList args = new NamedList<>(Map.of("k", "20"));
+    reciprocalRankFusion = new ReciprocalRankFusion();
+    reciprocalRankFusion.init(args);
+  }
+
+  /** Tests the functionality of combining the QueryResults across local search indices. */
+  @Test
+  public void testSimpleCombine() {
+    List rankedList = QueryAndResponseCombinerTest.getQueryResults();
+    QueryResult result = QueryAndResponseCombiner.simpleCombine(rankedList);
+    assertEquals(3, result.getDocList().size());
+    assertEquals(4, result.getDocSet().size());
+  }
+
+  /**
+   * Test combine docs per queries using RRF: two ranked lists are combined, and the document that
+   * appears in both lists ({@code id2}) is expected first among two combined documents.
+   */
+  @Test
+  public void testQueryListCombine() {
+    Map> queriesDocMap = new HashMap<>();
+    // First list: id1 followed by id2.
+    ShardDoc shardDoc = new ShardDoc();
+    shardDoc.id = "id1";
+    shardDoc.shard = "shard1";
+    shardDoc.orderInShard = 1;
+    List shardDocList = new ArrayList<>();
+    shardDocList.add(shardDoc);
+    shardDoc = new ShardDoc();
+    shardDoc.id = "id2";
+    shardDoc.shard = "shard2";
+    shardDoc.orderInShard = 2;
+    shardDocList.add(shardDoc);
+    // The map key is the shard of the most recently built doc, i.e. "shard2" here.
+    queriesDocMap.put(shardDoc.shard, shardDocList);
+
+    // Second list: id2 only, stored under the key "shard1".
+    shardDoc = new ShardDoc();
+    shardDoc.id = "id2";
+    shardDoc.shard = "shard1";
+    shardDoc.orderInShard = 1;
+    queriesDocMap.put(shardDoc.shard, List.of(shardDoc));
+    SolrParams solrParams = params();
+    assertEquals(20, reciprocalRankFusion.getK());
+    List shardDocs = reciprocalRankFusion.combine(queriesDocMap, solrParams);
+    assertEquals(2, shardDocs.size());
+    assertEquals("id2", shardDocs.getFirst().id);
+  }
+
+  /**
+   * Checks combiner lookup by algorithm name: resolving the default algorithm against an empty
+   * combiner map throws a {@link SolrException}, while resolving {@code rrf} after registering a
+   * {@link ReciprocalRankFusion} under that name returns an instance of it.
+   */
+  @Test
+  public void testImplementationFactory() {
+    Map combinerMap = new HashMap<>(1);
+    SolrParams emptySolrParms = params();
+    String emptyParamAlgorithm =
+        emptySolrParms.get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER);
+    assertThrows(
+        SolrException.class,
+        () -> QueryAndResponseCombiner.getImplementation(emptyParamAlgorithm, combinerMap));
+    SolrParams solrParams = params(CombinerParams.COMBINER_ALGORITHM, RECIPROCAL_RANK_FUSION);
+    String algorithm =
+        solrParams.get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER);
+    combinerMap.put(RECIPROCAL_RANK_FUSION, new ReciprocalRankFusion());
+    assertTrue(
+        QueryAndResponseCombiner.getImplementation(algorithm, combinerMap)
+            instanceof ReciprocalRankFusion);
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/combine/TestCombiner.java b/solr/core/src/test/org/apache/solr/search/combine/TestCombiner.java
new file mode 100644
index 00000000000..dde07cb3d94
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/combine/TestCombiner.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.combine;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.search.Explanation;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.handler.component.ShardDoc;
+
+/**
+ * The TestCombiner class is a minimal extension of QueryAndResponseCombiner used only for testing.
+ * It does not rank anything: {@code combine} returns an empty list and {@code getExplanations}
+ * returns a single fixed explanation carrying the configured {@code var1} value. It is used in
+ * test suite CombinedQueryComponentTest for e2e testing of the Plugin based Combiner approach.
+ */
+public class TestCombiner extends QueryAndResponseCombiner {
+
+  // Value of the "var1" init argument; stays 0 when the argument is absent.
+  private int testInt;
+
+  public int getTestInt() {
+    return testInt;
+  }
+
+  /** Reads the optional {@code var1} init argument and stores it as an int. */
+  @Override
+  public void init(NamedList args) {
+    Object kParam = args.get("var1");
+    if (kParam != null) {
+      this.testInt = Integer.parseInt(kParam.toString());
+    }
+  }
+
+  /** Ignores its input and always returns an empty, immutable list. */
+  @Override
+  public List combine(Map> shardDocMap, SolrParams solrParams) {
+    return List.of();
+  }
+
+  /**
+   * Ignores its arguments and returns one entry, {@code combinerDetails}, whose explanation value
+   * is {@code testInt} and whose description is "this is test combiner".
+   */
+  @Override
+  public SimpleOrderedMap getExplanations(
+      String[] queryKeys,
+      Map> queriesDocMap,
+      List combinedQueriesDocs,
+      SolrParams solrParams) {
+    SimpleOrderedMap docIdsExplanations = new SimpleOrderedMap<>();
+    docIdsExplanations.add("combinerDetails", Explanation.match(testInt, "this is test combiner"));
+    return docIdsExplanations;
+  }
+}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/json-combined-query-dsl.adoc b/solr/solr-ref-guide/modules/query-guide/pages/json-combined-query-dsl.adoc
new file mode 100644
index 00000000000..98d3c2e96b8
--- /dev/null
+++ b/solr/solr-ref-guide/modules/query-guide/pages/json-combined-query-dsl.adoc
@@ -0,0 +1,107 @@
+= JSON Combined Query DSL
+:tabs-sync-option:
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +The Combined Query feature executes multiple queries of multiple kinds across multiple shards of a collection and combines their results using an algorithm (such as Reciprocal Rank Fusion). +It extends the JSON Query DSL, ultimately enabling Hybrid Search. + +[NOTE] +==== +This feature does not currently support grouping or cursors. +==== + +== Query DSL Structure +The query structure is similar to the JSON Query DSL except for how multiple queries are defined along with their parameters. + +* Multiple queries can be defined under the `queries` key: each entry is given a name and uses the same syntax as a single query defined under the `query` key. +* In addition to the other supported parameters, the following parameters can be defined under the `params` key: +`combiner` | Default: `false`:: + Enables the combined query mode when set to `true`. +`combiner.query`:: + The list of queries to be executed as defined in the `queries` key. Example: `["query1", "query2"]` +`combiner.algorithm` | Default: `rrf`:: + The algorithm to be used for combining the results. Reciprocal Rank Fusion (RRF) is the built-in fusion algorithm. + Any other algorithm can be configured using a xref:json-combined-query-dsl.adoc#combiner-algorithm-plugin[plugin]. +`combiner.rrf.k` | Default: `60`:: + The k parameter in the RRF algorithm. 
+ +=== Example + +Below is a sample JSON query payload: + +``` +{ + "queries": { + "lexical1": { + "lucene": { + "query": "title:sales" + } + }, + "vector": { + "knn": { + "f": "vector", + "topK": 5, + "query": "[0.1,-0.34,0.89,0.02]" + } + } + }, + "limit": 5, + "fields": ["id", "score", "title"], + "params": { + "combiner": true, + "combiner.query": ["lexical1", "vector"], + "combiner.algorithm": "rrf", + "combiner.rrf.k": "15" + } +} +``` + +== Search Handler Configuration + +The Combined Query feature uses a dedicated search handler, `solr.CombinedQuerySearchHandler`, which can be configured as shown below: + +``` +<requestHandler name="/search" class="solr.CombinedQuerySearchHandler"> +..... +</requestHandler> +``` + +The Search Handler also accepts the following parameter: + +`maxCombinerQueries`:: + Sets an upper limit on the number of queries that may be listed in `combiner.query`. + It defaults to `5` if not set. + +=== Combiner Algorithm Plugin + +As mentioned xref:json-combined-query-dsl.adoc#query-dsl-structure[above], custom algorithms can be configured to combine the results across multiple queries. +The Combined Query Search Handler definition accepts a `combiners` parameter, in which a custom class can be registered as a combiner algorithm by giving it a name and any parameters it requires. 
+ +Example of the Search Handler as below: +``` + + 2 + + + org.apache.solr.search.combine.CustomCombiner + 35 + customValue + + + +``` diff --git a/solr/solr-ref-guide/modules/query-guide/querying-nav.adoc b/solr/solr-ref-guide/modules/query-guide/querying-nav.adoc index c66b1f85016..bf0ce8a4726 100644 --- a/solr/solr-ref-guide/modules/query-guide/querying-nav.adoc +++ b/solr/solr-ref-guide/modules/query-guide/querying-nav.adoc @@ -27,6 +27,7 @@ ** xref:local-params.adoc[] ** xref:json-request-api.adoc[] *** xref:json-query-dsl.adoc[] +*** xref:json-combined-query-dsl.adoc[] ** xref:searching-nested-documents.adoc[] ** xref:block-join-query-parser.adoc[] ** xref:join-query-parser.adoc[] diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CombinerParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CombinerParams.java new file mode 100644 index 00000000000..23d735d595d --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/params/CombinerParams.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.common.params; + +/** + * This class provides constants for configuration parameters related to the combiner. 
It defines
+ * keys for various properties used in the combiner configuration.
+ */
+public class CombinerParams {
+
+  // Constants holder: not meant to be instantiated.
+  private CombinerParams() {}
+
+  // Base parameter name; also the prefix of every other combiner parameter below.
+  public static final String COMBINER = "combiner";
+  // "combiner.algorithm": name of the algorithm used to combine results.
+  public static final String COMBINER_ALGORITHM = COMBINER + ".algorithm";
+  // "combiner.query": names of the queries to combine.
+  public static final String COMBINER_QUERY = COMBINER + ".query";
+  // Algorithm name for Reciprocal Rank Fusion.
+  public static final String RECIPROCAL_RANK_FUSION = "rrf";
+  // "combiner.rrf.k": the k parameter of the RRF algorithm.
+  public static final String COMBINER_RRF_K = COMBINER + "." + RECIPROCAL_RANK_FUSION + ".k";
+  // Algorithm used when none is specified.
+  public static final String DEFAULT_COMBINER = RECIPROCAL_RANK_FUSION;
+  // Default value for the RRF k parameter.
+  public static final int DEFAULT_COMBINER_RRF_K = 60;
+  // Default upper bound on the number of queries that may be combined.
+  public static final int DEFAULT_MAX_COMBINER_QUERIES = 5;
+}