Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bf3cd5d
Combined Query Feature for Multi Query Execution
Jun 15, 2025
182bec9
Tests: Combined Query Feature for Multi Query Execution
Jun 17, 2025
b884f0e
Tests: Combined Query Feature for Multi Query Execution
Jun 24, 2025
29e8aea
Tests: Combined Query Feature for Multi Query Execution
Jun 25, 2025
c113799
Improve: Fix typo
ercsonusharma Jul 4, 2025
3600ed3
Tests: Fix errors
ercsonusharma Jul 4, 2025
9b0c76e
Review comments: implementation
ercsonusharma Jul 5, 2025
a841bc7
Code review changes
ercsonusharma Jul 12, 2025
91f8e09
Code review changes
ercsonusharma Jul 12, 2025
cace1f7
Code review changes
ercsonusharma Jul 12, 2025
299db43
Code review changes
ercsonusharma Jul 13, 2025
840070e
Code review changes
ercsonusharma Jul 13, 2025
d2feefc
Improvement and fixes
ercsonusharma Jul 16, 2025
89f63a9
Review comments impl
ercsonusharma Jul 26, 2025
d821abb
Build fix
ercsonusharma Jul 28, 2025
8041d66
Added documentation
ercsonusharma Aug 5, 2025
397dbb3
Fix for lucene upgrade
ercsonusharma Aug 8, 2025
d8b5588
Doc improv for cursors
ercsonusharma Aug 14, 2025
ec0b9cb
review comment implementation
ercsonusharma Aug 18, 2025
d6fd190
review comment implementation
ercsonusharma Aug 19, 2025
86933bc
review comment implementation
ercsonusharma Aug 20, 2025
b164979
doc update
ercsonusharma Aug 27, 2025
85f2cf9
added more test
ercsonusharma Aug 29, 2025
a4a26aa
abstract QueryComponent.mergeIds' ShardDoc processing
cpoerschke Aug 29, 2025
7fe997c
add missing @Override annotations
cpoerschke Aug 29, 2025
bcd1c3b
make DefaultShardDocQueue an anonymous class
cpoerschke Sep 1, 2025
787a016
Merge branch 'apache:main' into QueryComponent-mergeIds
cpoerschke Sep 1, 2025
7e0727c
Merge remote-tracking branch 'github_cpoerschke/QueryComponent-mergeI…
cpoerschke Sep 1, 2025
4dcbb57
dev increment: add uniqueDoc map-and-logic to ShardDocQueue
cpoerschke Sep 1, 2025
8a65023
review comment fix
ercsonusharma Sep 2, 2025
006b8c2
micro dev increment: replace unnecessary local resultSize use in Quer…
cpoerschke Sep 2, 2025
771089b
dev increment: factor out ShardDocQueue.resultIds method
cpoerschke Sep 2, 2025
460e8cd
dev increment: remove no-longer-used ShardDocQueue.(pop,size) methods
cpoerschke Sep 2, 2025
ac85d2f
review comment fix
ercsonusharma Sep 3, 2025
7b0593c
review comment fix
ercsonusharma Sep 3, 2025
c03c0f7
review comment enhancement
ercsonusharma Sep 3, 2025
a52dd22
simplification/consolidation: protected QueryComponent.newShardDocQue…
cpoerschke Sep 3, 2025
195f3f1
factor out protected QueryComponent.setResultIdsAndResponseDocs method
cpoerschke Sep 3, 2025
c1f5501
review comment enhancement
ercsonusharma Sep 3, 2025
3649d3e
Merge branch 'feat_combined_query' of https://github.com/ercsonusharm…
ercsonusharma Sep 3, 2025
4eedbed
refactor to reduce cyclometric complexity
ercsonusharma Sep 3, 2025
0990e7f
review comment fixes
ercsonusharma Sep 4, 2025
14ff5e1
debug params fix and rrf shard sort order
ercsonusharma Sep 4, 2025
bd637b7
test cases fix and rrf shard sort order
ercsonusharma Sep 5, 2025
2958599
introducing combiner methods as pre and post
ercsonusharma Sep 7, 2025
c3e44c3
distrib forced and doc update
ercsonusharma Sep 10, 2025
e2dfcef
distrib forced fix
ercsonusharma Sep 11, 2025
d4b34fc
distrib forced fix
ercsonusharma Sep 12, 2025
3fe93b8
test fix
ercsonusharma Sep 12, 2025
f23cceb
removing combiner.method and test fix
ercsonusharma Sep 17, 2025
6419a07
test fix
ercsonusharma Sep 19, 2025
a560899
test fix
ercsonusharma Sep 20, 2025
ae84ef3
bug fix
ercsonusharma Sep 28, 2025
1083fdd
review comment
ercsonusharma Oct 16, 2025
be428f0
added test case for solrcloud combined query
ercsonusharma Oct 25, 2025
5b487c2
added test case for solrcloud combined query
ercsonusharma Oct 25, 2025
18e8518
little refactoring
dsmiley Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.params.CombinerParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.BasicResultContext;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryResult;
import org.apache.solr.search.combine.QueryAndResponseCombiner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The CombinedQueryComponent class extends QueryComponent and provides support for executing
* multiple queries and combining their results.
*/
public class CombinedQueryComponent extends QueryComponent {

public static final String COMPONENT_NAME = "combined_query";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
* Overrides the prepare method to handle combined queries.
*
* @param rb the ResponseBuilder to prepare
* @throws IOException if an I/O error occurs during preparation
*/
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (rb instanceof CombinedQueryResponseBuilder crb) {
SolrParams params = crb.req.getParams();
String[] queriesToCombineKeys = params.getParams(CombinerParams.COMBINER_QUERY);
for (String queryKey : queriesToCombineKeys) {
final var unparsedQuery = params.get(queryKey);
ResponseBuilder rbNew = new ResponseBuilder(rb.req, new SolrQueryResponse(), rb.components);
rbNew.setQueryString(unparsedQuery);
super.prepare(rbNew);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't we want to manipulate the sort spec so that we get all docs up to offset (AKA "start" param) + rows since RRF/combiner is going to want to see all docs/rankings up to offset+rows? Otherwise our combiner is blind to the "offset" docs. Assuming you agree, then we need to basically apply paging at this layer (our component) instead of letting the subquery do it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It anyways happening here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's for distributed-search but not single-core search.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think user-managed/standalone vs SolrCloud is orthogonal. This is about a single shard working correctly (in whatever Solr mode). IMO it's not optional for basic paging parameters to work correctly with one shard.

I could imagine we'd prefer a mechanism for a SearchComponent to force the "shortCircuit"=false thereby ensuring there's always a distributed phase. Maybe that could be done by re-ordering SearchHandler's call to getAndPrepShardHandler to be after prepareComponents (swap adjacent lines)? Then the prepare method of this component could force distrib and add the shortCircuit=false or something like that. And/or maybe a component should have a more elegant callback to communicate that it forces distributed search (even when there's one shard/core). This would overall simplify this component, no longer needing to handle paging in process(); instead do for distributed-search only.

crb.responseBuilders.add(rbNew);
}
}
super.prepare(rb);
}

/**
* Overrides the process method to handle CombinedQueryResponseBuilder instances. This method
* processes the responses from multiple shards, combines them using the specified
* QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in the
* CombinedQueryResponseBuilder.
*
* @param rb the ResponseBuilder object to process
* @throws IOException if an I/O error occurs during processing
*/
@Override
public void process(ResponseBuilder rb) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this component forces distributed mode, why would process() be called?

Copy link
Author

@ercsonusharma ercsonusharma Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With reference, In the case of distributed mode, an HTTP request is made with rb.distrib=false through ShardHandlerRequest in one of the phases. The process method would be called for that particular shard query request.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For an isShard=true request then. Okay; but then why wouldn't the logic just pass-through as normal? It's not clear why CombinedQueryComponent has any logic to accomplish at this phase. "Way 2" is only at aggregation (disbtrib=true), in my head anyway.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping

Copy link
Author

@ercsonusharma ercsonusharma Oct 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, took longer to reply.
The forced distributed would anyway enable the shard call (isShard=true).

then why wouldn't the logic just pass-through as normal

The process() method is directly communicating with the Searcher as we need to process each query at the shard searcher level. If you would look at the method override, it is also creating the combined docSet across queries other than few other metadata, required for merging the docs finally across the shard at the coordinator level.

However, this Searcher call per query can be further parallelised as added in comment.
@dsmiley

if (rb instanceof CombinedQueryResponseBuilder crb) {
boolean partialResults = false;
boolean segmentTerminatedEarly = false;
List<QueryResult> queryResults = new ArrayList<>();
for (ResponseBuilder rbNow : crb.responseBuilders) {
super.process(rbNow);
DocListAndSet docListAndSet = rbNow.getResults();
QueryResult queryResult = new QueryResult();
queryResult.setDocListAndSet(docListAndSet);
queryResults.add(queryResult);
partialResults |= SolrQueryResponse.isPartialResults(rbNow.rsp.getResponseHeader());
rbNow.setCursorMark(rbNow.getCursorMark());
if (rbNow.rsp.getResponseHeader() != null) {
segmentTerminatedEarly |=
(boolean)
rbNow
.rsp
.getResponseHeader()
.getOrDefault(
SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, false);
}
}
QueryAndResponseCombiner combinerStrategy =
QueryAndResponseCombiner.getImplementation(rb.req.getParams());
QueryResult combinedQueryResult = combinerStrategy.combine(queryResults);
combinedQueryResult.setPartialResults(partialResults);
combinedQueryResult.setSegmentTerminatedEarly(segmentTerminatedEarly);
crb.setResult(combinedQueryResult);
ResultContext ctx = new BasicResultContext(crb);
crb.rsp.addResponse(ctx);
crb.rsp
.getToLog()
.add(
"hits",
crb.getResults() == null || crb.getResults().docList == null
? 0
: crb.getResults().docList.matches());
if (!crb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
if (null != crb.getNextCursorMark()) {
crb.rsp.add(
CursorMarkParams.CURSOR_MARK_NEXT, crb.getNextCursorMark().getSerializedTotem());
}
}

if (crb.mergeFieldHandler != null) {
crb.mergeFieldHandler.handleMergeFields(crb, crb.req.getSearcher());
} else {
doFieldSortValues(rb, crb.req.getSearcher());
}
doPrefetch(crb);
} else {
super.process(rb);
}
}

@Override
public String getDescription() {
return "Combined Query Component to support multiple query execution";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;

import java.util.ArrayList;
import java.util.List;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;

/**
* The CombinedQueryResponseBuilder class extends the ResponseBuilder class and is responsible for
* building a combined response for multiple SearchComponent objects. It orchestrates the process of
* constructing the SolrQueryResponse by aggregating results from various components.
*/
public class CombinedQueryResponseBuilder extends ResponseBuilder {

public final List<ResponseBuilder> responseBuilders = new ArrayList<>();

/**
* Constructs a CombinedQueryResponseBuilder instance.
*
* @param req the SolrQueryRequest object containing the query parameters and context.
* @param rsp the SolrQueryResponse object to which the combined results will be added.
* @param components a list of SearchComponent objects that will be used to build the response.
*/
public CombinedQueryResponseBuilder(
SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
super(req, rsp, components);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.combine.QueryAndResponseCombiner;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.search.grouping.distributed.ShardRequestFactory;
Expand Down Expand Up @@ -963,6 +964,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
boolean maxHitsTerminatedEarly = false;
long approximateTotalHits = 0;
int failedShardCount = 0;
Map<String, List<ShardDoc>> shardDocMap = new HashMap<>();
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = null;
NamedList<?> responseHeader = null;
Expand Down Expand Up @@ -1152,7 +1154,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
}

shardDoc.sortFieldValues = unmarshalledSortFieldValues;

shardDocMap.computeIfAbsent(srsp.getShard(), list -> new ArrayList<>()).add(shardDoc);
queue.insertWithOverflow(shardDoc);
} // end for-each-doc-in-response
} // end for-each-response
Expand All @@ -1162,14 +1164,29 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
// the docs offset -> queuesize
int resultSize = queue.size() - ss.getOffset();
resultSize = Math.max(0, resultSize); // there may not be any docs in range

Map<Object, ShardDoc> resultIds = new HashMap<>();
for (int i = resultSize - 1; i >= 0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);

if (rb instanceof CombinedQueryResponseBuilder) {
QueryAndResponseCombiner combinerStrategy =
QueryAndResponseCombiner.getImplementation(rb.req.getParams());
List<ShardDoc> combinedShardDocs = combinerStrategy.combine(shardDocMap);
maxScore = 0.0f;
for (int i = 0; i < resultSize; i++) {
ShardDoc shardDoc = combinedShardDocs.get(i);
shardDoc.positionInResponse = i;
maxScore = Math.max(maxScore, shardDoc.score);
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
} else {
for (int i = resultSize - 1; i >= 0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
}

// Add hits for distributed requests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CombinerParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.ModifiableSolrParams;
Expand Down Expand Up @@ -240,7 +241,7 @@ public void changed(SolrPackageLoader.SolrPackage pkg, Ctx ctx) {
}

@SuppressWarnings({"unchecked"})
private void initComponents() {
private void initComponents(boolean isCombinedQuery) {
Object declaredComponents = initArgs.get(INIT_COMPONENTS);
List<String> first = (List<String>) initArgs.get(INIT_FIRST_COMPONENTS);
List<String> last = (List<String>) initArgs.get(INIT_LAST_COMPONENTS);
Expand All @@ -251,6 +252,11 @@ private void initComponents() {
// Use the default component list
list = getDefaultComponents();

if (isCombinedQuery) {
list.removeFirst();
list.addFirst(CombinedQueryComponent.COMPONENT_NAME);
}

if (first != null) {
List<String> clist = first;
clist.addAll(list);
Expand Down Expand Up @@ -289,12 +295,12 @@ private void initComponents() {
this.components = components;
}

public List<SearchComponent> getComponents() {
public List<SearchComponent> getComponents(boolean isCombinedQuery) {
List<SearchComponent> result = components; // volatile read
if (result == null) {
synchronized (this) {
if (components == null) {
initComponents();
initComponents(isCombinedQuery);
}
result = components;
}
Expand Down Expand Up @@ -354,6 +360,9 @@ public ShardHandler getAndPrepShardHandler(SolrQueryRequest req, ResponseBuilder
*/
protected ResponseBuilder newResponseBuilder(
SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
if (req.getParams().getBool(CombinerParams.COMBINER, false)) {
return new CombinedQueryResponseBuilder(req, rsp, components);
}
return new ResponseBuilder(req, rsp, components);
}

Expand Down Expand Up @@ -401,7 +410,8 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
purpose, n -> shardPurposes.computeIfAbsent(n, name -> new Counter()).inc());
}

List<SearchComponent> components = getComponents();
List<SearchComponent> components =
getComponents(req.getParams().getBool(CombinerParams.COMBINER, false));
ResponseBuilder rb = newResponseBuilder(req, rsp, components);
if (rb.requestInfo != null) {
rb.requestInfo.setResponseBuilder(rb);
Expand Down
Loading
Loading