Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
bf3cd5d
Combined Query Feature for Multi Query Execution
Jun 15, 2025
182bec9
Tests: Combined Query Feature for Multi Query Execution
Jun 17, 2025
b884f0e
Tests: Combined Query Feature for Multi Query Execution
Jun 24, 2025
29e8aea
Tests: Combined Query Feature for Multi Query Execution
Jun 25, 2025
c113799
Improve: Fix typo
ercsonusharma Jul 4, 2025
3600ed3
Tests: Fix errors
ercsonusharma Jul 4, 2025
9b0c76e
Review comments: implementation
ercsonusharma Jul 5, 2025
a841bc7
Code review changes
ercsonusharma Jul 12, 2025
91f8e09
Code review changes
ercsonusharma Jul 12, 2025
cace1f7
Code review changes
ercsonusharma Jul 12, 2025
299db43
Code review changes
ercsonusharma Jul 13, 2025
840070e
Code review changes
ercsonusharma Jul 13, 2025
d2feefc
Improvement and fixes
ercsonusharma Jul 16, 2025
89f63a9
Review comments impl
ercsonusharma Jul 26, 2025
d821abb
Build fix
ercsonusharma Jul 28, 2025
8041d66
Added documentation
ercsonusharma Aug 5, 2025
397dbb3
Fix for lucene upgrade
ercsonusharma Aug 8, 2025
d8b5588
Doc improv for cursors
ercsonusharma Aug 14, 2025
ec0b9cb
review comment implementation
ercsonusharma Aug 18, 2025
d6fd190
review comment implementation
ercsonusharma Aug 19, 2025
86933bc
review comment implementation
ercsonusharma Aug 20, 2025
b164979
doc update
ercsonusharma Aug 27, 2025
85f2cf9
added more test
ercsonusharma Aug 29, 2025
a4a26aa
abstract QueryComponent.mergeIds' ShardDoc processing
cpoerschke Aug 29, 2025
7fe997c
add missing @Override annotations
cpoerschke Aug 29, 2025
bcd1c3b
make DefaultShardDocQueue an anonymous class
cpoerschke Sep 1, 2025
787a016
Merge branch 'apache:main' into QueryComponent-mergeIds
cpoerschke Sep 1, 2025
7e0727c
Merge remote-tracking branch 'github_cpoerschke/QueryComponent-mergeI…
cpoerschke Sep 1, 2025
4dcbb57
dev increment: add uniqueDoc map-and-logic to ShardDocQueue
cpoerschke Sep 1, 2025
8a65023
review comment fix
ercsonusharma Sep 2, 2025
006b8c2
micro dev increment: replace unnecessary local resultSize use in Quer…
cpoerschke Sep 2, 2025
771089b
dev increment: factor out ShardDocQueue.resultIds method
cpoerschke Sep 2, 2025
460e8cd
dev increment: remove no-longer-used ShardDocQueue.(pop,size) methods
cpoerschke Sep 2, 2025
ac85d2f
review comment fix
ercsonusharma Sep 3, 2025
7b0593c
review comment fix
ercsonusharma Sep 3, 2025
c03c0f7
review comment enhancement
ercsonusharma Sep 3, 2025
a52dd22
simplification/consolidation: protected QueryComponent.newShardDocQue…
cpoerschke Sep 3, 2025
195f3f1
factor out protected QueryComponent.setResultIdsAndResponseDocs method
cpoerschke Sep 3, 2025
c1f5501
review comment enhancement
ercsonusharma Sep 3, 2025
3649d3e
Merge branch 'feat_combined_query' of https://github.com/ercsonusharm…
ercsonusharma Sep 3, 2025
4eedbed
refactor to reduce cyclometric complexity
ercsonusharma Sep 3, 2025
0990e7f
review comment fixes
ercsonusharma Sep 4, 2025
14ff5e1
debug params fix and rrf shard sort order
ercsonusharma Sep 4, 2025
bd637b7
test cases fix and rrf shard sort order
ercsonusharma Sep 5, 2025
2958599
introducing combiner methods as pre and post
ercsonusharma Sep 7, 2025
c3e44c3
distrib forced and doc update
ercsonusharma Sep 10, 2025
e2dfcef
distrib forced fix
ercsonusharma Sep 11, 2025
d4b34fc
distrib forced fix
ercsonusharma Sep 12, 2025
3fe93b8
test fix
ercsonusharma Sep 12, 2025
f23cceb
removing combiner.method and test fix
ercsonusharma Sep 17, 2025
6419a07
test fix
ercsonusharma Sep 19, 2025
a560899
test fix
ercsonusharma Sep 20, 2025
ae84ef3
bug fix
ercsonusharma Sep 28, 2025
1083fdd
review comment
ercsonusharma Oct 16, 2025
be428f0
added test case for solrcloud combined query
ercsonusharma Oct 25, 2025
5b487c2
added test case for solrcloud combined query
ercsonusharma Oct 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,34 +82,32 @@ public void init(NamedList<?> args) {

@Override
public void inform(SolrCore core) {
if (initParams != null && initParams.size() > 0) {
for (Map.Entry<String, ?> initEntry : initParams) {
if ("combiners".equals(initEntry.getKey())
&& initEntry.getValue() instanceof NamedList<?> all) {
for (int i = 0; i < all.size(); i++) {
String name = all.getName(i);
NamedList<?> combinerConfig = (NamedList<?>) all.getVal(i);
String className = (String) combinerConfig.get("class");
QueryAndResponseCombiner combiner =
core.getResourceLoader().newInstance(className, QueryAndResponseCombiner.class);
combiner.init(combinerConfig);
combiners.compute(
name,
(k, existingCombiner) -> {
if (existingCombiner == null) {
return combiner;
}
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
"Found more than one combiner with same name");
});
}
for (Map.Entry<String, ?> initEntry : initParams) {
if ("combiners".equals(initEntry.getKey())
&& initEntry.getValue() instanceof NamedList<?> all) {
for (int i = 0; i < all.size(); i++) {
String name = all.getName(i);
NamedList<?> combinerConfig = (NamedList<?>) all.getVal(i);
String className = (String) combinerConfig.get("class");
QueryAndResponseCombiner combiner =
core.getResourceLoader().newInstance(className, QueryAndResponseCombiner.class);
combiner.init(combinerConfig);
combiners.compute(
name,
(k, existingCombiner) -> {
if (existingCombiner == null) {
return combiner;
}
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
"Found more than one combiner with same name");
});
}
}
Object maxQueries = initParams.get("maxCombinerQueries");
if (maxQueries != null) {
this.maxCombinerQueries = Integer.parseInt(maxQueries.toString());
}
}
Object maxQueries = initParams.get("maxCombinerQueries");
if (maxQueries != null) {
this.maxCombinerQueries = Integer.parseInt(maxQueries.toString());
}
combiners.computeIfAbsent(
CombinerParams.RECIPROCAL_RANK_FUSION,
Expand Down Expand Up @@ -154,9 +152,9 @@ public void prepare(ResponseBuilder rb) throws IOException {

/**
* Overrides the process method to handle CombinedQueryResponseBuilder instances. This method
* processes the responses from multiple shards, combines them using the specified
* QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in the
* CombinedQueryResponseBuilder.
* processes the responses from multiple queries in the SearchIndexer, combines them using the
* specified QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in
* the CombinedQueryResponseBuilder.
*
* @param rb the ResponseBuilder object to process
* @throws IOException if an I/O error occurs during processing
Expand Down Expand Up @@ -184,46 +182,8 @@ public void process(ResponseBuilder rb) throws IOException {
setMaxHitsTerminatedEarly |= queryResult.getMaxHitsTerminatedEarly();
}
}
String algorithm =
rb.req
.getParams()
.get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER);
QueryAndResponseCombiner combinerStrategy =
QueryAndResponseCombiner.getImplementation(algorithm, combiners);
QueryResult combinedQueryResult = combinerStrategy.combine(queryResults, rb.req.getParams());
combinedQueryResult.setPartialResults(partialResults);
combinedQueryResult.setSegmentTerminatedEarly(segmentTerminatedEarly);
combinedQueryResult.setMaxHitsTerminatedEarly(setMaxHitsTerminatedEarly);
crb.setResult(combinedQueryResult);
if (rb.isDebug()) {
String[] queryKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY);
List<Query> queries = crb.responseBuilders.stream().map(ResponseBuilder::getQuery).toList();
NamedList<Explanation> explanations =
combinerStrategy.getExplanations(
queryKeys,
queries,
queryResults,
rb.req.getSearcher(),
rb.req.getSchema(),
rb.req.getParams());
rb.addDebugInfo("combinerExplanations", explanations);
}
ResultContext ctx = new BasicResultContext(crb);
crb.rsp.addResponse(ctx);
crb.rsp.addToLog(
"hits",
crb.getResults() == null || crb.getResults().docList == null
? 0
: crb.getResults().docList.matches());
if (!crb.req.getParams().getBool(ShardParams.IS_SHARD, false)) {
// for non-distributed request and future cursor improvement
if (null != crb.getNextCursorMark()) {
crb.rsp.add(
CursorMarkParams.CURSOR_MARK_NEXT,
crb.responseBuilders.getFirst().getNextCursorMark().getSerializedTotem());
}
}

prepareCombinedResponseBuilder(
rb, crb, queryResults, partialResults, segmentTerminatedEarly, setMaxHitsTerminatedEarly);
if (crb.mergeFieldHandler != null) {
crb.mergeFieldHandler.handleMergeFields(crb, crb.req.getSearcher());
} else {
Expand All @@ -235,6 +195,53 @@ public void process(ResponseBuilder rb) throws IOException {
}
}

/**
 * Combines the per-query results via the configured {@link QueryAndResponseCombiner}
 * strategy and populates the combined response builder: merged result, optional
 * combiner debug explanations, response docs, hit-count logging, and (for
 * non-distributed requests) the next cursor mark.
 *
 * @param rb the original response builder carrying the request params
 * @param crb the combined response builder to populate
 * @param queryResults the individual per-query results to merge
 * @param partialResults aggregated partial-results flag across the sub-queries
 * @param segmentTerminatedEarly aggregated segment-terminated-early flag
 * @param setMaxHitsTerminatedEarly aggregated max-hits-terminated-early flag
 * @throws IOException if combining or explanation generation fails
 */
private void prepareCombinedResponseBuilder(
    ResponseBuilder rb,
    CombinedQueryResponseBuilder crb,
    List<QueryResult> queryResults,
    boolean partialResults,
    boolean segmentTerminatedEarly,
    boolean setMaxHitsTerminatedEarly)
    throws IOException {
  final String combinerAlgorithm =
      rb.req.getParams().get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER);
  final QueryAndResponseCombiner strategy =
      QueryAndResponseCombiner.getImplementation(combinerAlgorithm, combiners);

  // Merge the individual results and carry over the aggregated termination flags.
  final QueryResult merged = strategy.combine(queryResults, rb.req.getParams());
  merged.setPartialResults(partialResults);
  merged.setSegmentTerminatedEarly(segmentTerminatedEarly);
  merged.setMaxHitsTerminatedEarly(setMaxHitsTerminatedEarly);
  crb.setResult(merged);

  if (rb.isDebug()) {
    final String[] queryKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY);
    final List<Query> queries =
        crb.responseBuilders.stream().map(ResponseBuilder::getQuery).toList();
    final NamedList<Explanation> explanations =
        strategy.getExplanations(
            queryKeys,
            queries,
            queryResults,
            rb.req.getSearcher(),
            rb.req.getSchema(),
            rb.req.getParams());
    rb.addDebugInfo("combinerExplanations", explanations);
  }

  crb.rsp.addResponse(new BasicResultContext(crb));
  crb.rsp.addToLog(
      "hits",
      crb.getResults() == null || crb.getResults().docList == null
          ? 0
          : crb.getResults().docList.matches());

  // for non-distributed request and future cursor improvement
  // NOTE(review): the null check reads crb's cursor mark but the serialized totem
  // comes from the first sub-builder — assumed to be kept in sync; verify.
  if (!crb.req.getParams().getBool(ShardParams.IS_SHARD, false)
      && crb.getNextCursorMark() != null) {
    crb.rsp.add(
        CursorMarkParams.CURSOR_MARK_NEXT,
        crb.responseBuilders.getFirst().getNextCursorMark().getSerializedTotem());
  }
}

@Override
protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.IndexReaderContext;
Expand Down Expand Up @@ -905,7 +906,84 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded)
return true;
}

/**
 * Accumulates {@link ShardDoc} instances pushed from per-shard responses and produces the final
 * id-to-doc mapping for the merged result page. Implementations decide ordering and duplicate
 * handling.
 */
protected abstract static class ShardDocQueue {
  /**
   * Offers a document to the queue.
   *
   * @param shardDoc the shard document to add
   * @return true if the document was accepted, false if rejected (e.g. as a duplicate id)
   */
  public abstract boolean push(ShardDoc shardDoc);

  /**
   * Drains the queue and returns the documents in the requested paging window.
   *
   * @param offset number of leading documents to skip (the paging offset)
   * @return map from the unique key's string form to its ShardDoc, with
   *     {@code positionInResponse} assigned
   */
  public abstract Map<Object, ShardDoc> resultIds(int offset);
}

/**
 * Default {@link ShardDocQueue} factory: given the merge sort fields and a maximum queue size
 * (offset + rows), builds a queue backed by a {@link ShardFieldSortedHitQueue} that also
 * eliminates duplicate document ids seen from multiple shards.
 */
protected static class ShardDocQueueFactory
    implements BiFunction<SortField[], Integer, ShardDocQueue> {

  private final SolrIndexSearcher searcher;

  public ShardDocQueueFactory(SolrIndexSearcher searcher) {
    this.searcher = searcher;
  }

  @Override
  public ShardDocQueue apply(SortField[] sortFields, Integer size) {
    return new ShardDocQueue() {

      // id to shard mapping, to eliminate any accidental dups
      private final HashMap<Object, String> uniqueDoc = new HashMap<>();

      // Priority queue so we only order the top (offset + rows) docs, not all of them.
      private final ShardFieldSortedHitQueue queue =
          new ShardFieldSortedHitQueue(sortFields, size, searcher);

      @Override
      public boolean push(ShardDoc shardDoc) {
        final String prevShard = uniqueDoc.put(shardDoc.id, shardDoc.shard);
        if (prevShard != null) {
          // Duplicate detected. For now, always keep the first occurrence since we
          // cannot currently remove the earlier entry from the priority queue.
          // TODO: make the surviving duplicate deterministic (e.g. by shard
          // comparison) once queue removal is supported.
          return false;
        }

        queue.insertWithOverflow(shardDoc);
        return true;
      }

      @Override
      public Map<Object, ShardDoc> resultIds(int offset) {
        final Map<Object, ShardDoc> resultIds = new HashMap<>();

        // The queue now has 0 -> queuesize docs, where queuesize <= start + rows.
        // So we want to pop the last documents off the queue to get
        // the docs offset -> queuesize.
        int resultSize = queue.size() - offset;
        resultSize = Math.max(0, resultSize); // there may not be any docs in range

        for (int i = resultSize - 1; i >= 0; i--) {
          ShardDoc shardDoc = queue.pop();
          shardDoc.positionInResponse = i;
          // Need the toString() for correlation with other lists that must
          // be strings (like keys in highlighting, explain, etc)
          resultIds.put(shardDoc.id.toString(), shardDoc);
        }

        return resultIds;
      }
    };
  }
}

/**
 * Merges per-shard document ids using the default {@link ShardDocQueueFactory}
 * (sorted priority-queue merge with duplicate-id elimination).
 */
protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
  final ShardDocQueueFactory queueFactory = new ShardDocQueueFactory(rb.req.getSearcher());
  implementMergeIds(rb, sreq, queueFactory);
}

private void implementMergeIds(
ResponseBuilder rb, ShardRequest sreq, ShardDocQueueFactory shardDocQueueFactory) {
List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
if (mergeStrategies != null) {
mergeStrategies.sort(MergeStrategy.MERGE_COMP);
Expand Down Expand Up @@ -947,14 +1025,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
IndexSchema schema = rb.req.getSchema();
SchemaField uniqueKeyField = schema.getUniqueKeyField();

// id to shard mapping, to eliminate any accidental dups
HashMap<Object, String> uniqueDoc = new HashMap<>();

// Merge the docs via a priority queue so we don't have to sort *all* of the
// documents... we only need to order the top (rows+start)
final ShardFieldSortedHitQueue queue =
new ShardFieldSortedHitQueue(
sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
final ShardDocQueue shardDocQueue =
shardDocQueueFactory.apply(sortFields, ss.getOffset() + ss.getCount());

NamedList<Object> shardInfo = null;
if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
Expand Down Expand Up @@ -1125,23 +1199,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
for (int i = 0; i < docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());

String prevShard = uniqueDoc.put(id, srsp.getShard());
if (prevShard != null) {
// duplicate detected
numFound--;

// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
continue;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(srsp.shard) >= 0) {
// TODO: remove previous from priority queue
// continue;
// }
}

ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
Expand All @@ -1160,24 +1217,13 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {

shardDoc.sortFieldValues = unmarshalledSortFieldValues;

queue.insertWithOverflow(shardDoc);
if (!shardDocQueue.push(shardDoc)) {
numFound--;
}
} // end for-each-doc-in-response
} // end for-each-response

// The queue now has 0 -> queuesize docs, where queuesize <= start + rows
// So we want to pop the last documents off the queue to get
// the docs offset -> queuesize
int resultSize = queue.size() - ss.getOffset();
resultSize = Math.max(0, resultSize); // there may not be any docs in range

Map<Object, ShardDoc> resultIds = new HashMap<>();
for (int i = resultSize - 1; i >= 0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
final Map<Object, ShardDoc> resultIds = shardDocQueue.resultIds(ss.getOffset());

// Add hits for distributed requests
// https://issues.apache.org/jira/browse/SOLR-3518
Expand All @@ -1189,7 +1235,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
responseDocs.setNumFoundExact(hitCountIsExact);
responseDocs.setStart(ss.getOffset());
// size appropriately
for (int i = 0; i < resultSize; i++) responseDocs.add(null);
for (int i = 0; i < resultIds.size(); i++) responseDocs.add(null);

// save these results in a private area so we can access them
// again when retrieving stored fields.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ It is extending JSON Query DSL ultimately enabling Hybrid Search.

[NOTE]
====
This feature is currently unsupported for grouping and Cursors. In User-Managed (aka Standalone) Mode, the `shards` parameter must be provided to enable this feature.
This feature does not currently support grouping or cursors.
====

== Query DSL Structure
Expand Down Expand Up @@ -82,7 +82,6 @@ Combined Query Feature has a separate handler with class `solr.CombinedQuerySear

```
<requestHandler name="/search" class="solr.CombinedQuerySearchHandler">
<bool name="httpCaching">true</bool>
.....
</requestHandler>
```
Expand Down