From 14a85cee277bb357f571fd78a8fde0e4b455c2f2 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 17 Aug 2025 22:41:25 +0200 Subject: [PATCH 1/3] Make calls to BM25Scorer#score inlinable. I ran experiments locally that suggest that some of the performance decrease from type pollution (https://github.com/mikemccand/luceneutil/pull/436) can be attributed to calls to `SimScorer#score` no longer being inlinable since they are polymorphic. This change helps `BM25Scorer` remain inlinable using similar tricks that we are applying for `Bits#get` and `ImpactsEnum#nextDoc`/`ImpactsEnum#advance`. Hopefully changes such as #15039 will help improve performance with other similarities as well in the future. --- .../org/apache/lucene/search/ScorerUtil.java | 53 +++++++++++++++++++ .../org/apache/lucene/search/TermQuery.java | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java index 8562ee84c018..91c49632c16b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java @@ -22,6 +22,9 @@ import java.util.stream.StreamSupport; import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.MathUtil; @@ -98,6 +101,19 @@ static Bits likelyLiveDocs(Bits acceptDocs) { } } + /** + * Optimize the given {@link Similarity} for the case when it is a {@link BM25Similarity}. This + * helps make calls to {@link SimScorer#score(float, long)} inlinable, which in turn helps speed + * up query evaluation. 
+ */ + static Similarity likelyBM25Similarity(Similarity similarity) { + if (similarity instanceof BM25Similarity) { + return similarity; + } else { + return new FilterSimilarity(similarity); + } + } + private static class FilterBits implements Bits { private final Bits in; @@ -117,6 +133,43 @@ public int length() { } } + private static class FilterSimilarity extends Similarity { + + private final Similarity similarity; + + FilterSimilarity(Similarity similarity) { + this.similarity = similarity; + } + + @Override + public SimScorer scorer( + float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return new FilterSimScorer(similarity.scorer(boost, collectionStats, termStats)) { + @Override + public Explanation explain(Explanation freq, long norm) { + return in.explain(freq, norm); + } + }; + } + } + + private static class FilterSimScorer extends SimScorer { + + protected final SimScorer in; + + FilterSimScorer(SimScorer scorer) { + this.in = scorer; + } + + @Override + public float score(float freq, long norm) { + return in.score(freq, norm); + } + + // Don't override explain() here since it has a default impl, for consistency with other Filter* + // classes. + } + /** * Compute a minimum required score, so that (float) MathUtil.sumUpperBound(minRequiredScore + * maxRemainingScore, numScorers) <= minCompetitiveScore. 
The computed value may not be the diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 6c29c23095b4..d7fff1e80b90 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -56,7 +56,7 @@ public TermWeight( } this.scoreMode = scoreMode; this.termStates = termStates; - this.similarity = searcher.getSimilarity(); + this.similarity = ScorerUtil.likelyBM25Similarity(searcher.getSimilarity()); final CollectionStatistics collectionStats; final TermStatistics termStats; From 556d5f79f6137f34a792e0f66ba277c1bc50a0de Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 17 Aug 2025 23:52:25 +0200 Subject: [PATCH 2/3] Apply likelyBM25Similarity in CombinedFieldQuery. --- .../src/java/org/apache/lucene/search/CombinedFieldQuery.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java b/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java index 1dcd0565b2f1..ef4410e84fa8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java @@ -289,8 +289,8 @@ class CombinedFieldWeight extends Weight { CollectionStatistics pseudoCollectionStats = mergeCollectionStatistics(searcher); TermStatistics pseudoTermStatistics = new TermStatistics(new BytesRef("pseudo_term"), docFreq, Math.max(1, totalTermFreq)); - this.simWeight = - searcher.getSimilarity().scorer(boost, pseudoCollectionStats, pseudoTermStatistics); + Similarity similarity = ScorerUtil.likelyBM25Similarity(searcher.getSimilarity()); + this.simWeight = similarity.scorer(boost, pseudoCollectionStats, pseudoTermStatistics); } else { this.simWeight = null; } From b01d597a06678c668a55587d9a85aa4ff67cbe2b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 17 Aug 2025 23:54:28 +0200 
Subject: [PATCH 3/3] Apply likelyBM25Similarity later. --- lucene/core/src/java/org/apache/lucene/search/TermQuery.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index d7fff1e80b90..383ec8223c88 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -56,7 +56,7 @@ public TermWeight( } this.scoreMode = scoreMode; this.termStates = termStates; - this.similarity = ScorerUtil.likelyBM25Similarity(searcher.getSimilarity()); + this.similarity = searcher.getSimilarity(); final CollectionStatistics collectionStats; final TermStatistics termStats; @@ -79,7 +79,8 @@ public TermWeight( // allocations in case default BM25Scorer is used. // See: https://github.com/apache/lucene/issues/12297 if (scoreMode.needsScores()) { - this.simScorer = similarity.scorer(boost, collectionStats, termStats); + this.simScorer = + ScorerUtil.likelyBM25Similarity(similarity).scorer(boost, collectionStats, termStats); } else { // Assigning a dummy scorer as this is not expected to be called since scores are not // needed.