From 37ea0c35eb548a9b432b379be7873a413ad5e594 Mon Sep 17 00:00:00 2001
From: "gesong.samuel"
Date: Sat, 19 Jul 2025 11:04:59 +0800
Subject: [PATCH 1/2] save code

---
Review notes (free-form area, not part of the commit message):

* Adds Scorer#applyAsRequiredClause(DocAndScoreAccBuffer). The default
  implementation walks the buffer, advances the scorer's iterator to each
  doc, keeps only the docs the iterator lands on and adds score() to their
  accumulated score.
* ConstantScoreScorer overrides it: it intersects first, then adds the
  constant score in a second pass that is skipped when the score is 0.
* TermScorer overrides it: frequencies are collected through the new
  PostingsEnum#nextRequiredFreqBuffer, then norms are read, then scores are
  added, using the new NormAndFreqBuffer holder.
* BlockMaxConjunctionBulkScorer and MaxScoreBulkScorer call the new method
  instead of ScorerUtil.applyRequiredClause.
* NormAndFreqBuffer#growNoCopy tests "size < minSize", but nothing in this
  series assigns "size", so freqs is reallocated on every call with
  minSize > 0. The class is removed again in 2/2.
* NOTE(review): line breaks and indentation of this series were restored
  from a whitespace-collapsed copy. Context-line indentation assumes the
  2-space style seen in the added lines; use --ignore-whitespace if a hunk
  fails to match.

 .../lucene/index/NormAndFreqBuffer.java       | 23 ++++++++++++++++++
 .../org/apache/lucene/index/PostingsEnum.java | 19 +++++++++++++++
 .../search/BlockMaxConjunctionBulkScorer.java |  2 +-
 .../lucene/search/ConstantScoreScorer.java    | 24 +++++++++++++++++++
 .../lucene/search/MaxScoreBulkScorer.java     |  3 +--
 .../java/org/apache/lucene/search/Scorer.java | 23 ++++++++++++++++++
 .../org/apache/lucene/search/TermScorer.java  | 21 ++++++++++++++++
 7 files changed, 112 insertions(+), 3 deletions(-)
 create mode 100644 lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java

diff --git a/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java b/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java
new file mode 100644
index 000000000000..1fc0a0b132e6
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java
@@ -0,0 +1,23 @@
+package org.apache.lucene.index;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
+
+public final class NormAndFreqBuffer {
+
+  public long[] norms = LongsRef.EMPTY_LONGS;
+
+  public int[] freqs = IntsRef.EMPTY_INTS;
+
+  public int size;
+
+  public NormAndFreqBuffer() {}
+
+  public void growNoCopy(int minSize) {
+    if (size < minSize) {
+      norms = ArrayUtil.growNoCopy(norms, minSize);
+      freqs = new int[norms.length];
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
index 8fd9d8c9a373..a4f93d4b4603 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import org.apache.lucene.search.DocAndFloatFeatureBuffer;
+import org.apache.lucene.search.DocAndScoreAccBuffer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 
@@ -142,4 +143,22 @@ public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOExc
     }
     buffer.size = size;
   }
+
+  public void nextRequiredFreqBuffer(DocAndScoreAccBuffer buffer, int[] freq) throws IOException {
+    int intersectionSize = 0;
+    int curDoc = docID();
+    for (int i = 0; i < buffer.size; i++) {
+      int targetDoc = buffer.docs[i];
+      if (curDoc < targetDoc) {
+        curDoc = advance(targetDoc);
+      }
+      if (curDoc == targetDoc) {
+        buffer.docs[intersectionSize] = targetDoc;
+        buffer.scores[intersectionSize] = buffer.scores[i];
+        freq[intersectionSize] = freq();
+        intersectionSize++;
+      }
+    }
+    buffer.size = intersectionSize;
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
index d10c0bb41e7c..55d7312f53ef 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
@@ -181,7 +181,7 @@ private void scoreWindowScoreFirst(
             docAndScoreAccBuffer, sumOfOtherClause, scorable.minCompetitiveScore, scorers.length);
       }
 
-      ScorerUtil.applyRequiredClause(docAndScoreAccBuffer, iterators[i], scorables[i]);
+      scorers[i].applyAsRequiredClause(docAndScoreAccBuffer);
     }
 
     for (int i = 0; i < docAndScoreAccBuffer.size; ++i) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java
index 4ae8ef09017f..a24a14da906c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java
@@ -163,4 +163,28 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndFloatFeatureBuffer
     Arrays.fill(buffer.features, 0, size, score);
     buffer.size = size;
   }
+
+  @Override
+  public void applyAsRequiredClause(DocAndScoreAccBuffer buffer) throws IOException {
+    int intersectionSize = 0;
+    int curDoc = disi.docID();
+    for (int i = 0; i < buffer.size; ++i) {
+      int targetDoc = buffer.docs[i];
+      if (curDoc < targetDoc) {
+        curDoc = disi.advance(targetDoc);
+      }
+      if (curDoc == targetDoc) {
+        buffer.docs[intersectionSize] = targetDoc;
+        buffer.scores[intersectionSize] = buffer.scores[i];
+        intersectionSize++;
+      }
+    }
+
+    buffer.size = intersectionSize;
+    if (score != 0) {
+      for (int i = 0; i < intersectionSize; ++i) {
+        buffer.scores[i] += score;
+      }
+    }
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
index c723cbe00f13..9290e418d7ca 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
@@ -262,8 +262,7 @@ private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptD
               allScorers.length);
         }
 
-        DisiWrapper scorer = allScorers[i];
-        ScorerUtil.applyRequiredClause(docAndScoreAccBuffer, scorer.iterator, scorer.scorable);
+        allScorers[i].scorer.applyAsRequiredClause(docAndScoreAccBuffer);
       }
 
       scoreNonEssentialClauses(collector, docAndScoreAccBuffer, firstRequiredScorer);
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
index fc540c30cc42..2e3bb0648544 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -130,4 +130,27 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndFloatFeatureBuffer
     }
     buffer.size = size;
   }
+
+  /**
+   * Apply this {@link Scorer} as a required clause on the given {@link DocAndScoreAccBuffer}. This
+   * filters out documents from the buffer that do not match this scorer, and adds the scores of
+   * this {@link Scorer} to the scores.
+   */
+  public void applyAsRequiredClause(DocAndScoreAccBuffer buffer) throws IOException {
+    DocIdSetIterator iterator = iterator();
+    int intersectionSize = 0;
+    int curDoc = iterator.docID();
+    for (int i = 0; i < buffer.size; ++i) {
+      int targetDoc = buffer.docs[i];
+      if (curDoc < targetDoc) {
+        curDoc = iterator.advance(targetDoc);
+      }
+      if (curDoc == targetDoc) {
+        buffer.docs[intersectionSize] = targetDoc;
+        buffer.scores[intersectionSize] = buffer.scores[i] + score();
+        intersectionSize++;
+      }
+    }
+    buffer.size = intersectionSize;
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 278f735838e7..09a657163ab8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -19,12 +19,14 @@
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.NormAndFreqBuffer;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LongsRef;
 
 /**
@@ -39,6 +41,8 @@ public final class TermScorer extends Scorer {
   private final NumericDocValues norms;
   private final ImpactsDISI impactsDisi;
   private final MaxScoreCache maxScoreCache;
+  private final NormAndFreqBuffer normAndFreqBuffer = new NormAndFreqBuffer();
+  private int[] freqs = IntsRef.EMPTY_INTS;
   private long[] normValues = LongsRef.EMPTY_LONGS;
 
   /** Construct a {@link TermScorer} that will iterate all documents. */
@@ -171,4 +175,21 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndFloatFeatureBuffer
       buffer.features[i] = scorer.score(buffer.features[i], normValues[i]);
     }
   }
+
+  @Override
+  public void applyAsRequiredClause(DocAndScoreAccBuffer buffer) throws IOException {
+    normAndFreqBuffer.growNoCopy(buffer.size);
+    postingsEnum.nextRequiredFreqBuffer(buffer, normAndFreqBuffer.freqs);
+    for (int i = 0; i < buffer.size; i++) {
+      if (norms == null || norms.advanceExact(buffer.docs[i]) == false) {
+        normAndFreqBuffer.norms[i] = 1L;
+      } else {
+        normAndFreqBuffer.norms[i] = norms.longValue();
+      }
+    }
+
+    for (int i = 0; i < buffer.size; i++) {
+      buffer.scores[i] += scorer.score(normAndFreqBuffer.freqs[i], normAndFreqBuffer.norms[i]);
+    }
+  }
 }

From 51c938e901f72bfbe0dce89ffd44fbbf3ad6f8ca Mon Sep 17 00:00:00 2001
From: "gesong.samuel"
Date: Sun, 20 Jul 2025 18:12:33 +0800
Subject: [PATCH 2/2] simplify

---
Review notes (free-form area, not part of the commit message):

* Removes NormAndFreqBuffer and PostingsEnum#nextRequiredFreqBuffer again.
  TermScorer#applyAsRequiredClause now performs the intersection itself and
  keeps frequencies and norms in its own freqs / normValues arrays.
* The norm and score loops now run over the intersection size rather than
  re-reading buffer.size.
* NOTE(review): normValues is replaced by a fresh array whenever freqs
  grows. nextDocsAndScores also reads normValues (see the hunk context in
  1/2), so ArrayUtil.growNoCopy(normValues, freqs.length) may be preferable
  to avoid dropping an array that is already large enough. Confirm against
  the full file.

 .../lucene/index/NormAndFreqBuffer.java       | 23 ------------
 .../org/apache/lucene/index/PostingsEnum.java | 19 ----------
 .../org/apache/lucene/search/TermScorer.java  | 36 ++++++++++++++-----
 3 files changed, 27 insertions(+), 51 deletions(-)
 delete mode 100644 lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java

diff --git a/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java b/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java
deleted file mode 100644
index 1fc0a0b132e6..000000000000
--- a/lucene/core/src/java/org/apache/lucene/index/NormAndFreqBuffer.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package org.apache.lucene.index;
-
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.LongsRef;
-
-public final class NormAndFreqBuffer {
-
-  public long[] norms = LongsRef.EMPTY_LONGS;
-
-  public int[] freqs = IntsRef.EMPTY_INTS;
-
-  public int size;
-
-  public NormAndFreqBuffer() {}
-
-  public void growNoCopy(int minSize) {
-    if (size < minSize) {
-      norms = ArrayUtil.growNoCopy(norms, minSize);
-      freqs = new int[norms.length];
-    }
-  }
-}
diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
index a4f93d4b4603..8fd9d8c9a373 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
@@ -18,7 +18,6 @@
 
 import java.io.IOException;
 import org.apache.lucene.search.DocAndFloatFeatureBuffer;
-import org.apache.lucene.search.DocAndScoreAccBuffer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 
@@ -143,22 +142,4 @@ public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOExc
     }
     buffer.size = size;
   }
-
-  public void nextRequiredFreqBuffer(DocAndScoreAccBuffer buffer, int[] freq) throws IOException {
-    int intersectionSize = 0;
-    int curDoc = docID();
-    for (int i = 0; i < buffer.size; i++) {
-      int targetDoc = buffer.docs[i];
-      if (curDoc < targetDoc) {
-        curDoc = advance(targetDoc);
-      }
-      if (curDoc == targetDoc) {
-        buffer.docs[intersectionSize] = targetDoc;
-        buffer.scores[intersectionSize] = buffer.scores[i];
-        freq[intersectionSize] = freq();
-        intersectionSize++;
-      }
-    }
-    buffer.size = intersectionSize;
-  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 09a657163ab8..8248186de05a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -19,7 +19,6 @@
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.index.ImpactsEnum;
-import org.apache.lucene.index.NormAndFreqBuffer;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
@@ -41,7 +40,6 @@ public final class TermScorer extends Scorer {
   private final NumericDocValues norms;
   private final ImpactsDISI impactsDisi;
   private final MaxScoreCache maxScoreCache;
-  private final NormAndFreqBuffer normAndFreqBuffer = new NormAndFreqBuffer();
   private int[] freqs = IntsRef.EMPTY_INTS;
   private long[] normValues = LongsRef.EMPTY_LONGS;
 
@@ -178,18 +176,38 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndFloatFeatureBuffer
 
   @Override
   public void applyAsRequiredClause(DocAndScoreAccBuffer buffer) throws IOException {
-    normAndFreqBuffer.growNoCopy(buffer.size);
-    postingsEnum.nextRequiredFreqBuffer(buffer, normAndFreqBuffer.freqs);
-    for (int i = 0; i < buffer.size; i++) {
+    int size = buffer.size;
+    if (freqs.length < size) {
+      freqs = ArrayUtil.growNoCopy(freqs, size);
+      normValues = new long[freqs.length];
+    }
+
+    int intersectionSize = 0;
+    int curDoc = docID();
+    for (int i = 0; i < size; i++) {
+      int targetDoc = buffer.docs[i];
+      if (curDoc < targetDoc) {
+        curDoc = postingsEnum.advance(targetDoc);
+      }
+      if (curDoc == targetDoc) {
+        buffer.docs[intersectionSize] = targetDoc;
+        buffer.scores[intersectionSize] = buffer.scores[i];
+        freqs[intersectionSize] = postingsEnum.freq();
+        intersectionSize++;
+      }
+    }
+    buffer.size = intersectionSize;
+
+    for (int i = 0; i < intersectionSize; i++) {
       if (norms == null || norms.advanceExact(buffer.docs[i]) == false) {
-        normAndFreqBuffer.norms[i] = 1L;
+        normValues[i] = 1L;
       } else {
-        normAndFreqBuffer.norms[i] = norms.longValue();
+        normValues[i] = norms.longValue();
       }
     }
 
-    for (int i = 0; i < buffer.size; i++) {
-      buffer.scores[i] += scorer.score(normAndFreqBuffer.freqs[i], normAndFreqBuffer.norms[i]);
+    for (int i = 0; i < intersectionSize; i++) {
+      buffer.scores[i] += scorer.score(freqs[i], normValues[i]);
     }
   }
 }