Skip to content

Commit bfdbac3

Browse files
authored
Add estimatedByteSizes to merges kicked off by IndexWriter.addIndexes(CodecReader[]) (#15120)
1 parent 4b82169 commit bfdbac3

File tree

3 files changed

+55
-11
lines changed

3 files changed

+55
-11
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ Bug Fixes
7979
* GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs
8080
(instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh)
8181

82+
* GITHUB#15120: Populate estimatedMergeBytes and totalMergeBytes on merges kicked off by IndexWriter.addIndexes(CodecReader[]) (Craig Perkins)
83+
8284
Changes in Runtime Behavior
8385
---------------------
8486
* GITHUB#14187: The query cache is now disabled by default. (Adrien Grand)

lucene/core/src/java/org/apache/lucene/index/IndexWriter.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.io.Closeable;
2323
import java.io.IOException;
24+
import java.io.UncheckedIOException;
2425
import java.time.Instant;
2526
import java.util.ArrayDeque;
2627
import java.util.ArrayList;
@@ -3285,6 +3286,11 @@ public AddIndexesMergeSource(IndexWriter writer) {
32853286
}
32863287

32873288
public void registerMerge(MergePolicy.OneMerge merge) {
3289+
try {
3290+
addEstimatedBytesToMerge(merge);
3291+
} catch (IOException e) {
3292+
throw new UncheckedIOException(e);
3293+
}
32883294
synchronized (IndexWriter.this) {
32893295
pendingAddIndexesMerges.add(merge);
32903296
}
@@ -4777,6 +4783,21 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException {
47774783
closeMergeReaders(merge, true, false);
47784784
}
47794785

4786+
/**
 * Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge.
 *
 * <p>Both fields are expected to still be zero on entry (checked by assertion only). For every
 * segment in {@code merge.segments} whose {@code maxDoc} is greater than zero, the segment's full
 * size is added to {@code totalMergeBytes}, and that size scaled by the fraction of non-deleted
 * documents is added to {@code estimatedMergeBytes}. Segments with a {@code maxDoc} of zero
 * contribute nothing to either field.
 *
 * <p>Package-private: it is invoked from both merge registration paths and directly from tests.
 *
 * @param merge the merge whose byte estimates are accumulated in place
 * @throws IOException if one of the per-segment lookups performed here fails
 */
4787+
void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException {
4788+
assert merge.estimatedMergeBytes == 0;
4789+
assert merge.totalMergeBytes == 0;
4790+
for (SegmentCommitInfo info : merge.segments) {
4791+
// maxDoc is the divisor below, so segments reporting zero documents are skipped entirely.
if (info.info.maxDoc() > 0) {
4792+
final int delCount = numDeletedDocs(info);
4793+
assert delCount <= info.info.maxDoc();
4794+
final double delRatio = ((double) delCount) / info.info.maxDoc();
4795+
// Estimate = on-disk size scaled by the live-document fraction; the cast truncates to long.
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
4796+
// Total = raw on-disk size, regardless of deletions.
merge.totalMergeBytes += info.sizeInBytes();
4797+
}
4798+
}
4799+
}
4800+
47804801
/**
47814802
* Checks whether this merge involves any segments already participating in a merge. If not, this
47824803
* merge is "registered", meaning we record that its segments are now participating in a merge,
@@ -4868,17 +4889,7 @@ private synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IO
48684889
mergingSegments.add(info);
48694890
}
48704891

4871-
assert merge.estimatedMergeBytes == 0;
4872-
assert merge.totalMergeBytes == 0;
4873-
for (SegmentCommitInfo info : merge.segments) {
4874-
if (info.info.maxDoc() > 0) {
4875-
final int delCount = numDeletedDocs(info);
4876-
assert delCount <= info.info.maxDoc();
4877-
final double delRatio = ((double) delCount) / info.info.maxDoc();
4878-
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
4879-
merge.totalMergeBytes += info.sizeInBytes();
4880-
}
4881-
}
4892+
addEstimatedBytesToMerge(merge);
48824893

48834894
// Merge is now registered
48844895
merge.registerDone = true;

lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,4 +461,35 @@ public void run() {
461461

462462
directory.close();
463463
}
464+
465+
/**
 * Checks that {@code IndexWriter.addEstimatedBytesToMerge} fills in both byte estimates for a
 * merge built from the writer's current segments after indexing and flushing ten documents.
 */
public void testAddEstimatedBytesToMerge() throws IOException {
466+
try (Directory dir = newDirectory();
467+
IndexWriter writer =
468+
new IndexWriter(
469+
dir,
470+
newIndexWriterConfig(new MockAnalyzer(random()))
471+
// NOTE(review): presumably disables automatic merging so the flushed segments stay as-is; confirm.
.setMergePolicy(NoMergePolicy.INSTANCE))) {
472+
473+
Document doc = new Document();
474+
doc.add(newTextField("field", "content", Field.Store.YES));
475+
476+
for (int i = 0; i < 10; i++) {
477+
478+
writer.addDocument(doc);
479+
}
480+
writer.flush();
481+
482+
// Create a merge with the segments
483+
SegmentInfos segmentInfos = writer.cloneSegmentInfos();
484+
MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList());
485+
486+
// Call the helper directly (it is package-private) rather than going through merge registration.
writer.addEstimatedBytesToMerge(merge);
487+
488+
// Both estimates must have been populated for the non-empty segments.
assertTrue(merge.estimatedMergeBytes > 0);
489+
490+
assertTrue(merge.totalMergeBytes > 0);
491+
492+
// The estimate is the total scaled by the live-document fraction, so it cannot exceed the total.
assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes);
493+
}
494+
}
464495
}

0 commit comments

Comments
 (0)