diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index caa9eaf38503..a2600377e01c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -81,6 +81,8 @@ Bug Fixes * GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs (instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh) +* GITHUB#15120: Compute estimatedMergeBytes and totalMergeBytes for merges kicked off by IndexWriter.addIndexes(CodecReader[]) (Craig Perkins) + Changes in Runtime Behavior --------------------- * GITHUB#14187: The query cache is now disabled by default. (Adrien Grand) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 1224bebc9d06..a8c63c3000ea 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -21,6 +21,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; import java.time.Instant; import java.util.ArrayDeque; import java.util.ArrayList; @@ -3285,6 +3286,11 @@ public AddIndexesMergeSource(IndexWriter writer) { } public void registerMerge(MergePolicy.OneMerge merge) { + try { + addEstimatedBytesToMerge(merge); + } catch (IOException e) { + throw new UncheckedIOException(e); + } synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); } @@ -4777,6 +4783,21 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException { closeMergeReaders(merge, true, false); } + /** Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge. 
*/ + void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException { + assert merge.estimatedMergeBytes == 0; + assert merge.totalMergeBytes == 0; + for (SegmentCommitInfo info : merge.segments) { + if (info.info.maxDoc() > 0) { + final int delCount = numDeletedDocs(info); + assert delCount <= info.info.maxDoc(); + final double delRatio = ((double) delCount) / info.info.maxDoc(); + merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio)); + merge.totalMergeBytes += info.sizeInBytes(); + } + } + } + /** * Checks whether this merge involves any segments already participating in a merge. If not, this * merge is "registered", meaning we record that its segments are now participating in a merge, @@ -4868,17 +4889,7 @@ private synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IO mergingSegments.add(info); } - assert merge.estimatedMergeBytes == 0; - assert merge.totalMergeBytes == 0; - for (SegmentCommitInfo info : merge.segments) { - if (info.info.maxDoc() > 0) { - final int delCount = numDeletedDocs(info); - assert delCount <= info.info.maxDoc(); - final double delRatio = ((double) delCount) / info.info.maxDoc(); - merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio)); - merge.totalMergeBytes += info.sizeInBytes(); - } - } + addEstimatedBytesToMerge(merge); // Merge is now registered merge.registerDone = true; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java index e3f47be432ab..a1c887e93b27 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -461,4 +461,35 @@ public void run() { directory.close(); } + + public void testAddEstimatedBytesToMerge() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter writer = + new IndexWriter( + dir, + 
newIndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE))) { + + Document doc = new Document(); + doc.add(newTextField("field", "content", Field.Store.YES)); + + for (int i = 0; i < 10; i++) { + + writer.addDocument(doc); + } + writer.flush(); + + // Create a merge with the segments + SegmentInfos segmentInfos = writer.cloneSegmentInfos(); + MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList()); + + writer.addEstimatedBytesToMerge(merge); + + assertTrue(merge.estimatedMergeBytes > 0); + + assertTrue(merge.totalMergeBytes > 0); + + assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes); + } + } }