From 97317eb74bf71227bd8e2dd183e1709957ad5ea5 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 19:40:13 -0400 Subject: [PATCH 01/10] Add estimatedByteSizes to merges kicked off by IndexWriter.addIndexes(CodecReader[]) Signed-off-by: Craig Perkins --- .../org/apache/lucene/index/IndexWriter.java | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 1224bebc9d06..36a660aff19f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3284,7 +3284,8 @@ public AddIndexesMergeSource(IndexWriter writer) { this.writer = writer; } - public void registerMerge(MergePolicy.OneMerge merge) { + public void registerMerge(MergePolicy.OneMerge merge) throws IOException { + addEstimatedBytesToMerge(merge); synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); } @@ -4777,6 +4778,20 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException { closeMergeReaders(merge, true, false); } + public void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException { + assert merge.estimatedMergeBytes == 0; + assert merge.totalMergeBytes == 0; + for (SegmentCommitInfo info : merge.segments) { + if (info.info.maxDoc() > 0) { + final int delCount = numDeletedDocs(info); + assert delCount <= info.info.maxDoc(); + final double delRatio = ((double) delCount) / info.info.maxDoc(); + merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio)); + merge.totalMergeBytes += info.sizeInBytes(); + } + } + } + /** * Checks whether this merge involves any segments already participating in a merge. If not, this * merge is "registered", meaning we record that its segments are now participating in a merge, @@ -4868,17 +4883,7 @@ private synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IO mergingSegments.add(info); } - assert merge.estimatedMergeBytes == 0; - assert merge.totalMergeBytes == 0; - for (SegmentCommitInfo info : merge.segments) { - if (info.info.maxDoc() > 0) { - final int delCount = numDeletedDocs(info); - assert delCount <= info.info.maxDoc(); - final double delRatio = ((double) delCount) / info.info.maxDoc(); - merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio)); - merge.totalMergeBytes += info.sizeInBytes(); - } - } + addEstimatedBytesToMerge(merge); // Merge is now registered merge.registerDone = true; From ed8b0d87fa4f71dc3ef888ae665d334d39e8a98b Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 19:46:00 -0400 Subject: [PATCH 02/10] Swallow exception Signed-off-by: Craig Perkins --- .../core/src/java/org/apache/lucene/index/IndexWriter.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 36a660aff19f..89dd6984148c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3284,8 +3284,10 @@ public AddIndexesMergeSource(IndexWriter writer) { this.writer = writer; } - public void registerMerge(MergePolicy.OneMerge merge) throws IOException { - addEstimatedBytesToMerge(merge); + public void registerMerge(MergePolicy.OneMerge merge){ + try { + addEstimatedBytesToMerge(merge); + } catch (IOException ignore) { } synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); } From f8a4be60603d51c201d070dc01727a53fc2dc81d Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 19:49:21 -0400 Subject: [PATCH 03/10] Print stack trace Signed-off-by: Craig Perkins --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 89dd6984148c..45ee2857ff60 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3287,7 +3287,9 @@ public AddIndexesMergeSource(IndexWriter writer) { public void registerMerge(MergePolicy.OneMerge merge){ try { addEstimatedBytesToMerge(merge); - } catch (IOException ignore) { } + } catch (IOException ignore) { + ignore.printStackTrace(System.err); + } synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); } From 43ceb42f5ac0a5791a07b37c17ce9339aba3b054 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 20:01:42 -0400 Subject: [PATCH 04/10] Add simple test Signed-off-by: Craig Perkins --- .../lucene/index/TestIndexWriterMerging.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java index e3f47be432ab..2927a7889733 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -461,4 +461,31 @@ public void run() { directory.close(); } + + public void testAddEstimatedBytesToMerge() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE)); + + Document doc = new Document(); + doc.add(newTextField("field", "content", Field.Store.YES)); + for (int i = 0; i < 10; i++) { + writer.addDocument(doc); + } + writer.flush(); + + + // Create a merge with the segments + SegmentInfos segmentInfos = writer.cloneSegmentInfos(); + MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList()); + + writer.addEstimatedBytesToMerge(merge); + + assertTrue(merge.estimatedMergeBytes > 0); + assertTrue(merge.totalMergeBytes > 0); + assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes); + + writer.close(); + dir.close(); + } } From 234cb5e088d0ebf79901fcbbb51aa83a054a0285 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 20:06:26 -0400 Subject: [PATCH 05/10] Add entry in CHANGES.txt Signed-off-by: Craig Perkins --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index caa9eaf38503..a2600377e01c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -81,6 +81,8 @@ Bug Fixes * GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs (instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh) +* GITHUB#15120: Add estimatedByteSizes to merges kicked off by IndexWriter.addIndexes(CodecReader[]) (Craig Perkins) + Changes in Runtime Behavior --------------------- * GITHUB#14187: The query cache is now disabled by default. (Adrien Grand) From c2e1e86844b06c2d068d285abceb30c5a51a31c7 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 21:17:28 -0400 Subject: [PATCH 06/10] run ./gradlew tidy Signed-off-by: Craig Perkins --- .../src/java/org/apache/lucene/index/IndexWriter.java | 2 +- .../org/apache/lucene/index/TestIndexWriterMerging.java | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 45ee2857ff60..590846adff58 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3284,7 +3284,7 @@ public AddIndexesMergeSource(IndexWriter writer) { this.writer = writer; } - public void registerMerge(MergePolicy.OneMerge merge){ + public void registerMerge(MergePolicy.OneMerge merge) { try { addEstimatedBytesToMerge(merge); } catch (IOException ignore) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java index 2927a7889733..39b381a0d3f9 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -464,8 +464,11 @@ public void run() { public void testAddEstimatedBytesToMerge() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) - .setMergePolicy(NoMergePolicy.INSTANCE)); + IndexWriter writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE)); Document doc = new Document(); doc.add(newTextField("field", "content", Field.Store.YES)); @@ -474,7 +477,6 @@ public void testAddEstimatedBytesToMerge() throws IOException { } writer.flush(); - // Create a merge with the segments SegmentInfos segmentInfos = writer.cloneSegmentInfos(); MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList()); From b2d2cade6ed665b0532c5275cf13e407a91244c1 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 21:56:53 -0400 Subject: [PATCH 07/10] Use underscore Signed-off-by: Craig Perkins --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 590846adff58..1993b2b9effa 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3287,8 +3287,8 @@ public AddIndexesMergeSource(IndexWriter writer) { public void registerMerge(MergePolicy.OneMerge merge) { try { addEstimatedBytesToMerge(merge); - } catch (IOException ignore) { - ignore.printStackTrace(System.err); + } catch (IOException _) { + // ignore and append to pending merges } synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); From a72732cd6f90014678b98035aaf63e3be3e7cb26 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Sun, 24 Aug 2025 22:06:57 -0400 Subject: [PATCH 08/10] Add javadoc Signed-off-by: Craig Perkins --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 1993b2b9effa..76d9ede10e89 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -4782,6 +4782,7 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException { closeMergeReaders(merge, true, false); } + /** Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge. */ public void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException { assert merge.estimatedMergeBytes == 0; assert merge.totalMergeBytes == 0; From e5ea840bdd0b62698476d7c7ecfd102b65788d2d Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Mon, 25 Aug 2025 13:44:37 -0400 Subject: [PATCH 09/10] Make package-private Signed-off-by: Craig Perkins --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 76d9ede10e89..6f58b9d37eb0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -4783,7 +4783,7 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException { } /** Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge. */ - public void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException { + void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException { assert merge.estimatedMergeBytes == 0; assert merge.totalMergeBytes == 0; for (SegmentCommitInfo info : merge.segments) { From 799b419c79aeb899f0dccfe8546da5b6831f9e8b Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Mon, 25 Aug 2025 15:41:14 -0400 Subject: [PATCH 10/10] Address code review comments Signed-off-by: Craig Perkins --- .../org/apache/lucene/index/IndexWriter.java | 5 ++- .../lucene/index/TestIndexWriterMerging.java | 44 ++++++++++--------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 6f58b9d37eb0..a8c63c3000ea 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -21,6 +21,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; import java.time.Instant; import java.util.ArrayDeque; import java.util.ArrayList; @@ -3287,8 +3288,8 @@ public AddIndexesMergeSource(IndexWriter writer) { public void registerMerge(MergePolicy.OneMerge merge) { try { addEstimatedBytesToMerge(merge); - } catch (IOException _) { - // ignore and append to pending merges + } catch (IOException e) { + throw new UncheckedIOException(e); } synchronized (IndexWriter.this) { pendingAddIndexesMerges.add(merge); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java index 39b381a0d3f9..a1c887e93b27 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -463,31 +463,33 @@ public void run() { } public void testAddEstimatedBytesToMerge() throws IOException { - Directory dir = newDirectory(); - IndexWriter writer = - new IndexWriter( - dir, - newIndexWriterConfig(new MockAnalyzer(random())) - .setMergePolicy(NoMergePolicy.INSTANCE)); + try (Directory dir = newDirectory(); + IndexWriter writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE))) { - Document doc = new Document(); - doc.add(newTextField("field", "content", Field.Store.YES)); - for (int i = 0; i < 10; i++) { - writer.addDocument(doc); - } - writer.flush(); + Document doc = new Document(); + doc.add(newTextField("field", "content", Field.Store.YES)); - // Create a merge with the segments - SegmentInfos segmentInfos = writer.cloneSegmentInfos(); - MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList()); + for (int i = 0; i < 10; i++) { - writer.addEstimatedBytesToMerge(merge); + writer.addDocument(doc); + } + writer.flush(); - assertTrue(merge.estimatedMergeBytes > 0); - assertTrue(merge.totalMergeBytes > 0); - assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes); + // Create a merge with the segments + SegmentInfos segmentInfos = writer.cloneSegmentInfos(); + MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList()); - writer.close(); - dir.close(); + writer.addEstimatedBytesToMerge(merge); + + assertTrue(merge.estimatedMergeBytes > 0); + + assertTrue(merge.totalMergeBytes > 0); + + assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes); + } } }