Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ Bug Fixes
* GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs
(instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh)

* GITHUB#15120: Populate estimatedMergeBytes and totalMergeBytes on merges kicked off by IndexWriter.addIndexes(CodecReader[]) (Craig Perkins)

Changes in Runtime Behavior
---------------------
* GITHUB#14187: The query cache is now disabled by default. (Adrien Grand)
Expand Down
33 changes: 22 additions & 11 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Instant;
import java.util.ArrayDeque;
import java.util.ArrayList;
Expand Down Expand Up @@ -3285,6 +3286,11 @@ public AddIndexesMergeSource(IndexWriter writer) {
}

public void registerMerge(MergePolicy.OneMerge merge) {
try {
addEstimatedBytesToMerge(merge);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
synchronized (IndexWriter.this) {
pendingAddIndexesMerges.add(merge);
}
Expand Down Expand Up @@ -4777,6 +4783,21 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException {
closeMergeReaders(merge, true, false);
}

/**
 * Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge.
 *
 * <p>Walks {@code merge.segments} and, for every segment whose {@code maxDoc()} is greater than
 * zero, adds the segment's {@code sizeInBytes()} to {@code totalMergeBytes} and adds that size
 * scaled by the fraction of non-deleted documents to {@code estimatedMergeBytes}.
 *
 * <p>Both fields are asserted to be zero on entry and are accumulated with {@code +=}, so this is
 * meant to be invoked once per merge. The method is an instance method because it calls {@code
 * numDeletedDocs(info)}.
 *
 * @param merge the merge whose {@code estimatedMergeBytes} and {@code totalMergeBytes} are updated
 *     in place
 * @throws IOException propagated from the per-segment lookups performed here (the visible code
 *     does not show which call raises it)
 */
void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this method be static too? I wonder if it uses any IW's state at all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to make it static, but I was not able to because it calls numDeletedDocs() which is non-static. When I tried to make numDeletedDocs() static I ran into other issues. I've been trying to contribute more to Lucene lately, but still very much coming up to speed on the repo so opted to start small. Do you know if numDeletedDocs() and any other methods it calls can be changed to static?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, let it stay as it is now then.

// Both totals must still be untouched: the loop below accumulates with +=, so a second
// invocation on the same merge would double-count.
assert merge.estimatedMergeBytes == 0;
assert merge.totalMergeBytes == 0;
for (SegmentCommitInfo info : merge.segments) {
// Segments with maxDoc() == 0 contribute nothing; skipping them also keeps the ratio's
// denominator below non-zero.
if (info.info.maxDoc() > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.info.maxDoc();
// Fraction of this segment's documents that are counted as deleted.
final double delRatio = ((double) delCount) / info.info.maxDoc();
// Estimate: the segment's size scaled down by the non-deleted fraction.
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
// Total: the segment's full size, regardless of deletions.
merge.totalMergeBytes += info.sizeInBytes();
}
}
}

/**
* Checks whether this merge involves any segments already participating in a merge. If not, this
* merge is "registered", meaning we record that its segments are now participating in a merge,
Expand Down Expand Up @@ -4868,17 +4889,7 @@ private synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IO
mergingSegments.add(info);
}

assert merge.estimatedMergeBytes == 0;
assert merge.totalMergeBytes == 0;
for (SegmentCommitInfo info : merge.segments) {
if (info.info.maxDoc() > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.info.maxDoc();
final double delRatio = ((double) delCount) / info.info.maxDoc();
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
merge.totalMergeBytes += info.sizeInBytes();
}
}
addEstimatedBytesToMerge(merge);

// Merge is now registered
merge.registerDone = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,4 +461,35 @@ public void run() {

directory.close();
}

/**
 * Checks that {@code IndexWriter.addEstimatedBytesToMerge} fills in both size fields of a merge
 * built from the writer's current segments: each must be positive, and the estimate must not
 * exceed the total.
 */
public void testAddEstimatedBytesToMerge() throws IOException {
  try (Directory indexDir = newDirectory();
      IndexWriter indexWriter =
          new IndexWriter(
              indexDir,
              newIndexWriterConfig(new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.INSTANCE))) {

    // The same document instance is indexed ten times, then flushed.
    Document sample = new Document();
    sample.add(newTextField("field", "content", Field.Store.YES));

    int remaining = 10;
    while (remaining > 0) {
      indexWriter.addDocument(sample);
      remaining--;
    }
    indexWriter.flush();

    // Build the merge directly from a snapshot of the writer's segment infos.
    MergePolicy.OneMerge candidate =
        new MergePolicy.OneMerge(indexWriter.cloneSegmentInfos().asList());

    indexWriter.addEstimatedBytesToMerge(candidate);

    // Both totals were populated ...
    assertTrue(candidate.estimatedMergeBytes > 0);
    assertTrue(candidate.totalMergeBytes > 0);

    // ... and the estimate never exceeds the total.
    assertTrue(candidate.estimatedMergeBytes <= candidate.totalMergeBytes);
  }
}
}
Loading