diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a7b1356e1cf4..47c18b21d080 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -126,6 +126,8 @@ Optimizations
 ---------------------
 * GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)
 
+* GITHUB#14998: Speed up flushing of live docs. (Adrien Grand)
+
 Bug Fixes
 ---------------------
 (No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
index 9ad7090d23ce..5f31a3ee6bdc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
@@ -138,18 +138,36 @@ public void writeLiveDocs(
   }
 
+  /**
+   * Writes {@code bits} to {@code output} as a sequence of longs, 64 bits per long.
+   *
+   * @return the number of bits of {@code bits} that are not set (the delete count)
+   */
   private int writeBits(IndexOutput output, Bits bits) throws IOException {
-    int delCount = 0;
-    final int longCount = FixedBitSet.bits2words(bits.length());
-    for (int i = 0; i < longCount; ++i) {
-      long currentBits = 0;
-      for (int j = i << 6, end = Math.min(j + 63, bits.length() - 1); j <= end; ++j) {
-        if (bits.get(j)) {
-          currentBits |= 1L << j; // mod 64
-        } else {
-          delCount += 1;
-        }
+    final int numBits = bits.length();
+    // Start from "every bit is unset" and subtract the set bits found in each batch.
+    int delCount = numBits;
+    // Copy bits in batches of 1024 bits at once using Bits#applyMask, which is faster than checking
+    // bits one by one.
+    FixedBitSet copy = new FixedBitSet(1024);
+    final int batchSize = copy.length();
+    int offset = 0;
+    while (offset < numBits) {
+      final int numBitsToCopy = Math.min(numBits - offset, batchSize);
+      copy.set(0, batchSize);
+      if (numBitsToCopy < batchSize) {
+        // Clear ghost bits
+        copy.clear(numBitsToCopy, batchSize);
+      }
+      bits.applyMask(copy, offset);
+      delCount -= copy.cardinality();
+      final long[] words = copy.getBits();
+      final int longCount = FixedBitSet.bits2words(numBitsToCopy);
+      for (int i = 0; i < longCount; ++i) {
+        output.writeLong(words[i]);
       }
-      output.writeLong(currentBits);
+      // Advance by the number of bits actually copied so that offset never exceeds numBits;
+      // adding a full batch every time overflows int when numBits is near Integer.MAX_VALUE.
+      offset += numBitsToCopy;
     }
     return delCount;
   }