From 52f08c18feb809e42e0b3cf175c8bed5e445ea80 Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Mon, 11 Aug 2025 21:59:46 +0800 Subject: [PATCH 1/4] Use FixedBitSet#cardinality for counting liveDocs in CheckIndex --- .../java/org/apache/lucene/index/CheckIndex.java | 7 +------ .../org/apache/lucene/index/PendingDeletes.java | 13 ++----------- .../java/org/apache/lucene/util/FixedBitSet.java | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 7e98e51bf69b..2f7c9e5e47b8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1297,12 +1297,7 @@ public static Status.LiveDocStatus testLiveDocs( if (liveDocs == null) { throw new CheckIndexException("segment should have deletions, but liveDocs is null"); } else { - int numLive = 0; - for (int j = 0; j < liveDocs.length(); j++) { - if (liveDocs.get(j)) { - numLive++; - } - } + int numLive = FixedBitSet.cardinality(liveDocs, 0, liveDocs.length()); if (numLive != numDocs) { throw new CheckIndexException( "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java index 1844afc760ea..957c9b05adff 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java +++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java @@ -142,12 +142,7 @@ void onNewReader(CodecReader reader, SegmentCommitInfo info) throws IOException private boolean assertCheckLiveDocs(Bits bits, int expectedLength, int expectedDeleteCount) { assert bits.length() == expectedLength; - int deletedCount = 0; - for (int i = 0; i < bits.length(); i++) { - if (bits.get(i) == false) { - deletedCount++; - } - } + int deletedCount = 
bits.length() - FixedBitSet.cardinality(bits, 0, bits.length()); assert deletedCount == expectedDeleteCount : "deleted: " + deletedCount + " != expected: " + expectedDeleteCount; return true; @@ -255,11 +250,7 @@ boolean verifyDocCounts(CodecReader reader) { int count = 0; Bits liveDocs = getLiveDocs(); if (liveDocs != null) { - for (int docID = 0; docID < info.info.maxDoc(); docID++) { - if (liveDocs.get(docID)) { - count++; - } - } + count = FixedBitSet.cardinality(liveDocs, 0, info.info.maxDoc()); } else { count = info.info.maxDoc(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java index e896bff8721a..eaecfb2a1107 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java @@ -867,4 +867,18 @@ private static void forEach(long bits, int base, IOIntConsumer consumer) throws bits ^= 1L << ntz; } } + + public static int cardinality(Bits bits, int from, int to) { + assert bits != null; + if (bits instanceof FixedBits fixedBits) { + return fixedBits.bitSet.cardinality(from, to); + } + int count = 0; + for (int i = from; i < to; i++) { + if (bits.get(i)) { + count++; + } + } + return count; + } } From b4ced461ea02dc6f98872c6f3bd3bbea3b3a35e9 Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Tue, 12 Aug 2025 10:44:30 +0800 Subject: [PATCH 2/4] add java doc and changes entry --- lucene/CHANGES.txt | 2 ++ .../org/apache/lucene/util/FixedBitSet.java | 29 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d1939e303f61..5540eeffca09 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -239,6 +239,8 @@ Optimizations * GITHUB#14980: Add bulk off-heap scoring for float32 vectors (Chris Hegarty) +* GITHUB#15045: Use FixedBitSet#cardinality for counting liveDocs in CheckIndex (Zhang Chao) + Changes in Runtime 
Behavior --------------------- * GITHUB#14823: Decrease TieredMergePolicy's default number of segments per diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java index eaecfb2a1107..5c7973cf6a7c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java @@ -239,6 +239,21 @@ public int cardinality(int from, int to) { return cardinality; } + /** Just like {@link #cardinality(int, int)}, return the number of set bits for {@code Bits}. */ + public static int cardinality(Bits bits, int from, int to) { + assert bits != null; + if (bits instanceof FixedBits fixedBits) { + return fixedBits.bitSet.cardinality(from, to); + } + int count = 0; + for (int i = from; i < to; i++) { + if (bits.get(i)) { + count++; + } + } + return count; + } + @Override public int approximateCardinality() { // Naive sampling: compute the number of bits that are set on the first 16 longs every 1024 @@ -867,18 +882,4 @@ private static void forEach(long bits, int base, IOIntConsumer consumer) throws bits ^= 1L << ntz; } } - - public static int cardinality(Bits bits, int from, int to) { - assert bits != null; - if (bits instanceof FixedBits fixedBits) { - return fixedBits.bitSet.cardinality(from, to); - } - int count = 0; - for (int i = from; i < to; i++) { - if (bits.get(i)) { - count++; - } - } - return count; - } } From 447f6dfa9a3ba9f31ec0b1f02beb6427de589c9f Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Mon, 25 Aug 2025 20:54:43 +0800 Subject: [PATCH 3/4] apply suggestions --- .../java/org/apache/lucene/index/CheckIndex.java | 8 +++++--- .../org/apache/lucene/index/PendingDeletes.java | 11 +++++++++-- .../java/org/apache/lucene/util/FixedBitSet.java | 15 --------------- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java 
b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 2f7c9e5e47b8..e575f0c9f16c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1297,10 +1297,12 @@ public static Status.LiveDocStatus testLiveDocs( if (liveDocs == null) { throw new CheckIndexException("segment should have deletions, but liveDocs is null"); } else { - int numLive = FixedBitSet.cardinality(liveDocs, 0, liveDocs.length()); - if (numLive != numDocs) { + FixedBitSet bitSet = new FixedBitSet(liveDocs.length()); + bitSet.set(0, liveDocs.length()); + liveDocs.applyMask(bitSet, 0); + if (bitSet.cardinality() != numDocs) { throw new CheckIndexException( - "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); + "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + bitSet.cardinality()); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java index 957c9b05adff..580a283e3846 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java +++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java @@ -142,7 +142,7 @@ void onNewReader(CodecReader reader, SegmentCommitInfo info) throws IOException private boolean assertCheckLiveDocs(Bits bits, int expectedLength, int expectedDeleteCount) { assert bits.length() == expectedLength; - int deletedCount = bits.length() - FixedBitSet.cardinality(bits, 0, bits.length()); + int deletedCount = bits.length() - bitsCardinality(bits); assert deletedCount == expectedDeleteCount : "deleted: " + deletedCount + " != expected: " + expectedDeleteCount; return true; @@ -250,7 +250,7 @@ boolean verifyDocCounts(CodecReader reader) { int count = 0; Bits liveDocs = getLiveDocs(); if (liveDocs != null) { - count = FixedBitSet.cardinality(liveDocs, 0, info.info.maxDoc()); + count = bitsCardinality(liveDocs); } else { count = 
info.info.maxDoc(); } @@ -289,4 +289,11 @@ assert numDocs() == count boolean mustInitOnDelete() { return false; } + + int bitsCardinality(Bits bits) { + FixedBitSet bitSet = new FixedBitSet(bits.length()); + bitSet.set(0, bits.length()); + bits.applyMask(bitSet, 0); + return bitSet.cardinality(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java index 5c7973cf6a7c..e896bff8721a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java @@ -239,21 +239,6 @@ public int cardinality(int from, int to) { return cardinality; } - /** Just like {@link #cardinality(int, int)}, return the number of set bits for {@code Bits}. */ - public static int cardinality(Bits bits, int from, int to) { - assert bits != null; - if (bits instanceof FixedBits fixedBits) { - return fixedBits.bitSet.cardinality(from, to); - } - int count = 0; - for (int i = from; i < to; i++) { - if (bits.get(i)) { - count++; - } - } - return count; - } - @Override public int approximateCardinality() { // Naive sampling: compute the number of bits that are set on the first 16 longs every 1024 From ab714d5af318005dae4645d5362b7cd6bded24ac Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Tue, 26 Aug 2025 21:03:56 +0800 Subject: [PATCH 4/4] apply suggestions and revert changes in PendingDeletes --- .../org/apache/lucene/index/CheckIndex.java | 30 +++++++++++++++---- .../apache/lucene/index/PendingDeletes.java | 20 +++++++------ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index e575f0c9f16c..7d85d6258d57 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1297,12 +1297,10 @@ public static 
Status.LiveDocStatus testLiveDocs( if (liveDocs == null) { throw new CheckIndexException("segment should have deletions, but liveDocs is null"); } else { - FixedBitSet bitSet = new FixedBitSet(liveDocs.length()); - bitSet.set(0, liveDocs.length()); - liveDocs.applyMask(bitSet, 0); - if (bitSet.cardinality() != numDocs) { + int numLive = bitsCardinality(liveDocs); + if (numLive != numDocs) { throw new CheckIndexException( - "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + bitSet.cardinality()); + "liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); } } @@ -1345,6 +1343,28 @@ public static Status.LiveDocStatus testLiveDocs( return status; } + /** + * Returns the cardinality of the given {@code Bits}. + * + *
<p>This method processes bits in batches of 1024 using {@link Bits#applyMask} and {@link + * FixedBitSet#cardinality}, which is faster than checking bits one by one. + */ + static int bitsCardinality(Bits bits) { + int cardinality = 0; + FixedBitSet copy = new FixedBitSet(1024); + for (int offset = 0; offset < bits.length(); offset += copy.length()) { + int numBitsToCopy = Math.min(bits.length() - offset, copy.length()); + copy.set(0, copy.length()); + if (numBitsToCopy < copy.length()) { + // Clear ghost bits + copy.clear(numBitsToCopy, copy.length()); + } + bits.applyMask(copy, offset); + cardinality += copy.cardinality(); + } + return cardinality; + } + /** Test field infos. */ public static Status.FieldInfoStatus testFieldInfos( CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java index 580a283e3846..1844afc760ea 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java +++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java @@ -142,7 +142,12 @@ void onNewReader(CodecReader reader, SegmentCommitInfo info) throws IOException private boolean assertCheckLiveDocs(Bits bits, int expectedLength, int expectedDeleteCount) { assert bits.length() == expectedLength; - int deletedCount = bits.length() - bitsCardinality(bits); + int deletedCount = 0; + for (int i = 0; i < bits.length(); i++) { + if (bits.get(i) == false) { + deletedCount++; + } + } assert deletedCount == expectedDeleteCount : "deleted: " + deletedCount + " != expected: " + expectedDeleteCount; return true; @@ -250,7 +255,11 @@ boolean verifyDocCounts(CodecReader reader) { int count = 0; Bits liveDocs = getLiveDocs(); if (liveDocs != null) { - count = bitsCardinality(liveDocs); + for (int docID = 0; docID < info.info.maxDoc(); docID++) { + if (liveDocs.get(docID)) { + count++; + } + } } else { count = 
info.info.maxDoc(); } @@ -289,11 +298,4 @@ assert numDocs() == count boolean mustInitOnDelete() { return false; } - - int bitsCardinality(Bits bits) { - FixedBitSet bitSet = new FixedBitSet(bits.length()); - bitSet.set(0, bits.length()); - bits.applyMask(bitSet, 0); - return bitSet.cardinality(); - } }