Skip to content

Commit 90835c5

Browse files
PostingDecodingUtil: interchange loops to enable better memory access and SIMD vectorisation
1 parent ba7f659 commit 90835c5

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ New Features
150150

151151
* GITHUB#14729: Support for Re-Ranking Queries using Late Interaction Model Multi-Vectors. (Vigya Sharma, Jim Ferenczi)
152152

153+
* GITHUB#15110: PostingDecodingUtil: interchange loops to enable better memory access and SIMD vectorisation. (Ramakrishna Chilaka)
154+
153155
Improvements
154156
---------------------
155157
* GITHUB#14458: Add an IndexDeletion policy that retains the last N commits. (Owais Kazi)

lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/PostingIndexInputBenchmark.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,20 @@ public void decodeAndPrefixSum(Blackhole bh) throws IOException {
105105
postingIn.decodeAndPrefixSum(bpv, 100, values);
106106
bh.consume(values);
107107
}
108+
109+
@Benchmark
110+
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
111+
public void decodeVector(Blackhole bh) throws IOException {
112+
in.seek(3); // random unaligned offset
113+
postingIn.decode(bpv, values);
114+
bh.consume(values);
115+
}
116+
117+
@Benchmark
118+
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
119+
public void decodeAndPrefixSumVector(Blackhole bh) throws IOException {
120+
in.seek(3); // random unaligned offset
121+
postingIn.decodeAndPrefixSum(bpv, 100, values);
122+
bh.consume(values);
123+
}
108124
}

lucene/core/src/java/org/apache/lucene/internal/vectorization/PostingDecodingUtil.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,21 @@ protected PostingDecodingUtil(IndexInput in) {
4444
public void splitInts(
4545
int count, int[] b, int bShift, int dec, int bMask, int[] c, int cIndex, int cMask)
4646
throws IOException {
47-
// Default implementation, which takes advantage of the C2 compiler's loop unrolling and
48-
// auto-vectorization.
4947
in.readInts(c, cIndex, count);
50-
int maxIter = (bShift - 1) / dec;
51-
for (int i = 0; i < count; ++i) {
52-
for (int j = 0; j <= maxIter; ++j) {
53-
b[count * j + i] = (c[cIndex + i] >>> (bShift - j * dec)) & bMask;
48+
final int maxIter = (bShift - 1) / dec;
49+
50+
// Process each shift level across all elements (better for vectorization)
51+
for (int j = 0; j <= maxIter; ++j) {
52+
final int shift = bShift - j * dec;
53+
final int bOffset = count * j;
54+
// Vectorizable loop: contiguous memory access with simple operations
55+
for (int i = 0; i < count; ++i) {
56+
b[bOffset + i] = (c[cIndex + i] >>> shift) & bMask;
5457
}
58+
}
59+
60+
// Apply mask to c array (vectorizable)
61+
for (int i = 0; i < count; ++i) {
5562
c[cIndex + i] &= cMask;
5663
}
5764
}

0 commit comments

Comments
 (0)