Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.util.Comparator;
import java.util.Arrays;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
import org.apache.lucene.index.FieldInfo;
Expand Down Expand Up @@ -57,14 +57,12 @@ protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxO
OnHeapHnswGraph graph;
BitSet initializedNodes = null;

if (graphReaders.size() == 0) {
if (largestGraphReader == null) {
graph = new OnHeapHnswGraph(M, maxOrd);
} else {
graphReaders.sort(Comparator.comparingInt(GraphReader::graphSize).reversed());
GraphReader initGraphReader = graphReaders.get(0);
KnnVectorsReader initReader = initGraphReader.reader();
MergeState.DocMap initDocMap = initGraphReader.initDocMap();
int initGraphSize = initGraphReader.graphSize();
KnnVectorsReader initReader = largestGraphReader.reader();
MergeState.DocMap initDocMap = largestGraphReader.initDocMap();
int initGraphSize = largestGraphReader.graphSize();
HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name);

if (initializerGraph.size() == 0) {
Expand Down Expand Up @@ -117,6 +115,9 @@ private static int[] getNewOrdMapping(
docId != NO_MORE_DOCS;
docId = initializerIterator.nextDoc()) {
int newId = initDocMap.get(docId);
if (newId == -1) {
continue;
}
maxNewDocID = Math.max(newId, maxNewDocID);
assert newIdToOldOrdinal.containsKey(newId) == false;
newIdToOldOrdinal.put(newId, initializerIterator.index());
Expand All @@ -126,6 +127,7 @@ private static int[] getNewOrdMapping(
return new int[0];
}
final int[] oldToNewOrdinalMap = new int[initGraphSize];
Arrays.fill(oldToNewOrdinalMap, -1);
KnnVectorValues.DocIndexIterator mergedVectorIterator = mergedVectorValues.iterator();
for (int newDocId = mergedVectorIterator.nextDoc();
newDocId <= maxNewDocID;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
Expand Down Expand Up @@ -48,6 +48,8 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
protected final int beamWidth;

protected List<GraphReader> graphReaders = new ArrayList<>();
protected GraphReader largestGraphReader;

private int numReaders = 0;

/** Represents a vector reader that contains graph info. */
Expand All @@ -73,7 +75,7 @@ public IncrementalHnswGraphMerger(
public IncrementalHnswGraphMerger addReader(
KnnVectorsReader reader, MergeState.DocMap docMap, Bits liveDocs) throws IOException {
numReaders++;
if (hasDeletes(liveDocs) || !(reader instanceof HnswGraphProvider)) {
if (!(reader instanceof HnswGraphProvider)) {
return this;
}
HnswGraph graph = ((HnswGraphProvider) reader).getGraph(fieldInfo.name);
Expand All @@ -98,7 +100,16 @@ public IncrementalHnswGraphMerger addReader(
candidateVectorCount = vectorValues.size();
}
}
graphReaders.add(new GraphReader(reader, docMap, candidateVectorCount));

GraphReader graphReader = new GraphReader(reader, docMap, candidateVectorCount);
if (largestGraphReader == null || candidateVectorCount > largestGraphReader.graphSize) {
largestGraphReader = graphReader;
}

if (!hasDeletes(liveDocs)) {
graphReaders.add(graphReader);
}

return this;
}

Expand All @@ -112,11 +123,13 @@ public IncrementalHnswGraphMerger addReader(
*/
protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxOrd)
throws IOException {
if (graphReaders.size() == 0) {
if (largestGraphReader == null) {
return HnswGraphBuilder.create(
scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed, maxOrd);
}
graphReaders.sort(Comparator.comparingInt(GraphReader::graphSize).reversed());
if (!graphReaders.contains(largestGraphReader)) {
graphReaders.addFirst(largestGraphReader);
}

final BitSet initializedNodes =
graphReaders.size() == numReaders ? null : new FixedBitSet(maxOrd);
Expand Down Expand Up @@ -163,6 +176,7 @@ protected final int[][] getNewOrdMapping(
newDocIdToOldOrdinals[i].put(newDocId, vectorsIter.index());
}
oldToNewOrdinalMap[i] = new int[graphReaders.get(i).graphSize];
Arrays.fill(oldToNewOrdinalMap[i], -1);
}

KnnVectorValues.DocIndexIterator mergedVectorIterator = mergedVectorValues.iterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ public static OnHeapHnswGraph initGraph(
while (it.hasNext()) {
int oldOrd = it.nextInt();
int newOrd = newOrdMap[oldOrd];
if (newOrd == -1) {
continue;
}
hnsw.addNode(level, newOrd);
hnsw.trySetNewEntryNode(newOrd, level);
NeighborArray newNeighbors = hnsw.getNeighbors(level, newOrd);
Expand All @@ -77,6 +80,9 @@ public static OnHeapHnswGraph initGraph(
oldNeighbor != NO_MORE_DOCS;
oldNeighbor = initializerGraph.nextNeighbor()) {
int newNeighbor = newOrdMap[oldNeighbor];
if (newNeighbor == -1) {
continue;
}
// we will compute these scores later when we need to pop out the non-diverse nodes
newNeighbors.addOutOfOrder(newNeighbor, Float.NaN);
}
Expand Down
Loading