Skip to content

Commit e488f7c

Browse files
authored
Expose mergePolicy in KnnIndexTester (#132062)
1 parent 86c28f0 commit e488f7c

File tree

3 files changed

+53
-5
lines changed

3 files changed

+53
-5
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ record CmdLineArgs(
5252
int quantizeBits,
5353
VectorEncoding vectorEncoding,
5454
int dimensions,
55-
boolean earlyTermination
55+
boolean earlyTermination,
56+
KnnIndexTester.MergePolicyType mergePolicy
5657
) implements ToXContentObject {
5758

5859
static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors");
@@ -79,6 +80,7 @@ record CmdLineArgs(
7980
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
8081
static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity");
8182
static final ParseField SEED_FIELD = new ParseField("seed");
83+
static final ParseField MERGE_POLICY_FIELD = new ParseField("merge_policy");
8284

8385
static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
8486
Builder builder = PARSER.apply(parser, null);
@@ -112,6 +114,7 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
112114
PARSER.declareBoolean(Builder::setEarlyTermination, EARLY_TERMINATION_FIELD);
113115
PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD);
114116
PARSER.declareLong(Builder::setSeed, SEED_FIELD);
117+
PARSER.declareString(Builder::setMergePolicy, MERGE_POLICY_FIELD);
115118
}
116119

117120
@Override
@@ -179,6 +182,7 @@ static class Builder {
179182
private boolean earlyTermination;
180183
private float filterSelectivity = 1f;
181184
private long seed = 1751900822751L;
185+
private KnnIndexTester.MergePolicyType mergePolicy = null;
182186

183187
public Builder setDocVectors(List<String> docVectors) {
184188
if (docVectors == null || docVectors.isEmpty()) {
@@ -304,6 +308,11 @@ public Builder setSeed(long seed) {
304308
return this;
305309
}
306310

311+
public Builder setMergePolicy(String mergePolicy) {
312+
this.mergePolicy = KnnIndexTester.MergePolicyType.valueOf(mergePolicy.toUpperCase(Locale.ROOT));
313+
return this;
314+
}
315+
307316
public CmdLineArgs build() {
308317
if (docVectors == null) {
309318
throw new IllegalArgumentException("Document vectors path must be provided");
@@ -337,7 +346,8 @@ public CmdLineArgs build() {
337346
quantizeBits,
338347
vectorEncoding,
339348
dimensions,
340-
earlyTermination
349+
earlyTermination,
350+
mergePolicy
341351
);
342352
}
343353
}

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
import org.apache.lucene.codecs.KnnVectorsFormat;
1616
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
1717
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
18+
import org.apache.lucene.index.LogByteSizeMergePolicy;
19+
import org.apache.lucene.index.LogDocMergePolicy;
20+
import org.apache.lucene.index.MergePolicy;
21+
import org.apache.lucene.index.NoMergePolicy;
22+
import org.apache.lucene.index.TieredMergePolicy;
1823
import org.elasticsearch.cli.ProcessInfo;
1924
import org.elasticsearch.common.Strings;
2025
import org.elasticsearch.common.logging.LogConfigurator;
@@ -69,6 +74,13 @@ enum IndexType {
6974
IVF
7075
}
7176

77+
/**
 * Names of the merge policies that can be requested for index creation.
 */
enum MergePolicyType {
    /** Requests a {@code TieredMergePolicy}. */
    TIERED,
    /** Requests a {@code LogByteSizeMergePolicy}. */
    LOG_BYTE,
    /** Requests {@code NoMergePolicy.INSTANCE}. */
    NO,
    /** Requests a {@code LogDocMergePolicy}. */
    LOG_DOC
}
83+
7284
private static String formatIndexPath(CmdLineArgs args) {
7385
List<String> suffix = new ArrayList<>();
7486
if (args.indexType() == IndexType.FLAT) {
@@ -196,6 +208,7 @@ public static void main(String[] args) throws Exception {
196208
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
197209
Codec codec = createCodec(cmdLineArgs);
198210
Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs));
211+
MergePolicy mergePolicy = getMergePolicy(cmdLineArgs);
199212
if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) {
200213
KnnIndexer knnIndexer = new KnnIndexer(
201214
cmdLineArgs.docVectors(),
@@ -205,7 +218,8 @@ public static void main(String[] args) throws Exception {
205218
cmdLineArgs.vectorEncoding(),
206219
cmdLineArgs.dimensions(),
207220
cmdLineArgs.vectorSpace(),
208-
cmdLineArgs.numDocs()
221+
cmdLineArgs.numDocs(),
222+
mergePolicy
209223
);
210224
if (cmdLineArgs.reindex() == false && Files.exists(indexPath) == false) {
211225
throw new IllegalArgumentException("Index path does not exist: " + indexPath);
@@ -232,6 +246,24 @@ public static void main(String[] args) throws Exception {
232246
logger.info("Results: \n" + formattedResults);
233247
}
234248

249+
private static MergePolicy getMergePolicy(CmdLineArgs args) {
250+
MergePolicy mergePolicy = null;
251+
if (args.mergePolicy() != null) {
252+
if (args.mergePolicy() == MergePolicyType.TIERED) {
253+
mergePolicy = new TieredMergePolicy();
254+
} else if (args.mergePolicy() == MergePolicyType.LOG_BYTE) {
255+
mergePolicy = new LogByteSizeMergePolicy();
256+
} else if (args.mergePolicy() == MergePolicyType.NO) {
257+
mergePolicy = NoMergePolicy.INSTANCE;
258+
} else if (args.mergePolicy() == MergePolicyType.LOG_DOC) {
259+
mergePolicy = new LogDocMergePolicy();
260+
} else {
261+
throw new IllegalArgumentException("Invalid merge policy: " + args.mergePolicy());
262+
}
263+
}
264+
return mergePolicy;
265+
}
266+
235267
static class FormattedResults {
236268
List<Results> indexResults = new ArrayList<>();
237269
List<Results> queryResults = new ArrayList<>();

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.index.IndexReader;
3232
import org.apache.lucene.index.IndexWriter;
3333
import org.apache.lucene.index.IndexWriterConfig;
34+
import org.apache.lucene.index.MergePolicy;
3435
import org.apache.lucene.index.VectorEncoding;
3536
import org.apache.lucene.index.VectorSimilarityFunction;
3637
import org.apache.lucene.store.FSDirectory;
@@ -69,6 +70,7 @@ class KnnIndexer {
6970
private final Codec codec;
7071
private final int numDocs;
7172
private final int numIndexThreads;
73+
private final MergePolicy mergePolicy;
7274

7375
KnnIndexer(
7476
List<Path> docsPath,
@@ -78,7 +80,8 @@ class KnnIndexer {
7880
VectorEncoding vectorEncoding,
7981
int dim,
8082
VectorSimilarityFunction similarityFunction,
81-
int numDocs
83+
int numDocs,
84+
MergePolicy mergePolicy
8285
) {
8386
this.docsPath = docsPath;
8487
this.indexPath = indexPath;
@@ -88,6 +91,7 @@ class KnnIndexer {
8891
this.dim = dim;
8992
this.similarityFunction = similarityFunction;
9093
this.numDocs = numDocs;
94+
this.mergePolicy = mergePolicy;
9195
}
9296

9397
void numSegments(KnnIndexTester.Results result) {
@@ -103,7 +107,9 @@ void createIndex(KnnIndexTester.Results result) throws IOException, InterruptedE
103107
iwc.setCodec(codec);
104108
iwc.setRAMBufferSizeMB(WRITER_BUFFER_MB);
105109
iwc.setUseCompoundFile(false);
106-
110+
if (mergePolicy != null) {
111+
iwc.setMergePolicy(mergePolicy);
112+
}
107113
iwc.setMaxFullFlushMergeWaitMillis(0);
108114

109115
iwc.setInfoStream(new PrintStreamInfoStream(System.out) {

0 commit comments

Comments
 (0)