Commit bb806cd

Pollute call sites before running benchmarks.

Because nightly benchmarks only test a small set of scenarios, the JVM may end up over-optimizing query evaluation. For instance, queries only run with BM25Similarity, sorting tasks only run against a TermQuery, and filtered vector search only exercises the approximate path, never the exact path. This commit tries to make the benchmarks more realistic by running some cheap queries before the benchmarks themselves; their goal is to pollute call sites so that they are not all magically monomorphic. This will translate into a drop in performance for some tasks, but hopefully we can recover some of it in the future.

Related PRs:
- apache/lucene#14968, where we suspected the speedup to be due to specialization making a call site monomorphic in nightly benchmarks that would not be monomorphic in the real world.
- apache/lucene#15039, where we are trying to improve behavior with several different Similarity implementations, but the benchmarks only show a small improvement since they always run with BM25Similarity.
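The reasoning above hinges on how HotSpot handles virtual calls: a monomorphic or bimorphic call site can be guarded and inlined, while a megamorphic one (three or more observed receiver types) falls back to regular virtual dispatch. A minimal, self-contained illustration of what "polluting" such a call site means (hypothetical classes, not Lucene code and not part of this commit):

interface Scorer {
  float score(int doc);
}

class Bm25LikeScorer implements Scorer {
  public float score(int doc) { return doc * 1.2f; }
}

class BooleanLikeScorer implements Scorer {
  public float score(int doc) { return 1f; }
}

class ClassicLikeScorer implements Scorer {
  public float score(int doc) { return doc * 0.5f; }
}

class CallSiteDemo {
  // If only Bm25LikeScorer ever flows through this loop, the scorer.score(doc) call site is
  // monomorphic and the JIT can inline it. Feeding the other implementations through it first
  // ("polluting" it) makes the type profile polymorphic, which is closer to what production
  // search clusters see.
  static float sum(Scorer scorer, int maxDoc) {
    float total = 0;
    for (int doc = 0; doc < maxDoc; ++doc) {
      total += scorer.score(doc);
    }
    return total;
  }

  public static void main(String[] args) {
    for (Scorer scorer : new Scorer[] { new BooleanLikeScorer(), new ClassicLikeScorer(), new Bm25LikeScorer() }) {
      System.out.println(sum(scorer, 10_000)); // the sum(...) call site now has a mixed type profile
    }
  }
}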
1 parent f845c9d commit bb806cd

File tree

4 files changed, 185 insertions(+), 0 deletions(-)


src/main/perf/SearchPerfTest.java

Lines changed: 5 additions & 0 deletions
@@ -236,8 +236,13 @@ private static void _main(String[] clArgs) throws Exception {
     final int topN = args.getInt("-topN");
     final boolean doStoredLoads = args.getFlag("-loadStoredFields");
     final boolean exitable = args.getFlag("-exitable");
+    final boolean pollute = args.getFlag("-pollute");
     final TestContext testContext = TestContext.parse(args.getString("-context", ""));

+    if (pollute) {
+      TypePolluter.pollute();
+    }
+
     if (searchConcurrency == -1) {
       searchConcurrency = Runtime.getRuntime().availableProcessors();
     }

src/main/perf/TypePolluter.java

Lines changed: 174 additions & 0 deletions
@@ -0,0 +1,174 @@
+package perf;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.ExitableDirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.QueryTimeoutImpl;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FieldExistsQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.KnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.similarities.BooleanSimilarity;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+
+/**
+ * This helper pollutes the types that queries typically see at call sites, to better simulate production systems that may
+ * <ul>
+ *   <li>have a mix of Directory impls, e.g. because of NRTCachingDirectory,</li>
+ *   <li>have a mix of segments with and without deletions,</li>
+ *   <li>use multiple similarities.</li>
+ * </ul>
+ * <p>This matters because polymorphic call sites are much more expensive than bimorphic call sites, and bimorphic call sites may be noticeably more expensive than monomorphic call sites.
+ */
+public class TypePolluter {
+
+  public static void pollute() throws IOException {
+    // Use ByteBuffersDirectory instead of MMapDirectory to have multiple IndexInput sub-classes used by queries
+    try (Directory dir = new ByteBuffersDirectory()) {
+
+      // TODO: configure a non-default codec?
+      IndexWriterConfig config = new IndexWriterConfig(null);
+
+      try (IndexWriter w = new IndexWriter(dir, config)) {
+        // Add enough documents for the inverted index to have full blocks (128 postings)
+        int docCount = 1024;
+        for (int i = 0; i < docCount; ++i) {
+          Document doc = new Document();
+          doc.add(new StringField("id", Integer.toString(i), Store.NO));
+          if (i % 3 != 0) {
+            doc.add(new StringField("body", "a", Store.NO));
+          }
+          if (i % 7 != 0) {
+            doc.add(new StringField("body", "b", Store.NO));
+          }
+          if (i % 11 != 0) {
+            doc.add(new StringField("body", "c", Store.NO));
+          }
+          if (i % 13 != 0) {
+            doc.add(new KnnFloatVectorField("vector", new float[] { i % 7 }));
+          }
+          if (i % 17 != 0) {
+            doc.add(new NumericDocValuesField("int", i));
+          }
+          w.addDocument(doc);
+        }
+        w.forceMerge(1);
+        // Add deleted docs to make sure that branches that exercise deleted docs are used even
+        // though the benchmark may be running with no deleted docs
+        for (int i = 0; i < docCount; i += 23) {
+          w.deleteDocuments(new Term("id", Integer.toString(i)));
+        }
+      }
+      try (DirectoryReader reader = DirectoryReader.open(dir)) {
+        runQueries(reader);
+        // ExitableDirectoryReader adds lots of wrappers everywhere
+        runQueries(new ExitableDirectoryReader(reader, new QueryTimeoutImpl(Long.MAX_VALUE)));
+      }
+    }
+  }
+
+  private static void runQueries(DirectoryReader reader) throws IOException {
+    IndexSearcher searcher = new IndexSearcher(reader);
+    // Exercise multiple similarities
+    IndexSearcher booleanSearcher = new IndexSearcher(reader);
+    booleanSearcher.setSimilarity(new BooleanSimilarity());
+    IndexSearcher classicSearcher = new IndexSearcher(reader);
+    classicSearcher.setSimilarity(new ClassicSimilarity());
+
+    Query query1 = new TermQuery(new Term("body", "a"));
+    Query query2 = new TermQuery(new Term("body", "b"));
+    Query query3 = new ConstantScoreQuery(query1);
+    Query query4 = new ConstantScoreQuery(query2);
+    Query query5 = new BooleanQuery.Builder()
+        .add(query1, Occur.SHOULD)
+        .add(query2, Occur.SHOULD)
+        .build();
+    Query query6 = new BooleanQuery.Builder()
+        .add(query1, Occur.MUST)
+        .add(query2, Occur.MUST)
+        .build();
+    Query query7 = new BooleanQuery.Builder()
+        .add(query3, Occur.SHOULD)
+        .add(query4, Occur.SHOULD)
+        .build();
+    Query query8 = new BooleanQuery.Builder()
+        .add(query3, Occur.MUST)
+        .add(query4, Occur.MUST)
+        .build();
+
+    Query[] baseQueries = new Query[] { query1, query2, query3, query4, query5, query6, query7, query8 };
+
+    // dense filter
+    Query filter1 = new TermQuery(new Term("body", "c"));
+    // sparse filter (especially useful to make sure that the vector search query exercises exact search)
+    Query filter2 = new TermQuery(new Term("id", "1"));
+    // filter not based on postings
+    Query filter3 = new FieldExistsQuery("int");
+
+    List<Query> queries = new ArrayList<>();
+
+    for (Query query : baseQueries) {
+      queries.add(query);
+      for (Query filter : new Query[] { filter1, filter2, filter3 }) {
+        Query filteredQuery = new BooleanQuery.Builder()
+            .add(query, Occur.MUST)
+            .add(filter, Occur.FILTER)
+            .build();
+        queries.add(filteredQuery);
+      }
+    }
+
+    // Handle vector search separately since filters need to be applied differently
+    {
+      Query query9 = new KnnFloatVectorQuery("vector", new float[] { 1.5f }, 10);
+      queries.add(query9);
+      for (Query filter : new Query[] { filter1, filter2, filter3 }) {
+        Query filteredQuery = new KnnFloatVectorQuery("vector", new float[] { 1.5f }, 10, filter);
+        queries.add(filteredQuery);
+      }
+    }
+
+    for (Query query : queries) {
+      // Exhaustive evaluation, no scoring
+      int count = searcher.count(query);
+      // top-k evaluation, by score
+      TopDocs hits1 = searcher.search(query, 10);
+      TopDocs hits2 = booleanSearcher.search(query, 10);
+      TopDocs hits3 = classicSearcher.search(query, 10);
+      // top-k evaluation, by field
+      TopDocs hits4 = searcher.search(query, 10, new Sort(new SortField("int", SortField.Type.INT)));
+
+      if (count == 0
+          || hits1.totalHits.value() == 0
+          || hits2.totalHits.value() == 0
+          || hits3.totalHits.value() == 0
+          || hits4.totalHits.value() == 0) {
+        // This helps catch errors if queries are malformed, and also prevents the JVM from skipping
+        // the query if we don't use the result
+        throw new Error("" + query);
+      }
+    }
+  }
+}
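Not part of this commit, but one way to sanity-check the warm-up is to call it from a small standalone driver and inspect HotSpot's inlining decisions with its diagnostic flags (-XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining). A hypothetical sketch, mirroring what SearchPerfTest now does when -pollute is passed:

package perf;

// Hypothetical driver, not part of the commit: run the warm-up before any timed work.
// Launching it with -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining shows which call
// sites HotSpot still inlines once their type profiles are no longer purely monomorphic.
public class PolluteThenMeasure {
  public static void main(String[] args) throws Exception {
    TypePolluter.pollute(); // seeds call sites with varied Directory, Similarity and Query impls
    // ... then open the real benchmark index and run the measured tasks as usual ...
  }
}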

src/python/benchUtil.py

Lines changed: 2 additions & 0 deletions
@@ -1238,6 +1238,8 @@ def runSimpleSearchBench(self, iter, id, c, coldRun, seed, staticSeed, filter=No
       w("-vectorScale", c.vectorScale)
     if c.exitable:
       w("-exitable")
+    if c.pollute:
+      w("-pollute")

     print(" log: %s + stdout" % logFile)
     t0 = time.time()

src/python/competition.py

Lines changed: 4 additions & 0 deletions
@@ -299,6 +299,7 @@ def __init__(
     javacCommand=constants.JAVAC_EXE,
     topN=100,
     testContext="",
+    pollute=True,
   ):
     self.name = name
     self.checkout = checkout
@@ -350,6 +351,9 @@ def __init__(
     # See also TestContext#parse
     self.testContext = testContext

+    # Whether to pollute call sites so that they are not all magically monomorphic
+    self.pollute = pollute
+
   def getAggregateProfilerResult(self, id, mode, count=30, stackSize=1):
     # we accept a sequence of stack sizes and will re-aggregate JFR results at each
     if type(stackSize) is int:
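For completeness, a hypothetical usage sketch of the new keyword; the class name below is a placeholder, since this hunk does not show which class the __init__ belongs to, and only the pollute=True default itself comes from the commit:

# Hypothetical sketch, not from the commit: opting a single competitor out of call-site
# pollution. "CompetitorConfig" is a placeholder for the real class defining the __init__
# above; name and checkout are parameters assigned in its constructor body.
baseline = CompetitorConfig(
  name="baseline_no_pollute",
  checkout="lucene_baseline",
  pollute=False,  # keyword added by this commit; defaults to True
)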
