Commit bb806cd

Pollute call sites before running benchmarks.

Because nightly benchmarks only test a small set of scenarios, the JVM may end up over-optimizing query evaluation. For instance, queries only run with BM25Similarity, sorting tasks only run against a TermQuery, and filtered vector search only exercises the approximate path, never the exact path. This commit tries to make the benchmarks more realistic by running some cheap queries before the benchmarks themselves; their goal is to pollute call sites so that they are not all magically monomorphic. This will translate into a drop in performance for some tasks, but hopefully we can recover some of it in the future.

Related PRs:
- apache/lucene#14968, where we suspected the speedup to be due to specialization making a call site monomorphic in nightly benchmarks that would not be monomorphic in the real world.
- apache/lucene#15039, where we are trying to improve behavior with several different Similarity implementations, but the benchmarks only show a small improvement since they always run with BM25Similarity.
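The reasoning above hinges on how HotSpot handles virtual calls: a monomorphic or bimorphic call site can be guarded and inlined, while a megamorphic one (three or more observed receiver types) falls back to regular virtual dispatch. A minimal, self-contained illustration of what "polluting" such a call site means (hypothetical classes, not Lucene code and not part of this commit):

interface Scorer {
  float score(int doc);
}

class Bm25LikeScorer implements Scorer {
  public float score(int doc) { return doc * 1.2f; }
}

class BooleanLikeScorer implements Scorer {
  public float score(int doc) { return 1f; }
}

class ClassicLikeScorer implements Scorer {
  public float score(int doc) { return doc * 0.5f; }
}

class CallSiteDemo {
  // If only Bm25LikeScorer ever flows through this loop, the scorer.score(doc) call site is
  // monomorphic and the JIT can inline it. Feeding the other implementations through it first
  // ("polluting" it) makes the type profile polymorphic, which is closer to what production
  // search clusters see.
  static float sum(Scorer scorer, int maxDoc) {
    float total = 0;
    for (int doc = 0; doc < maxDoc; ++doc) {
      total += scorer.score(doc);
    }
    return total;
  }

  public static void main(String[] args) {
    for (Scorer scorer : new Scorer[] { new BooleanLikeScorer(), new ClassicLikeScorer(), new Bm25LikeScorer() }) {
      System.out.println(sum(scorer, 10_000)); // the sum(...) call site now has a mixed type profile
    }
  }
}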
1 parent f845c9d commit bb806cd

File tree

4 files changed, 185 insertions(+), 0 deletions(-)


src/main/perf/SearchPerfTest.java

Lines changed: 5 additions & 0 deletions
@@ -236,8 +236,13 @@ private static void _main(String[] clArgs) throws Exception {
     final int topN = args.getInt("-topN");
     final boolean doStoredLoads = args.getFlag("-loadStoredFields");
     final boolean exitable = args.getFlag("-exitable");
+    final boolean pollute = args.getFlag("-pollute");
     final TestContext testContext = TestContext.parse(args.getString("-context", ""));

+    if (pollute) {
+      TypePolluter.pollute();
+    }
+
     if (searchConcurrency == -1) {
       searchConcurrency = Runtime.getRuntime().availableProcessors();
     }

src/main/perf/TypePolluter.java

Lines changed: 174 additions & 0 deletions
@@ -0,0 +1,174 @@
+package perf;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.ExitableDirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.QueryTimeoutImpl;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FieldExistsQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.KnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.similarities.BooleanSimilarity;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+
+/**
+ * This helper pollutes the types that queries typically see at call sites, to better simulate production systems that may
+ * <ul>
+ *   <li>have a mix of Directory impls, e.g. because of NRTCachingDirectory,</li>
+ *   <li>have a mix of segments with and without deletions,</li>
+ *   <li>use multiple similarities.</li>
+ * </ul>
+ * <p>This matters because polymorphic call sites are much more expensive than bimorphic call sites, and bimorphic call sites may be noticeably more expensive than monomorphic call sites.
+ */
+public class TypePolluter {
+
+  public static void pollute() throws IOException {
+    // Use ByteBuffersDirectory instead of MMapDirectory to have multiple IndexInput sub-classes used by queries
+    try (Directory dir = new ByteBuffersDirectory()) {
+
+      // TODO: configure a non-default codec?
+      IndexWriterConfig config = new IndexWriterConfig(null);
+
+      try (IndexWriter w = new IndexWriter(dir, config)) {
+        // Add enough documents for the inverted index to have full blocks (128 postings)
+        int docCount = 1024;
+        for (int i = 0; i < docCount; ++i) {
+          Document doc = new Document();
+          doc.add(new StringField("id", Integer.toString(i), Store.NO));
+          if (i % 3 != 0) {
+            doc.add(new StringField("body", "a", Store.NO));
+          }
+          if (i % 7 != 0) {
+            doc.add(new StringField("body", "b", Store.NO));
+          }
+          if (i % 11 != 0) {
+            doc.add(new StringField("body", "c", Store.NO));
+          }
+          if (i % 13 != 0) {
+            doc.add(new KnnFloatVectorField("vector", new float[] { i % 7 }));
+          }
+          if (i % 17 != 0) {
+            doc.add(new NumericDocValuesField("int", i));
+          }
+          w.addDocument(doc);
+        }
+        w.forceMerge(1);
+        // Add deleted docs to make sure that branches that exercise deleted docs are used even
+        // though the benchmark may be running with no deleted docs
+        for (int i = 0; i < docCount; i += 23) {
+          w.deleteDocuments(new Term("id", Integer.toString(i)));
+        }
+      }
+      try (DirectoryReader reader = DirectoryReader.open(dir)) {
+        runQueries(reader);
+        // ExitableDirectoryReader adds lots of wrappers everywhere
+        runQueries(new ExitableDirectoryReader(reader, new QueryTimeoutImpl(Long.MAX_VALUE)));
+      }
+    }
+  }
+
+  private static void runQueries(DirectoryReader reader) throws IOException {
+    IndexSearcher searcher = new IndexSearcher(reader);
+    // Exercise multiple similarities
+    IndexSearcher booleanSearcher = new IndexSearcher(reader);
+    booleanSearcher.setSimilarity(new BooleanSimilarity());
+    IndexSearcher classicSearcher = new IndexSearcher(reader);
+    classicSearcher.setSimilarity(new ClassicSimilarity());
+
+    Query query1 = new TermQuery(new Term("body", "a"));
+    Query query2 = new TermQuery(new Term("body", "b"));
+    Query query3 = new ConstantScoreQuery(query1);
+    Query query4 = new ConstantScoreQuery(query2);
+    Query query5 = new BooleanQuery.Builder()
+        .add(query1, Occur.SHOULD)
+        .add(query2, Occur.SHOULD)
+        .build();
+    Query query6 = new BooleanQuery.Builder()
+        .add(query1, Occur.MUST)
+        .add(query2, Occur.MUST)
+        .build();
+    Query query7 = new BooleanQuery.Builder()
+        .add(query3, Occur.SHOULD)
+        .add(query4, Occur.SHOULD)
+        .build();
+    Query query8 = new BooleanQuery.Builder()
+        .add(query3, Occur.MUST)
+        .add(query4, Occur.MUST)
+        .build();
+
+    Query[] baseQueries = new Query[] { query1, query2, query3, query4, query5, query6, query7, query8 };
+
+    // dense filter
+    Query filter1 = new TermQuery(new Term("body", "c"));
+    // sparse filter (especially useful to make sure that the vector search query exercises exact search)
+    Query filter2 = new TermQuery(new Term("id", "1"));
+    // filter not based on postings
+    Query filter3 = new FieldExistsQuery("int");
+
+    List<Query> queries = new ArrayList<>();
+
+    for (Query query : baseQueries) {
+      queries.add(query);
+      for (Query filter : new Query[] { filter1, filter2, filter3 }) {
+        Query filteredQuery = new BooleanQuery.Builder()
+            .add(query, Occur.MUST)
+            .add(filter, Occur.FILTER)
+            .build();
+        queries.add(filteredQuery);
+      }
+    }
+
+    // Handle vector search separately since filters need to be applied differently
+    {
+      Query query9 = new KnnFloatVectorQuery("vector", new float[] { 1.5f }, 10);
+      queries.add(query9);
+      for (Query filter : new Query[] { filter1, filter2, filter3 }) {
+        Query filteredQuery = new KnnFloatVectorQuery("vector", new float[] { 1.5f }, 10, filter);
+        queries.add(filteredQuery);
+      }
+    }
+
+    for (Query query : queries) {
+      // Exhaustive evaluation, no scoring
+      int count = searcher.count(query);
+      // top-k evaluation, by score
+      TopDocs hits1 = searcher.search(query, 10);
+      TopDocs hits2 = booleanSearcher.search(query, 10);
+      TopDocs hits3 = classicSearcher.search(query, 10);
+      // top-k evaluation, by field
+      TopDocs hits4 = searcher.search(query, 10, new Sort(new SortField("int", SortField.Type.INT)));
+
+      if (count == 0
+          || hits1.totalHits.value() == 0
+          || hits2.totalHits.value() == 0
+          || hits3.totalHits.value() == 0
+          || hits4.totalHits.value() == 0) {
+        // This helps catch errors if queries are malformed, and also prevents the JVM from skipping
+        // the query if we don't use the result
+        throw new Error("" + query);
+      }
+    }
+  }
+}
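Not part of this commit, but one way to sanity-check the warm-up is to call it from a small standalone driver and inspect HotSpot's inlining decisions with its diagnostic flags (-XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining). A hypothetical sketch, mirroring what SearchPerfTest now does when -pollute is passed:

package perf;

// Hypothetical driver, not part of the commit: run the warm-up before any timed work.
// Launching it with -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining shows which call
// sites HotSpot still inlines once their type profiles are no longer purely monomorphic.
public class PolluteThenMeasure {
  public static void main(String[] args) throws Exception {
    TypePolluter.pollute(); // seeds call sites with varied Directory, Similarity and Query impls
    // ... then open the real benchmark index and run the measured tasks as usual ...
  }
}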

src/python/benchUtil.py

Lines changed: 2 additions & 0 deletions
@@ -1238,6 +1238,8 @@ def runSimpleSearchBench(self, iter, id, c, coldRun, seed, staticSeed, filter=No
       w("-vectorScale", c.vectorScale)
     if c.exitable:
       w("-exitable")
+    if c.pollute:
+      w("-pollute")

     print(" log: %s + stdout" % logFile)
     t0 = time.time()

src/python/competition.py

Lines changed: 4 additions & 0 deletions
@@ -299,6 +299,7 @@ def __init__(
     javacCommand=constants.JAVAC_EXE,
     topN=100,
     testContext="",
+    pollute=True,
   ):
     self.name = name
     self.checkout = checkout
@@ -350,6 +351,9 @@ def __init__(
     # See also TestContext#parse
     self.testContext = testContext

+    # Whether to pollute call sites so that they are not all magically monomorphic
+    self.pollute = pollute
+
   def getAggregateProfilerResult(self, id, mode, count=30, stackSize=1):
     # we accept a sequence of stack sizes and will re-aggregate JFR results at each
     if type(stackSize) is int:
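For completeness, a hypothetical usage sketch of the new keyword; the class name below is a placeholder, since this hunk does not show which class the __init__ belongs to, and only the pollute=True default itself comes from the commit:

# Hypothetical sketch, not from the commit: opting a single competitor out of call-site
# pollution. "CompetitorConfig" is a placeholder for the real class defining the __init__
# above; name and checkout are parameters assigned in its constructor body.
baseline = CompetitorConfig(
  name="baseline_no_pollute",
  checkout="lucene_baseline",
  pollute=False,  # keyword added by this commit; defaults to True
)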
