Skip to content

Commit fc7efb3

Browse files
committed
query: add DocSet cost hint and use it for intersection ordering
- Add DocSet::cost().
- Use cost() instead of size_hint() to order scorers in intersect_scorers.
This isolates the cost-related changes from PR quickwit-oss#2538, without the new seek APIs.
1 parent 40659d4 commit fc7efb3

File tree

17 files changed

+330
-54
lines changed

17 files changed

+330
-54
lines changed

src/docset.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,17 @@ pub trait DocSet: Send {
8787
/// length of the docset.
8888
fn size_hint(&self) -> u32;
8989

90+
/// Returns a best-effort hint of the cost to consume the entire docset.
91+
///
92+
/// Consuming means calling `advance` until [`TERMINATED`] is returned.
93+
/// The cost should be relative to the cost of driving a Term query,
94+
/// which would be the number of documents in the DocSet.
95+
///
96+
/// By default this returns `size_hint()`.
97+
fn cost(&self) -> u64 {
98+
self.size_hint() as u64
99+
}
100+
90101
/// Returns the number of matching documents.
91102
/// Calling this method consumes the `DocSet`.
92103
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
@@ -134,6 +145,10 @@ impl DocSet for &mut dyn DocSet {
134145
(**self).size_hint()
135146
}
136147

148+
fn cost(&self) -> u64 {
149+
(**self).cost()
150+
}
151+
137152
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
138153
(**self).count(alive_bitset)
139154
}
@@ -169,6 +184,11 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
169184
unboxed.size_hint()
170185
}
171186

187+
fn cost(&self) -> u64 {
188+
let unboxed: &TDocSet = self.borrow();
189+
unboxed.cost()
190+
}
191+
172192
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
173193
let unboxed: &mut TDocSet = self.borrow_mut();
174194
unboxed.count(alive_bitset)

src/postings/mod.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -667,12 +667,15 @@ mod bench {
667667
.read_postings(&TERM_D, IndexRecordOption::Basic)
668668
.unwrap()
669669
.unwrap();
670-
let mut intersection = Intersection::new(vec![
671-
segment_postings_a,
672-
segment_postings_b,
673-
segment_postings_c,
674-
segment_postings_d,
675-
]);
670+
let mut intersection = Intersection::new(
671+
vec![
672+
segment_postings_a,
673+
segment_postings_b,
674+
segment_postings_c,
675+
segment_postings_d,
676+
],
677+
reader.searcher().num_docs() as u32,
678+
);
676679
while intersection.advance() != TERMINATED {}
677680
});
678681
}

src/query/boolean_query/block_wand.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,14 @@ mod tests {
367367
checkpoints
368368
}
369369

370-
fn compute_checkpoints_manual(term_scorers: Vec<TermScorer>, n: usize) -> Vec<(DocId, Score)> {
370+
fn compute_checkpoints_manual(
371+
term_scorers: Vec<TermScorer>,
372+
n: usize,
373+
max_doc: u32,
374+
) -> Vec<(DocId, Score)> {
371375
let mut heap: BinaryHeap<Float> = BinaryHeap::with_capacity(n);
372376
let mut checkpoints: Vec<(DocId, Score)> = Vec::new();
373-
let mut scorer = BufferedUnionScorer::build(term_scorers, SumCombiner::default);
377+
let mut scorer = BufferedUnionScorer::build(term_scorers, SumCombiner::default, max_doc);
374378

375379
let mut limit = Score::MIN;
376380
loop {
@@ -478,7 +482,8 @@ mod tests {
478482
for top_k in 1..4 {
479483
let checkpoints_for_each_pruning =
480484
compute_checkpoints_for_each_pruning(term_scorers.clone(), top_k);
481-
let checkpoints_manual = compute_checkpoints_manual(term_scorers.clone(), top_k);
485+
let checkpoints_manual =
486+
compute_checkpoints_manual(term_scorers.clone(), top_k, 100_000);
482487
assert_eq!(checkpoints_for_each_pruning.len(), checkpoints_manual.len());
483488
for (&(left_doc, left_score), &(right_doc, right_score)) in checkpoints_for_each_pruning
484489
.iter()

src/query/boolean_query/boolean_weight.rs

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use core::num;
12
use std::collections::HashMap;
23

34
use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
@@ -42,6 +43,7 @@ where
4243
fn scorer_union<TScoreCombiner>(
4344
scorers: Vec<Box<dyn Scorer>>,
4445
score_combiner_fn: impl Fn() -> TScoreCombiner,
46+
num_docs: u32,
4547
) -> SpecializedScorer
4648
where
4749
TScoreCombiner: ScoreCombiner,
@@ -68,23 +70,27 @@ where
6870
return SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
6971
scorers,
7072
score_combiner_fn,
73+
num_docs,
7174
)));
7275
}
7376
}
7477
}
7578
SpecializedScorer::Other(Box::new(BufferedUnionScorer::build(
7679
scorers,
7780
score_combiner_fn,
81+
num_docs,
7882
)))
7983
}
8084

8185
fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
8286
scorer: SpecializedScorer,
8387
score_combiner_fn: impl Fn() -> TScoreCombiner,
88+
num_docs: u32,
8489
) -> Box<dyn Scorer> {
8590
match scorer {
8691
SpecializedScorer::TermUnion(term_scorers) => {
87-
let union_scorer = BufferedUnionScorer::build(term_scorers, score_combiner_fn);
92+
let union_scorer =
93+
BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
8894
Box::new(union_scorer)
8995
}
9096
SpecializedScorer::Other(scorer) => scorer,
@@ -151,6 +157,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
151157
boost: Score,
152158
score_combiner_fn: impl Fn() -> TComplexScoreCombiner,
153159
) -> crate::Result<SpecializedScorer> {
160+
let num_docs = reader.num_docs();
154161
let mut per_occur_scorers = self.per_occur_scorers(reader, boost)?;
155162
// Indicates how `should` clauses are combined with the other clauses.
156163
enum CombinationMethod {
@@ -167,11 +174,16 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
167174
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
168175
}
169176
match self.minimum_number_should_match {
170-
0 => CombinationMethod::Optional(scorer_union(should_scorers, &score_combiner_fn)),
171-
1 => {
172-
let scorer_union = scorer_union(should_scorers, &score_combiner_fn);
173-
CombinationMethod::Required(scorer_union)
174-
}
177+
0 => CombinationMethod::Optional(scorer_union(
178+
should_scorers,
179+
&score_combiner_fn,
180+
num_docs,
181+
)),
182+
1 => CombinationMethod::Required(scorer_union(
183+
should_scorers,
184+
&score_combiner_fn,
185+
num_docs,
186+
)),
175187
n if num_of_should_scorers == n => {
176188
// When num_of_should_scorers equals the number of should clauses,
177189
// they are no different from must clauses.
@@ -200,30 +212,30 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
200212
};
201213
let exclude_scorer_opt: Option<Box<dyn Scorer>> = per_occur_scorers
202214
.remove(&Occur::MustNot)
203-
.map(|scorers| scorer_union(scorers, DoNothingCombiner::default))
215+
.map(|scorers| scorer_union(scorers, DoNothingCombiner::default, num_docs))
204216
.map(|specialized_scorer: SpecializedScorer| {
205-
into_box_scorer(specialized_scorer, DoNothingCombiner::default)
217+
into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
206218
});
207219
let positive_scorer = match (should_opt, must_scorers) {
208220
(CombinationMethod::Ignored, Some(must_scorers)) => {
209-
SpecializedScorer::Other(intersect_scorers(must_scorers))
221+
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
210222
}
211223
(CombinationMethod::Optional(should_scorer), Some(must_scorers)) => {
212-
let must_scorer = intersect_scorers(must_scorers);
224+
let must_scorer = intersect_scorers(must_scorers, num_docs);
213225
if self.scoring_enabled {
214226
SpecializedScorer::Other(Box::new(
215227
RequiredOptionalScorer::<_, _, TScoreCombiner>::new(
216228
must_scorer,
217-
into_box_scorer(should_scorer, &score_combiner_fn),
229+
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
218230
),
219231
))
220232
} else {
221233
SpecializedScorer::Other(must_scorer)
222234
}
223235
}
224236
(CombinationMethod::Required(should_scorer), Some(mut must_scorers)) => {
225-
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn));
226-
SpecializedScorer::Other(intersect_scorers(must_scorers))
237+
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn, num_docs));
238+
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
227239
}
228240
(CombinationMethod::Ignored, None) => {
229241
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)))
@@ -233,7 +245,8 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
233245
(CombinationMethod::Optional(should_scorer), None) => should_scorer,
234246
};
235247
if let Some(exclude_scorer) = exclude_scorer_opt {
236-
let positive_scorer_boxed = into_box_scorer(positive_scorer, &score_combiner_fn);
248+
let positive_scorer_boxed =
249+
into_box_scorer(positive_scorer, &score_combiner_fn, num_docs);
237250
Ok(SpecializedScorer::Other(Box::new(Exclude::new(
238251
positive_scorer_boxed,
239252
exclude_scorer,
@@ -246,6 +259,7 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
246259

247260
impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombiner> {
248261
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
262+
let num_docs = reader.num_docs();
249263
if self.weights.is_empty() {
250264
Ok(Box::new(EmptyScorer))
251265
} else if self.weights.len() == 1 {
@@ -258,12 +272,12 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
258272
} else if self.scoring_enabled {
259273
self.complex_scorer(reader, boost, &self.score_combiner_fn)
260274
.map(|specialized_scorer| {
261-
into_box_scorer(specialized_scorer, &self.score_combiner_fn)
275+
into_box_scorer(specialized_scorer, &self.score_combiner_fn, num_docs)
262276
})
263277
} else {
264278
self.complex_scorer(reader, boost, DoNothingCombiner::default)
265279
.map(|specialized_scorer| {
266-
into_box_scorer(specialized_scorer, DoNothingCombiner::default)
280+
into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
267281
})
268282
}
269283
}
@@ -296,8 +310,11 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
296310
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
297311
match scorer {
298312
SpecializedScorer::TermUnion(term_scorers) => {
299-
let mut union_scorer =
300-
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn);
313+
let mut union_scorer = BufferedUnionScorer::build(
314+
term_scorers,
315+
&self.score_combiner_fn,
316+
reader.num_docs(),
317+
);
301318
for_each_scorer(&mut union_scorer, callback);
302319
}
303320
SpecializedScorer::Other(mut scorer) => {
@@ -317,8 +334,11 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
317334

318335
match scorer {
319336
SpecializedScorer::TermUnion(term_scorers) => {
320-
let mut union_scorer =
321-
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn);
337+
let mut union_scorer = BufferedUnionScorer::build(
338+
term_scorers,
339+
&self.score_combiner_fn,
340+
reader.num_docs(),
341+
);
322342
for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
323343
}
324344
SpecializedScorer::Other(mut scorer) => {

src/query/boost_query.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
117117
self.underlying.size_hint()
118118
}
119119

120+
fn cost(&self) -> u64 {
121+
self.underlying.cost()
122+
}
123+
120124
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
121125
self.underlying.count(alive_bitset)
122126
}

src/query/const_score_query.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
130130
fn size_hint(&self) -> u32 {
131131
self.docset.size_hint()
132132
}
133+
134+
fn cost(&self) -> u64 {
135+
self.docset.cost()
136+
}
133137
}
134138

135139
impl<TDocSet: DocSet + 'static> Scorer for ConstScorer<TDocSet> {

src/query/disjunction.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ impl<T: Scorer> DocSet for ScorerWrapper<T> {
7070
fn size_hint(&self) -> u32 {
7171
self.scorer.size_hint()
7272
}
73+
74+
fn cost(&self) -> u64 {
75+
self.scorer.cost()
76+
}
7377
}
7478

7579
impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Disjunction<TScorer, TScoreCombiner> {
@@ -146,6 +150,14 @@ impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> DocSet
146150
.max()
147151
.unwrap_or(0u32)
148152
}
153+
154+
fn cost(&self) -> u64 {
155+
self.chains
156+
.iter()
157+
.map(|docset| docset.cost())
158+
.max()
159+
.unwrap_or(0u64)
160+
}
149161
}
150162

151163
impl<TScorer: Scorer, TScoreCombiner: ScoreCombiner> Scorer

0 commit comments

Comments
 (0)