586: Add settings to force milli to exhaustively compute the total number of hits r=Kerollmops a=ManyTheFish

Add a new setting `exhaustive_number_hits` to `Search` that forces the `Initial` criterion to exhaustively compute the `bucket_candidates`, allowing end users to implement finite pagination.
 
related to https://github.com/meilisearch/meilisearch/pull/2601

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
bors[bot] 2022-10-17 16:24:35 +00:00 committed by GitHub
commit 19b2326f3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 115 additions and 24 deletions

View File

@ -1,31 +1,75 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::search::Distinct;
use crate::Result; use crate::Result;
/// Initial is a mandatory criterion; it is always the first
pub struct Initial { /// and is meant to initialize the CriterionResult used by the other criteria.
/// It behaves like a [Once Iterator](https://doc.rust-lang.org/std/iter/struct.Once.html) and will return Some(CriterionResult) only one time.
pub struct Initial<'t, D> {
ctx: &'t dyn Context<'t>,
answer: Option<CriterionResult>, answer: Option<CriterionResult>,
exhaustive_number_hits: bool,
distinct: Option<D>,
} }
impl Initial { impl<'t, D> Initial<'t, D> {
pub fn new( pub fn new(
ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>, query_tree: Option<Operation>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
) -> Initial { exhaustive_number_hits: bool,
distinct: Option<D>,
) -> Initial<D> {
let answer = CriterionResult { let answer = CriterionResult {
query_tree, query_tree,
candidates: None, candidates: None,
filtered_candidates, filtered_candidates,
bucket_candidates: None, bucket_candidates: None,
}; };
Initial { answer: Some(answer) } Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
} }
} }
impl Criterion for Initial { impl<D: Distinct> Criterion for Initial<'_, D> {
#[logging_timer::time("Initial::{}")] #[logging_timer::time("Initial::{}")]
fn next(&mut self, _: &mut CriterionParameters) -> Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
Ok(self.answer.take()) self.answer
.take()
.map(|mut answer| {
if self.exhaustive_number_hits && answer.query_tree.is_some() {
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
let mut candidates = resolve_query_tree(
self.ctx,
answer.query_tree.as_ref().unwrap(),
&mut params.wdcache,
)?;
// Apply the filters on the documents retrieved with the query tree.
if let Some(ref filtered_candidates) = answer.filtered_candidates {
candidates &= filtered_candidates;
}
// because the bucket_candidates should be an exhaustive count of the matching documents,
// we precompute the distinct attributes.
let bucket_candidates = match &mut self.distinct {
Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?);
}
bucket_candidates
}
None => candidates.clone(),
};
answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates);
}
Ok(answer)
})
.transpose()
} }
} }

View File

@ -13,7 +13,7 @@ use self::typo::Typo;
use self::words::Words; use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::search::criteria::geo::Geo; use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, Distinct, WordDerivationsCache};
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc; mod asc_desc;
@ -226,19 +226,26 @@ impl<'t> CriteriaBuilder<'t> {
Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
} }
pub fn build( pub fn build<D: 't + Distinct>(
&'t self, &'t self,
query_tree: Option<Operation>, query_tree: Option<Operation>,
primitive_query: Option<Vec<PrimitiveQueryPart>>, primitive_query: Option<Vec<PrimitiveQueryPart>>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
sort_criteria: Option<Vec<AscDescName>>, sort_criteria: Option<Vec<AscDescName>>,
exhaustive_number_hits: bool,
distinct: Option<D>,
) -> Result<Final<'t>> { ) -> Result<Final<'t>> {
use crate::criterion::Criterion as Name; use crate::criterion::Criterion as Name;
let primitive_query = primitive_query.unwrap_or_default(); let primitive_query = primitive_query.unwrap_or_default();
let mut criterion = let mut criterion = Box::new(Initial::new(
Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>; self,
query_tree,
filtered_candidates,
exhaustive_number_hits,
distinct,
)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? { for name in self.index.criteria(&self.rtxn)? {
criterion = match name { criterion = match name {
Name::Words => Box::new(Words::new(self, criterion)), Name::Words => Box::new(Words::new(self, criterion)),

View File

@ -348,6 +348,7 @@ mod test {
use super::super::initial::Initial; use super::super::initial::Initial;
use super::super::test::TestContext; use super::super::test::TestContext;
use super::*; use super::*;
use crate::search::NoopDistinct;
fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String { fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
let mut result = String::new(); let mut result = String::new();
@ -368,7 +369,8 @@ mod test {
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(query_tree, facet_candidates); let parent =
Initial::<NoopDistinct>::new(&context, query_tree, facet_candidates, false, None);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -405,7 +407,8 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(Some(query_tree), facet_candidates); let parent =
Initial::<NoopDistinct>::new(&context, Some(query_tree), facet_candidates, false, None);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -439,7 +442,13 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(query_tree, Some(facet_candidates.clone())); let parent = Initial::<NoopDistinct>::new(
&context,
query_tree,
Some(facet_candidates.clone()),
false,
None,
);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -476,7 +485,13 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); let parent = Initial::<NoopDistinct>::new(
&context,
Some(query_tree),
Some(facet_candidates.clone()),
false,
None,
);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);

View File

@ -21,6 +21,7 @@ const DOCID_SIZE: usize = size_of::<DocumentId>();
/// care to keep the document we are currently on, and remove it from the excluded list. The next /// care to keep the document we are currently on, and remove it from the excluded list. The next
/// iterations will never contain any occurrence of a document with the same distinct value as a /// iterations will never contain any occurrence of a document with the same distinct value as a
/// document from previous iterations. /// document from previous iterations.
#[derive(Clone)]
pub struct FacetDistinct<'a> { pub struct FacetDistinct<'a> {
distinct: FieldId, distinct: FieldId,
index: &'a Index, index: &'a Index,

View File

@ -47,6 +47,7 @@ pub struct Search<'a> {
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool, authorize_typos: bool,
words_limit: usize, words_limit: usize,
exhaustive_number_hits: bool,
rtxn: &'a heed::RoTxn<'a>, rtxn: &'a heed::RoTxn<'a>,
index: &'a Index, index: &'a Index,
} }
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
sort_criteria: None, sort_criteria: None,
terms_matching_strategy: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
authorize_typos: true, authorize_typos: true,
exhaustive_number_hits: false,
words_limit: 10, words_limit: 10,
rtxn, rtxn,
index, index,
@ -107,6 +109,13 @@ impl<'a> Search<'a> {
self self
} }
/// Force the search to exhaustively compute the number of candidates,
/// this will increase the search time but allows finite pagination.
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
self.exhaustive_number_hits = exhaustive_number_hits;
self
}
fn is_typo_authorized(&self) -> Result<bool> { fn is_typo_authorized(&self) -> Result<bool> {
let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?;
// only authorize typos if both the index and the query allow it. // only authorize typos if both the index and the query allow it.
@ -184,20 +193,33 @@ impl<'a> Search<'a> {
} }
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
let criteria = criteria_builder.build(
query_tree,
primitive_query,
filtered_candidates,
self.sort_criteria.clone(),
)?;
match self.index.distinct_field(self.rtxn)? { match self.index.distinct_field(self.rtxn)? {
None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria), None => {
let criteria = criteria_builder.build::<NoopDistinct>(
query_tree,
primitive_query,
filtered_candidates,
self.sort_criteria.clone(),
self.exhaustive_number_hits,
None,
)?;
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
}
Some(name) => { Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
match field_ids_map.id(name) { match field_ids_map.id(name) {
Some(fid) => { Some(fid) => {
let distinct = FacetDistinct::new(fid, self.index, self.rtxn); let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
let criteria = criteria_builder.build(
query_tree,
primitive_query,
filtered_candidates,
self.sort_criteria.clone(),
self.exhaustive_number_hits,
Some(distinct.clone()),
)?;
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria) self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
} }
None => Ok(SearchResult::default()), None => Ok(SearchResult::default()),
@ -262,6 +284,7 @@ impl fmt::Debug for Search<'_> {
terms_matching_strategy, terms_matching_strategy,
authorize_typos, authorize_typos,
words_limit, words_limit,
exhaustive_number_hits,
rtxn: _, rtxn: _,
index: _, index: _,
} = self; } = self;
@ -273,6 +296,7 @@ impl fmt::Debug for Search<'_> {
.field("sort_criteria", sort_criteria) .field("sort_criteria", sort_criteria)
.field("terms_matching_strategy", terms_matching_strategy) .field("terms_matching_strategy", terms_matching_strategy)
.field("authorize_typos", authorize_typos) .field("authorize_typos", authorize_typos)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("words_limit", words_limit) .field("words_limit", words_limit)
.finish() .finish()
} }

View File

@ -51,7 +51,7 @@ macro_rules! test_criterion {
}; };
} }
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); test_criterion!(none_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]);
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]); test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]);
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]); test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]);
test_criterion!( test_criterion!(