diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs index 514dbff96..ac61adfe2 100644 --- a/milli/src/search/criteria/initial.rs +++ b/milli/src/search/criteria/initial.rs @@ -1,31 +1,75 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; +use crate::search::criteria::{resolve_query_tree, Context}; use crate::search::query_tree::Operation; +use crate::search::Distinct; use crate::Result; - -pub struct Initial { +/// Initial is a mandatory criterion, it is always the first +/// and is meant to initalize the CriterionResult used by the other criteria. +/// It behave like an [Once Iterator](https://doc.rust-lang.org/std/iter/struct.Once.html) and will return Some(CriterionResult) only one time. +pub struct Initial<'t, D> { + ctx: &'t dyn Context<'t>, answer: Option, + exhaustive_number_hits: bool, + distinct: Option, } -impl Initial { +impl<'t, D> Initial<'t, D> { pub fn new( + ctx: &'t dyn Context<'t>, query_tree: Option, filtered_candidates: Option, - ) -> Initial { + exhaustive_number_hits: bool, + distinct: Option, + ) -> Initial { let answer = CriterionResult { query_tree, candidates: None, filtered_candidates, bucket_candidates: None, }; - Initial { answer: Some(answer) } + Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct } } } -impl Criterion for Initial { +impl Criterion for Initial<'_, D> { #[logging_timer::time("Initial::{}")] - fn next(&mut self, _: &mut CriterionParameters) -> Result> { - Ok(self.answer.take()) + fn next(&mut self, params: &mut CriterionParameters) -> Result> { + self.answer + .take() + .map(|mut answer| { + if self.exhaustive_number_hits && answer.query_tree.is_some() { + // resolve the whole query tree to retrieve an exhaustive list of documents matching the query. + let mut candidates = resolve_query_tree( + self.ctx, + answer.query_tree.as_ref().unwrap(), + &mut params.wdcache, + )?; + + // Apply the filters on the documents retrieved with the query tree. + if let Some(ref filtered_candidates) = answer.filtered_candidates { + candidates &= filtered_candidates; + } + + // because the bucket_candidates should be an exhaustive count of the matching documents, + // we precompute the distinct attributes. + let bucket_candidates = match &mut self.distinct { + Some(distinct) => { + let mut bucket_candidates = RoaringBitmap::new(); + for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) { + bucket_candidates.insert(c?); + } + bucket_candidates + } + None => candidates.clone(), + }; + + answer.candidates = Some(candidates); + answer.bucket_candidates = Some(bucket_candidates); + } + Ok(answer) + }) + .transpose() } } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index f48865ba5..866eaefde 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -13,7 +13,7 @@ use self::typo::Typo; use self::words::Words; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use crate::search::criteria::geo::Geo; -use crate::search::{word_derivations, WordDerivationsCache}; +use crate::search::{word_derivations, Distinct, WordDerivationsCache}; use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; mod asc_desc; @@ -226,19 +226,26 @@ impl<'t> CriteriaBuilder<'t> { Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) } - pub fn build( + pub fn build( &'t self, query_tree: Option, primitive_query: Option>, filtered_candidates: Option, sort_criteria: Option>, + exhaustive_number_hits: bool, + distinct: Option, ) -> Result> { use crate::criterion::Criterion as Name; let primitive_query = primitive_query.unwrap_or_default(); - let mut criterion = - Box::new(Initial::new(query_tree, filtered_candidates)) as Box; + let mut criterion = Box::new(Initial::new( + self, + query_tree, + filtered_candidates, + exhaustive_number_hits, + distinct, + )) as Box; for name in self.index.criteria(&self.rtxn)? { criterion = match name { Name::Words => Box::new(Words::new(self, criterion)), diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index e9e6fb2f5..605089fae 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -348,6 +348,7 @@ mod test { use super::super::initial::Initial; use super::super::test::TestContext; use super::*; + use crate::search::NoopDistinct; fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String { let mut result = String::new(); @@ -368,7 +369,8 @@ mod test { excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(query_tree, facet_candidates); + let parent = + Initial::::new(&context, query_tree, facet_candidates, false, None); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -405,7 +407,8 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(Some(query_tree), facet_candidates); + let parent = + Initial::::new(&context, Some(query_tree), facet_candidates, false, None); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -439,7 +442,13 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(query_tree, Some(facet_candidates.clone())); + let parent = Initial::::new( + &context, + query_tree, + Some(facet_candidates.clone()), + false, + None, + ); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -476,7 +485,13 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); + let parent = Initial::::new( + &context, + Some(query_tree), + Some(facet_candidates.clone()), + false, + None, + ); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 4436d4cda..33e7b4975 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -21,6 +21,7 @@ const DOCID_SIZE: usize = size_of::(); /// care to keep the document we are currently on, and remove it from the excluded list. The next /// iterations will never contain any occurence of a document with the same distinct value as a /// document from previous iterations. +#[derive(Clone)] pub struct FacetDistinct<'a> { distinct: FieldId, index: &'a Index, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7145c1445..20003c676 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -47,6 +47,7 @@ pub struct Search<'a> { terms_matching_strategy: TermsMatchingStrategy, authorize_typos: bool, words_limit: usize, + exhaustive_number_hits: bool, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -61,6 +62,7 @@ impl<'a> Search<'a> { sort_criteria: None, terms_matching_strategy: TermsMatchingStrategy::default(), authorize_typos: true, + exhaustive_number_hits: false, words_limit: 10, rtxn, index, @@ -107,6 +109,13 @@ impl<'a> Search<'a> { self } + /// Force the search to exhastivelly compute the number of candidates, + /// this will increase the search time but allows finite pagination. + pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> { + self.exhaustive_number_hits = exhaustive_number_hits; + self + } + fn is_typo_authorized(&self) -> Result { let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; // only authorize typos if both the index and the query allow it. @@ -184,20 +193,33 @@ impl<'a> Search<'a> { } let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; - let criteria = criteria_builder.build( - query_tree, - primitive_query, - filtered_candidates, - self.sort_criteria.clone(), - )?; match self.index.distinct_field(self.rtxn)? { - None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria), + None => { + let criteria = criteria_builder.build::( + query_tree, + primitive_query, + filtered_candidates, + self.sort_criteria.clone(), + self.exhaustive_number_hits, + None, + )?; + self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria) + } Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; match field_ids_map.id(name) { Some(fid) => { let distinct = FacetDistinct::new(fid, self.index, self.rtxn); + + let criteria = criteria_builder.build( + query_tree, + primitive_query, + filtered_candidates, + self.sort_criteria.clone(), + self.exhaustive_number_hits, + Some(distinct.clone()), + )?; self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria) } None => Ok(SearchResult::default()), @@ -262,6 +284,7 @@ impl fmt::Debug for Search<'_> { terms_matching_strategy, authorize_typos, words_limit, + exhaustive_number_hits, rtxn: _, index: _, } = self; @@ -273,6 +296,7 @@ impl fmt::Debug for Search<'_> { .field("sort_criteria", sort_criteria) .field("terms_matching_strategy", terms_matching_strategy) .field("authorize_typos", authorize_typos) + .field("exhaustive_number_hits", exhaustive_number_hits) .field("words_limit", words_limit) .finish() } diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index 8b72c8420..f873f56f7 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -51,7 +51,7 @@ macro_rules! test_criterion { }; } -test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); +test_criterion!(none_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]); test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]); test_criterion!(