From a3968063436911975fb1fc47e1c1ed7085c13e0c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 12 Jul 2022 17:56:50 +0200 Subject: [PATCH] Add settings to force milli to exhaustively compute the total number of hits --- milli/src/search/criteria/initial.rs | 32 ++++++++++++++++++++++------ milli/src/search/criteria/mod.rs | 4 +++- milli/src/search/criteria/typo.rs | 9 ++++---- milli/src/search/mod.rs | 10 +++++++++ milli/tests/search/query_criteria.rs | 2 +- 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs index 514dbff96..2aabe9b13 100644 --- a/milli/src/search/criteria/initial.rs +++ b/milli/src/search/criteria/initial.rs @@ -1,17 +1,22 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; +use crate::search::criteria::{resolve_query_tree, Context}; use crate::search::query_tree::Operation; use crate::Result; -pub struct Initial { +pub struct Initial<'t> { + ctx: &'t dyn Context<'t>, answer: Option, + exhaustive_number_hits: bool, } -impl Initial { +impl<'t> Initial<'t> { pub fn new( + ctx: &'t dyn Context<'t>, query_tree: Option, filtered_candidates: Option, + exhaustive_number_hits: bool, ) -> Initial { let answer = CriterionResult { query_tree, @@ -19,13 +24,28 @@ impl Initial { filtered_candidates, bucket_candidates: None, }; - Initial { answer: Some(answer) } + Initial { ctx, answer: Some(answer), exhaustive_number_hits } } } -impl Criterion for Initial { +impl Criterion for Initial<'_> { #[logging_timer::time("Initial::{}")] - fn next(&mut self, _: &mut CriterionParameters) -> Result> { - Ok(self.answer.take()) + fn next(&mut self, params: &mut CriterionParameters) -> Result> { + self.answer + .take() + .map(|mut answer| { + if self.exhaustive_number_hits && answer.query_tree.is_some() { + let candidates = resolve_query_tree( + self.ctx, + answer.query_tree.as_ref().unwrap(), + &mut params.wdcache, + )?; + + answer.candidates = Some(candidates.clone()); + answer.bucket_candidates = Some(candidates); + } + Ok(answer) + }) + .transpose() } } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index f48865ba5..6c4fa51d3 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -232,13 +232,15 @@ impl<'t> CriteriaBuilder<'t> { primitive_query: Option>, filtered_candidates: Option, sort_criteria: Option>, + exhaustive_number_hits: bool, ) -> Result> { use crate::criterion::Criterion as Name; let primitive_query = primitive_query.unwrap_or_default(); let mut criterion = - Box::new(Initial::new(query_tree, filtered_candidates)) as Box; + Box::new(Initial::new(self, query_tree, filtered_candidates, exhaustive_number_hits)) + as Box; for name in self.index.criteria(&self.rtxn)? { criterion = match name { Name::Words => Box::new(Words::new(self, criterion)), diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index e9e6fb2f5..f1537ed48 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -368,7 +368,7 @@ mod test { excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(query_tree, facet_candidates); + let parent = Initial::new(&context, query_tree, facet_candidates, false); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -405,7 +405,7 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(Some(query_tree), facet_candidates); + let parent = Initial::new(&context, Some(query_tree), facet_candidates, false); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -439,7 +439,7 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(query_tree, Some(facet_candidates.clone())); + let parent = Initial::new(&context, query_tree, Some(facet_candidates.clone()), false); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); @@ -476,7 +476,8 @@ mod test { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; - let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); + let parent = + Initial::new(&context, Some(query_tree), Some(facet_candidates.clone()), false); let criteria = Typo::new(&context, Box::new(parent)); let result = display_criteria(criteria, criterion_parameters); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7145c1445..6f1e1b34c 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -47,6 +47,7 @@ pub struct Search<'a> { terms_matching_strategy: TermsMatchingStrategy, authorize_typos: bool, words_limit: usize, + exhaustive_number_hits: bool, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -61,6 +62,7 @@ impl<'a> Search<'a> { sort_criteria: None, terms_matching_strategy: TermsMatchingStrategy::default(), authorize_typos: true, + exhaustive_number_hits: false, words_limit: 10, rtxn, index, @@ -107,6 +109,11 @@ impl<'a> Search<'a> { self } + pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> { + self.exhaustive_number_hits = exhaustive_number_hits; + self + } + fn is_typo_authorized(&self) -> Result { let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; // only authorize typos if both the index and the query allow it. @@ -189,6 +196,7 @@ impl<'a> Search<'a> { primitive_query, filtered_candidates, self.sort_criteria.clone(), + self.exhaustive_number_hits, )?; match self.index.distinct_field(self.rtxn)? { @@ -262,6 +270,7 @@ impl fmt::Debug for Search<'_> { terms_matching_strategy, authorize_typos, words_limit, + exhaustive_number_hits, rtxn: _, index: _, } = self; @@ -273,6 +282,7 @@ impl fmt::Debug for Search<'_> { .field("sort_criteria", sort_criteria) .field("terms_matching_strategy", terms_matching_strategy) .field("authorize_typos", authorize_typos) + .field("exhaustive_number_hits", exhaustive_number_hits) .field("words_limit", words_limit) .finish() } diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index 8b72c8420..f873f56f7 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -51,7 +51,7 @@ macro_rules! test_criterion { }; } -test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); +test_criterion!(none_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]); test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]); test_criterion!(