From 658f316511faf6f87d4d7733236887e80d5eef79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 23 Mar 2021 15:25:46 +0100 Subject: [PATCH] Introduce the Initial Criterion --- milli/Cargo.toml | 3 - milli/src/search/criteria/asc_desc.rs | 161 +++++++------------------ milli/src/search/criteria/attribute.rs | 88 ++++++-------- milli/src/search/criteria/fetcher.rs | 135 --------------------- milli/src/search/criteria/final.rs | 57 +++++++++ milli/src/search/criteria/initial.rs | 28 +++++ milli/src/search/criteria/mod.rs | 65 ++++------ milli/src/search/criteria/proximity.rs | 151 ++++++++--------------- milli/src/search/criteria/typo.rs | 66 ++++------ milli/src/search/criteria/words.rs | 56 +++------ milli/src/search/mod.rs | 9 +- 11 files changed, 286 insertions(+), 533 deletions(-) delete mode 100644 milli/src/search/criteria/fetcher.rs create mode 100644 milli/src/search/criteria/final.rs create mode 100644 milli/src/search/criteria/initial.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index eefdfa7d5..ef9c64b7b 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -57,9 +57,6 @@ criterion = "0.3.4" maplit = "1.0.2" rand = "0.8.3" -[build-dependencies] -fst = "0.4.5" - [features] default = [] diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 1dc186720..d2841d449 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -31,32 +31,10 @@ pub struct AscDesc<'t> { candidates: Box> + 't>, bucket_candidates: RoaringBitmap, faceted_candidates: RoaringBitmap, - parent: Option>, + parent: Box, } impl<'t> AscDesc<'t> { - pub fn initial_asc( - index: &'t Index, - rtxn: &'t heed::RoTxn, - query_tree: Option, - candidates: Option, - field_name: String, - ) -> anyhow::Result - { - Self::initial(index, rtxn, query_tree, candidates, field_name, true) - } - - pub fn initial_desc( - index: &'t Index, - rtxn: &'t heed::RoTxn, - query_tree: Option, - candidates: Option, - field_name: String, - ) -> anyhow::Result - { - Self::initial(index, rtxn, query_tree, candidates, field_name, false) - } - pub fn asc( index: &'t Index, rtxn: &'t heed::RoTxn, @@ -77,47 +55,6 @@ impl<'t> AscDesc<'t> { Self::new(index, rtxn, parent, field_name, false) } - fn initial( - index: &'t Index, - rtxn: &'t heed::RoTxn, - query_tree: Option, - candidates: Option, - field_name: String, - ascending: bool, - ) -> anyhow::Result - { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let faceted_fields = index.faceted_fields(rtxn)?; - let (field_id, facet_type) = field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?; - - let faceted_candidates = index.faceted_documents_ids(rtxn, field_id)?; - let candidates = match &query_tree { - Some(qt) => { - let context = CriteriaBuilder::new(rtxn, index)?; - let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), &mut WordDerivationsCache::new())?; - if let Some(candidates) = candidates { - qt_candidates.intersect_with(&candidates); - } - qt_candidates - }, - None => candidates.unwrap_or(faceted_candidates.clone()), - }; - - Ok(AscDesc { - index, - rtxn, - field_name, - field_id, - facet_type, - ascending, - query_tree, - candidates: facet_ordered(index, rtxn, field_id, facet_type, ascending, candidates)?, - faceted_candidates, - bucket_candidates: RoaringBitmap::new(), - parent: None, - }) - } - fn new( index: &'t Index, rtxn: &'t heed::RoTxn, @@ -141,7 +78,7 @@ impl<'t> AscDesc<'t> { candidates: Box::new(std::iter::empty()), faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?, bucket_candidates: RoaringBitmap::new(), - parent: Some(parent), + parent, }) } } @@ -156,64 +93,56 @@ impl<'t> Criterion for AscDesc<'t> { match self.candidates.next().transpose()? { None => { - let query_tree = self.query_tree.take(); - let bucket_candidates = take(&mut self.bucket_candidates); - match self.parent.as_mut() { - Some(parent) => { - match parent.next(wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - self.query_tree = query_tree; - let candidates = match (&self.query_tree, candidates) { - (_, Some(mut candidates)) => { - candidates.intersect_with(&self.faceted_candidates); - candidates - }, - (Some(qt), None) => { - let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; - let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?; - candidates.intersect_with(&self.faceted_candidates); - candidates - }, - (None, None) => take(&mut self.faceted_candidates), - }; - if bucket_candidates.is_empty() { - self.bucket_candidates.union_with(&candidates); - } else { - self.bucket_candidates.union_with(&bucket_candidates); - } - self.candidates = facet_ordered( - self.index, - self.rtxn, - self.field_id, - self.facet_type, - self.ascending, - candidates, - )?; + match self.parent.next(wdcache)? { + Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + let candidates_is_some = candidates.is_some(); + self.query_tree = query_tree; + let candidates = match (&self.query_tree, candidates) { + (_, Some(mut candidates)) => { + candidates.intersect_with(&self.faceted_candidates); + candidates }, - None => return Ok(None), - } - }, - None => if query_tree.is_none() && bucket_candidates.is_empty() { - return Ok(None) - }, - } + (Some(qt), None) => { + let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; + let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?; + candidates.intersect_with(&self.faceted_candidates); + candidates + }, + (None, None) => take(&mut self.faceted_candidates), + }; - return Ok(Some(CriterionResult { - query_tree, - candidates: Some(RoaringBitmap::new()), - bucket_candidates, - })); + // If our parent returns candidates it means that the bucket + // candidates were already computed before and we can use them. + // + // If not, we must use the just computed candidates as our bucket + // candidates. + if candidates_is_some { + self.bucket_candidates.union_with(&bucket_candidates); + } else { + self.bucket_candidates.union_with(&candidates); + } + + if candidates.is_empty() { + continue; + } + + self.candidates = facet_ordered( + self.index, + self.rtxn, + self.field_id, + self.facet_type, + self.ascending, + candidates, + )?; + }, + None => return Ok(None), + } }, Some(candidates) => { - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => candidates.clone(), - }; - return Ok(Some(CriterionResult { query_tree: self.query_tree.clone(), candidates: Some(candidates), - bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); }, } diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index 7f8b5c622..6398c7d87 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -14,36 +14,19 @@ pub struct Attribute<'t> { query_tree: Option, candidates: Option, bucket_candidates: RoaringBitmap, - parent: Option>, + parent: Box, flattened_query_tree: Option>>, current_buckets: Option>, } impl<'t> Attribute<'t> { - pub fn initial( - ctx: &'t dyn Context, - query_tree: Option, - candidates: Option, - ) -> Self - { - Attribute { - ctx, - query_tree, - candidates, - bucket_candidates: RoaringBitmap::new(), - parent: None, - flattened_query_tree: None, - current_buckets: None, - } - } - pub fn new(ctx: &'t dyn Context, parent: Box) -> Self { Attribute { ctx, query_tree: None, candidates: None, bucket_candidates: RoaringBitmap::new(), - parent: Some(parent), + parent, flattened_query_tree: None, current_buckets: None, } @@ -63,34 +46,35 @@ impl<'t> Criterion for Attribute<'t> { })); }, (Some(qt), Some(candidates)) => { - let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| flatten_query_tree(&qt)); - let current_buckets = if let Some(current_buckets) = self.current_buckets.as_mut() { - current_buckets - } else { - let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?; - self.current_buckets.get_or_insert(new_buckets.into_iter()) + let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| { + flatten_query_tree(&qt) + }); + + let current_buckets = match self.current_buckets.as_mut() { + Some(current_buckets) => current_buckets, + None => { + let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?; + self.current_buckets.get_or_insert(new_buckets.into_iter()) + }, }; - let found_candidates = if let Some((_score, candidates)) = current_buckets.next() { - candidates - } else { - return Ok(Some(CriterionResult { - query_tree: self.query_tree.take(), - candidates: self.candidates.take(), - bucket_candidates: take(&mut self.bucket_candidates), - })); + let found_candidates = match current_buckets.next() { + Some((_score, candidates)) => candidates, + None => { + return Ok(Some(CriterionResult { + query_tree: self.query_tree.take(), + candidates: self.candidates.take(), + bucket_candidates: take(&mut self.bucket_candidates), + })); + }, }; + candidates.difference_with(&found_candidates); - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => found_candidates.clone(), - }; - return Ok(Some(CriterionResult { query_tree: self.query_tree.clone(), candidates: Some(found_candidates), - bucket_candidates: bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); }, (Some(qt), None) => { @@ -106,18 +90,20 @@ impl<'t> Criterion for Attribute<'t> { })); }, (None, None) => { - match self.parent.as_mut() { - Some(parent) => { - match parent.next(wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - self.query_tree = query_tree; - self.candidates = candidates; - self.bucket_candidates.union_with(&bucket_candidates); - self.flattened_query_tree = None; - self.current_buckets = None; - }, - None => return Ok(None), - } + match self.parent.next(wdcache)? { + Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { + return Ok(Some(CriterionResult { + query_tree: None, + candidates: None, + bucket_candidates, + })); + }, + Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + self.query_tree = query_tree; + self.candidates = candidates; + self.bucket_candidates.union_with(&bucket_candidates); + self.flattened_query_tree = None; + self.current_buckets = None; }, None => return Ok(None), } diff --git a/milli/src/search/criteria/fetcher.rs b/milli/src/search/criteria/fetcher.rs deleted file mode 100644 index fa204bdf2..000000000 --- a/milli/src/search/criteria/fetcher.rs +++ /dev/null @@ -1,135 +0,0 @@ -use std::collections::HashMap; -use std::mem::take; - -use log::debug; -use roaring::RoaringBitmap; - -use crate::search::query_tree::Operation; -use crate::search::WordDerivationsCache; -use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context}; - -/// The result of a call to the fetcher. -#[derive(Debug, Clone, PartialEq)] -pub struct FetcherResult { - /// The query tree corresponding to the current bucket of the last criterion. - pub query_tree: Option, - /// The candidates of the current bucket of the last criterion. - pub candidates: RoaringBitmap, - /// Candidates that comes from the current bucket of the initial criterion. - pub bucket_candidates: RoaringBitmap, -} - -pub struct Fetcher<'t> { - ctx: &'t dyn Context, - query_tree: Option, - candidates: Candidates, - parent: Option>, - should_get_documents_ids: bool, - wdcache: WordDerivationsCache, -} - -impl<'t> Fetcher<'t> { - pub fn initial( - ctx: &'t dyn Context, - query_tree: Option, - candidates: Option, - ) -> Self - { - Fetcher { - ctx, - query_tree, - candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed), - parent: None, - should_get_documents_ids: true, - wdcache: WordDerivationsCache::new(), - } - } - - pub fn new( - ctx: &'t dyn Context, - parent: Box, - ) -> Self - { - Fetcher { - ctx, - query_tree: None, - candidates: Candidates::default(), - parent: Some(parent), - should_get_documents_ids: true, - wdcache: WordDerivationsCache::new(), - } - } - - #[logging_timer::time("Fetcher::{}")] - pub fn next(&mut self) -> anyhow::Result> { - use Candidates::{Allowed, Forbidden}; - loop { - debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})", - self.should_get_documents_ids, self.candidates, - ); - - let should_get_documents_ids = take(&mut self.should_get_documents_ids); - match &mut self.candidates { - Allowed(_) => { - let candidates = take(&mut self.candidates).into_inner(); - let candidates = match &self.query_tree { - Some(qt) if should_get_documents_ids => { - let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?; - docids.intersect_with(&candidates); - docids - }, - _ => candidates, - }; - - return Ok(Some(FetcherResult { - query_tree: self.query_tree.take(), - candidates: candidates.clone(), - bucket_candidates: candidates, - })); - }, - Forbidden(_) => { - match self.parent.as_mut() { - Some(parent) => { - match parent.next(&mut self.wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - let candidates = match (&query_tree, candidates) { - (_, Some(candidates)) => candidates, - (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?, - (None, None) => RoaringBitmap::new(), - }; - - return Ok(Some(FetcherResult { query_tree, candidates, bucket_candidates })) - }, - None => if should_get_documents_ids { - let candidates = match &self.query_tree { - Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?, - None => self.ctx.documents_ids()?, - }; - - return Ok(Some(FetcherResult { - query_tree: self.query_tree.clone(), - candidates: candidates.clone(), - bucket_candidates: candidates, - })); - }, - } - }, - None => if should_get_documents_ids { - let candidates = match &self.query_tree { - Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?, - None => self.ctx.documents_ids()?, - }; - - return Ok(Some(FetcherResult { - query_tree: self.query_tree.clone(), - candidates: candidates.clone(), - bucket_candidates: candidates, - })); - }, - } - return Ok(None); - }, - } - } - } -} diff --git a/milli/src/search/criteria/final.rs b/milli/src/search/criteria/final.rs new file mode 100644 index 000000000..fe224ef94 --- /dev/null +++ b/milli/src/search/criteria/final.rs @@ -0,0 +1,57 @@ +use std::collections::HashMap; + +use log::debug; +use roaring::RoaringBitmap; + +use crate::search::query_tree::Operation; +use crate::search::WordDerivationsCache; +use super::{resolve_query_tree, Criterion, CriterionResult, Context}; + +/// The result of a call to the fetcher. +#[derive(Debug, Clone, PartialEq)] +pub struct FinalResult { + /// The query tree corresponding to the current bucket of the last criterion. + pub query_tree: Option, + /// The candidates of the current bucket of the last criterion. + pub candidates: RoaringBitmap, + /// Candidates that comes from the current bucket of the initial criterion. + pub bucket_candidates: RoaringBitmap, +} + +pub struct Final<'t> { + ctx: &'t dyn Context, + parent: Box, + wdcache: WordDerivationsCache, +} + +impl<'t> Final<'t> { + pub fn new(ctx: &'t dyn Context, parent: Box) -> Final<'t> { + Final { ctx, parent, wdcache: WordDerivationsCache::new() } + } + + #[logging_timer::time("Final::{}")] + pub fn next(&mut self) -> anyhow::Result> { + loop { + debug!("Final iteration"); + + match self.parent.next(&mut self.wdcache)? { + Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => { + let candidates = match (&query_tree, candidates) { + (_, Some(candidates)) => candidates, + (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?, + (None, None) => self.ctx.documents_ids()?, + }; + + bucket_candidates.union_with(&candidates); + + return Ok(Some(FinalResult { + query_tree, + candidates, + bucket_candidates, + })); + }, + None => return Ok(None), + } + } + } +} diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs new file mode 100644 index 000000000..d4b9e1379 --- /dev/null +++ b/milli/src/search/criteria/initial.rs @@ -0,0 +1,28 @@ +use roaring::RoaringBitmap; + +use crate::search::query_tree::Operation; +use crate::search::WordDerivationsCache; + +use super::{Criterion, CriterionResult}; + +pub struct Initial { + answer: Option +} + +impl Initial { + pub fn new(query_tree: Option, mut candidates: Option) -> Initial { + let answer = CriterionResult { + query_tree, + candidates: candidates.clone(), + bucket_candidates: candidates.take().unwrap_or_default(), + }; + Initial { answer: Some(answer) } + } +} + +impl Criterion for Initial { + #[logging_timer::time("Initial::{}")] + fn next(&mut self, _: &mut WordDerivationsCache) -> anyhow::Result> { + Ok(self.answer.take()) + } +} diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 1d7026d71..5e75be6ce 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -8,19 +8,21 @@ use crate::search::{word_derivations, WordDerivationsCache}; use crate::{Index, DocumentId}; use super::query_tree::{Operation, Query, QueryKind}; +use self::asc_desc::AscDesc; +use self::attribute::Attribute; +use self::r#final::Final; +use self::initial::Initial; +use self::proximity::Proximity; use self::typo::Typo; use self::words::Words; -use self::asc_desc::AscDesc; -use self::proximity::Proximity; -use self::attribute::Attribute; -use self::fetcher::Fetcher; +mod asc_desc; +mod attribute; +mod initial; +mod proximity; mod typo; mod words; -mod asc_desc; -mod proximity; -mod attribute; -pub mod fetcher; +pub mod r#final; pub trait Criterion { fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result>; @@ -61,6 +63,7 @@ impl Default for Candidates { Self::Forbidden(RoaringBitmap::new()) } } + pub trait Context { fn documents_ids(&self) -> heed::Result; fn word_docids(&self, word: &str) -> heed::Result>; @@ -128,44 +131,26 @@ impl<'t> CriteriaBuilder<'t> { pub fn build( &'t self, - mut query_tree: Option, - mut facet_candidates: Option, - ) -> anyhow::Result> + query_tree: Option, + facet_candidates: Option, + ) -> anyhow::Result> { use crate::criterion::Criterion as Name; - let mut criterion = None as Option>; + let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box; for name in self.index.criteria(&self.rtxn)? { - criterion = Some(match criterion.take() { - Some(father) => match name { - Name::Typo => Box::new(Typo::new(self, father)), - Name::Words => Box::new(Words::new(self, father)), - Name::Proximity => Box::new(Proximity::new(self, father)), - Name::Attribute => Box::new(Attribute::new(self, father)), - Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, father, field)?), - Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, father, field)?), - _otherwise => father, - }, - None => match name { - Name::Typo => Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take())), - Name::Words => Box::new(Words::initial(self, query_tree.take(), facet_candidates.take())), - Name::Proximity => Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take())), - Name::Attribute => Box::new(Attribute::initial(self, query_tree.take(), facet_candidates.take())), - Name::Asc(field) => { - Box::new(AscDesc::initial_asc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?) - }, - Name::Desc(field) => { - Box::new(AscDesc::initial_desc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?) - }, - _otherwise => continue, - }, - }); + criterion = match name { + Name::Typo => Box::new(Typo::new(self, criterion)), + Name::Words => Box::new(Words::new(self, criterion)), + Name::Proximity => Box::new(Proximity::new(self, criterion)), + Name::Attribute => Box::new(Attribute::new(self, criterion)), + Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?), + Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?), + _otherwise => criterion, + }; } - match criterion { - Some(criterion) => Ok(Fetcher::new(self, criterion)), - None => Ok(Fetcher::initial(self, query_tree, facet_candidates)), - } + Ok(Final::new(self, criterion)) } } diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index decd4c338..dc1daafb2 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -8,48 +8,29 @@ use log::debug; use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::{build_dfa, WordDerivationsCache}; -use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree}; +use super::{Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree}; type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>; pub struct Proximity<'t> { ctx: &'t dyn Context, - query_tree: Option<(usize, Operation)>, + /// ((max_proximity, query_tree), allowed_candidates) + state: Option<(Option<(usize, Operation)>, RoaringBitmap)>, proximity: u8, - candidates: Candidates, bucket_candidates: RoaringBitmap, - parent: Option>, + parent: Box, candidates_cache: Cache, plane_sweep_cache: Option>, } impl<'t> Proximity<'t> { - pub fn initial( - ctx: &'t dyn Context, - query_tree: Option, - candidates: Option, - ) -> Self - { - Proximity { - ctx, - query_tree: query_tree.map(|op| (maximum_proximity(&op), op)), - proximity: 0, - candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed), - bucket_candidates: RoaringBitmap::new(), - parent: None, - candidates_cache: Cache::new(), - plane_sweep_cache: None, - } - } - pub fn new(ctx: &'t dyn Context, parent: Box) -> Self { Proximity { ctx, - query_tree: None, + state: None, proximity: 0, - candidates: Candidates::default(), bucket_candidates: RoaringBitmap::new(), - parent: Some(parent), + parent: parent, candidates_cache: Cache::new(), plane_sweep_cache: None, } @@ -59,27 +40,20 @@ impl<'t> Proximity<'t> { impl<'t> Criterion for Proximity<'t> { #[logging_timer::time("Proximity::{}")] fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result> { - use Candidates::{Allowed, Forbidden}; loop { - debug!("Proximity at iteration {} (max {:?}) ({:?})", + debug!("Proximity at iteration {} (max prox {:?}) ({:?})", self.proximity, - self.query_tree.as_ref().map(|(mp, _)| mp), - self.candidates, + self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)), + self.state.as_ref().map(|(_, cd)| cd), ); - match (&mut self.query_tree, &mut self.candidates) { - (_, Allowed(candidates)) if candidates.is_empty() => { - return Ok(Some(CriterionResult { - query_tree: self.query_tree.take().map(|(_, qt)| qt), - candidates: Some(take(&mut self.candidates).into_inner()), - bucket_candidates: take(&mut self.bucket_candidates), - })); + match &mut self.state { + Some((_, candidates)) if candidates.is_empty() => { + self.state = None; // reset state }, - (Some((max_prox, query_tree)), Allowed(candidates)) => { + Some((Some((max_prox, query_tree)), candidates)) => { if self.proximity as usize > *max_prox { - // reset state to (None, Forbidden(_)) - self.query_tree = None; - self.candidates = Candidates::default(); + self.state = None; // reset state } else { let mut new_candidates = if candidates.len() <= 1000 { if let Some(cache) = self.plane_sweep_cache.as_mut() { @@ -89,9 +63,7 @@ impl<'t> Criterion for Proximity<'t> { candidates }, None => { - // reset state to (None, Forbidden(_)) - self.query_tree = None; - self.candidates = Candidates::default(); + self.state = None; // reset state continue }, } @@ -120,79 +92,54 @@ impl<'t> Criterion for Proximity<'t> { candidates.difference_with(&new_candidates); self.proximity += 1; - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => new_candidates.clone(), - }; - return Ok(Some(CriterionResult { query_tree: Some(query_tree.clone()), candidates: Some(new_candidates), - bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); } }, - (Some((max_prox, query_tree)), Forbidden(candidates)) => { - if self.proximity as usize > *max_prox { - self.query_tree = None; - self.candidates = Candidates::default(); - } else { - let mut new_candidates = resolve_candidates( - self.ctx, - &query_tree, - self.proximity, - &mut self.candidates_cache, - wdcache, - )?; - - new_candidates.difference_with(&candidates); - candidates.union_with(&new_candidates); - self.proximity += 1; - - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => new_candidates.clone(), - }; - - return Ok(Some(CriterionResult { - query_tree: Some(query_tree.clone()), - candidates: Some(new_candidates), - bucket_candidates, - })); - } - }, - (None, Allowed(_)) => { - let candidates = take(&mut self.candidates).into_inner(); + Some((None, candidates)) => { + let candidates = take(candidates); + self.state = None; // reset state return Ok(Some(CriterionResult { query_tree: None, candidates: Some(candidates.clone()), bucket_candidates: candidates, })); }, - (None, Forbidden(_)) => { - match self.parent.as_mut() { - Some(parent) => { - match parent.next(wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - let candidates = match (&query_tree, candidates) { - (_, Some(candidates)) => candidates, - (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?, - (None, None) => RoaringBitmap::new(), - }; + None => { + match self.parent.next(wdcache)? { + Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { + return Ok(Some(CriterionResult { + query_tree: None, + candidates: None, + bucket_candidates, + })); + }, + Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + let candidates_is_some = candidates.is_some(); + let candidates = match (&query_tree, candidates) { + (_, Some(candidates)) => candidates, + (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?, + (None, None) => RoaringBitmap::new(), + }; - if bucket_candidates.is_empty() { - self.bucket_candidates.union_with(&candidates); - } else { - self.bucket_candidates.union_with(&bucket_candidates); - } - - self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op)); - self.proximity = 0; - self.candidates = Candidates::Allowed(candidates); - self.plane_sweep_cache = None; - }, - None => return Ok(None), + // If our parent returns candidates it means that the bucket + // candidates were already computed before and we can use them. + // + // If not, we must use the just computed candidates as our bucket + // candidates. + if candidates_is_some { + self.bucket_candidates.union_with(&bucket_candidates); + } else { + self.bucket_candidates.union_with(&candidates); } + + let query_tree = query_tree.map(|op| (maximum_proximity(&op), op)); + self.state = Some((query_tree, candidates)); + self.proximity = 0; + self.plane_sweep_cache = None; }, None => return Ok(None), } diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 3877f53ed..40b06afc4 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -14,28 +14,11 @@ pub struct Typo<'t> { number_typos: u8, candidates: Candidates, bucket_candidates: RoaringBitmap, - parent: Option>, + parent: Box, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, } impl<'t> Typo<'t> { - pub fn initial( - ctx: &'t dyn Context, - query_tree: Option, - candidates: Option, - ) -> Self - { - Typo { - ctx, - query_tree: query_tree.map(|op| (maximum_typo(&op), op)), - number_typos: 0, - candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed), - bucket_candidates: RoaringBitmap::new(), - parent: None, - candidates_cache: HashMap::new(), - } - } - pub fn new(ctx: &'t dyn Context, parent: Box) -> Self { Typo { ctx, @@ -43,7 +26,7 @@ impl<'t> Typo<'t> { number_typos: 0, candidates: Candidates::default(), bucket_candidates: RoaringBitmap::new(), - parent: Some(parent), + parent, candidates_cache: HashMap::new(), } } @@ -90,15 +73,10 @@ impl<'t> Criterion for Typo<'t> { candidates.difference_with(&new_candidates); self.number_typos += 1; - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => new_candidates.clone(), - }; - return Ok(Some(CriterionResult { query_tree: Some(new_query_tree), candidates: Some(new_candidates), - bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); } }, @@ -145,17 +123,19 @@ impl<'t> Criterion for Typo<'t> { })); }, (None, Forbidden(_)) => { - match self.parent.as_mut() { - Some(parent) => { - match parent.next(wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); - self.number_typos = 0; - self.candidates = candidates.map_or_else(Candidates::default, Candidates::Allowed); - self.bucket_candidates.union_with(&bucket_candidates); - }, - None => return Ok(None), - } + match self.parent.next(wdcache)? { + Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { + return Ok(Some(CriterionResult { + query_tree: None, + candidates: None, + bucket_candidates, + })); + }, + Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); + self.number_typos = 0; + self.candidates = candidates.map_or_else(Candidates::default, Candidates::Allowed); + self.bucket_candidates.union_with(&bucket_candidates); }, None => return Ok(None), } @@ -334,8 +314,8 @@ fn resolve_candidates<'t>( #[cfg(test)] mod test { - use super::*; + use super::super::initial::Initial; use super::super::test::TestContext; #[test] @@ -345,7 +325,8 @@ mod test { let facet_candidates = None; let mut wdcache = WordDerivationsCache::new(); - let mut criteria = Typo::initial(&context, query_tree, facet_candidates); + let parent = Initial::new(query_tree, facet_candidates); + let mut criteria = Typo::new(&context, Box::new(parent)); assert!(criteria.next(&mut wdcache).unwrap().is_none()); } @@ -364,7 +345,8 @@ mod test { let facet_candidates = None; let mut wdcache = WordDerivationsCache::new(); - let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates); + let parent = Initial::new(Some(query_tree), facet_candidates); + let mut criteria = Typo::new(&context, Box::new(parent)); let candidates_1 = context.word_docids("split").unwrap().unwrap() & context.word_docids("this").unwrap().unwrap() @@ -413,7 +395,8 @@ mod test { let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let mut wdcache = WordDerivationsCache::new(); - let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone())); + let parent = Initial::new(query_tree, Some(facet_candidates.clone())); + let mut criteria = Typo::new(&context, Box::new(parent)); let expected = CriterionResult { query_tree: None, @@ -442,7 +425,8 @@ mod test { let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let mut wdcache = WordDerivationsCache::new(); - let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone())); + let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); + let mut criteria = Typo::new(&context, Box::new(parent)); let candidates_1 = context.word_docids("split").unwrap().unwrap() & context.word_docids("this").unwrap().unwrap() diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index 0aa3b483a..5bb9d8d90 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -12,34 +12,18 @@ pub struct Words<'t> { query_trees: Vec, candidates: Option, bucket_candidates: RoaringBitmap, - parent: Option>, + parent: Box, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, } impl<'t> Words<'t> { - pub fn initial( - ctx: &'t dyn Context, - query_tree: Option, - candidates: Option, - ) -> Self - { - Words { - ctx, - query_trees: query_tree.map(explode_query_tree).unwrap_or_default(), - candidates, - bucket_candidates: RoaringBitmap::new(), - parent: None, - candidates_cache: HashMap::default(), - } - } - pub fn new(ctx: &'t dyn Context, parent: Box) -> Self { Words { ctx, query_trees: Vec::default(), candidates: None, bucket_candidates: RoaringBitmap::new(), - parent: Some(parent), + parent, candidates_cache: HashMap::default(), } } @@ -65,27 +49,17 @@ impl<'t> Criterion for Words<'t> { found_candidates.intersect_with(&candidates); candidates.difference_with(&found_candidates); - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => found_candidates.clone(), - }; - return Ok(Some(CriterionResult { query_tree: Some(qt), candidates: Some(found_candidates), - bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); }, (Some(qt), None) => { - let bucket_candidates = match self.parent { - Some(_) => take(&mut self.bucket_candidates), - None => RoaringBitmap::new(), - }; - return Ok(Some(CriterionResult { query_tree: Some(qt), candidates: None, - bucket_candidates, + bucket_candidates: take(&mut self.bucket_candidates), })); }, (None, Some(_)) => { @@ -97,16 +71,18 @@ impl<'t> Criterion for Words<'t> { })); }, (None, None) => { - match self.parent.as_mut() { - Some(parent) => { - match parent.next(wdcache)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default(); - self.candidates = candidates; - self.bucket_candidates.union_with(&bucket_candidates); - }, - None => return Ok(None), - } + match self.parent.next(wdcache)? { + Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { + return Ok(Some(CriterionResult { + query_tree: None, + candidates: None, + bucket_candidates, + })); + }, + Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default(); + self.candidates = candidates; + self.bucket_candidates.union_with(&bucket_candidates); }, None => return Ok(None), } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 174fff35c..4f0bde422 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -13,9 +13,8 @@ use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; - -use crate::search::criteria::fetcher::{Fetcher, FetcherResult}; -use crate::{DocumentId, Index}; +use crate::search::criteria::r#final::{Final, FinalResult}; +use crate::{Index, DocumentId}; pub use self::facet::{ FacetCondition, FacetDistribution, FacetIter, FacetNumberOperator, FacetStringOperator, @@ -162,14 +161,14 @@ impl<'a> Search<'a> { &self, mut distinct: impl for<'c> Distinct<'c>, matching_words: MatchingWords, - mut criteria: Fetcher, + mut criteria: Final, ) -> anyhow::Result { let mut offset = self.offset; let mut initial_candidates = RoaringBitmap::new(); let mut excluded_documents = RoaringBitmap::new(); let mut documents_ids = Vec::with_capacity(self.limit); - while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next()? { + while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next()? { debug!("Number of candidates found {}", candidates.len()); let excluded = take(&mut excluded_documents);