// Patch overview (unified git diff over two files of the milli search criteria module).
//
// milli/src/search/criteria/mod.rs:
//   * The `Context` trait drops the query-level methods `query_docids` and
//     `query_pair_proximity_docids` and gains five narrower methods: `word_docids`,
//     `word_prefix_docids`, `word_pair_proximity_docids`,
//     `word_prefix_pair_proximity_docids` and `in_prefix_cache`. `words_fst` is kept.
//   * `HeedContext` implements each new lookup method as a single `get` on the matching
//     `self.index` database using `self.rtxn`; `in_prefix_cache` moves from the inherent
//     impl into the trait impl and still calls `self.words_prefixes_fst.contains(word)`.
//   * The removed query-level logic is re-added as free functions taking `ctx: &dyn Context`:
//     `all_word_pair_proximity_docids`, `query_docids` and `query_pair_proximity_docids`.
//     They match on `QueryKind::Exact` / `QueryKind::Tolerant`, expand words through
//     `word_typos(.., ctx.words_fst())` when needed, and union every lookup result into a
//     `RoaringBitmap`, treating a missing entry as empty via `unwrap_or_default()`.
//
// milli/src/search/criteria/typo.rs:
//   * The `use super::{..}` list additionally imports `query_docids` and
//     `query_pair_proximity_docids`.
//   * Two call sites in `resolve_candidates` switch from `ctx.query_pair_proximity_docids(..)`
//     and `ctx.query_docids(..)` to the free functions with `ctx` as first argument.
//
// NOTE(review): this copy looks extraction-damaged - line breaks are collapsed, generic
// arguments appear stripped (return types read `anyhow::Result;` and `heed::Result>`), and
// `¤t_docids` is presumably a mis-encoded `&current_docids`. It will likely not apply
// as a patch as-is; confirm against the original commit before use.
diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 45689bbe5..d033f5707 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -46,9 +46,12 @@ impl Default for Candidates { } } pub trait Context { - fn query_docids(&self, query: &Query) -> anyhow::Result; - fn query_pair_proximity_docids(&self, left: &Query, right: &Query, proximity: u8) ->anyhow::Result; + fn word_docids(&self, word: &str) -> heed::Result>; + fn word_prefix_docids(&self, word: &str) -> heed::Result>; + fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result>; + fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result>; fn words_fst<'t>(&self) -> &'t fst::Set>; + fn in_prefix_cache(&self, word: &str) -> bool; } pub struct HeedContext<'t> { rtxn: &'t heed::RoTxn<'t>, @@ -58,83 +61,31 @@ pub struct HeedContext<'t> { } impl<'a> Context for HeedContext<'a> { - fn query_docids(&self, query: &Query) -> anyhow::Result { - match &query.kind { - QueryKind::Exact { word, .. 
} => { - if query.prefix && self.in_prefix_cache(&word) { - Ok(self.index.word_prefix_docids.get(self.rtxn, &word)?.unwrap_or_default()) - } else if query.prefix { - let words = word_typos(&word, true, 0, &self.words_fst)?; - let mut docids = RoaringBitmap::new(); - for (word, _typo) in words { - let current_docids = self.index.word_docids.get(self.rtxn, &word)?.unwrap_or_default(); - docids.union_with(¤t_docids); - } - Ok(docids) - } else { - Ok(self.index.word_docids.get(self.rtxn, &word)?.unwrap_or_default()) - } - }, - QueryKind::Tolerant { typo, word } => { - let words = word_typos(&word, query.prefix, *typo, &self.words_fst)?; - let mut docids = RoaringBitmap::new(); - for (word, _typo) in words { - let current_docids = self.index.word_docids.get(self.rtxn, &word)?.unwrap_or_default(); - docids.union_with(¤t_docids); - } - Ok(docids) - }, - } + fn word_docids(&self, word: &str) -> heed::Result> { + self.index.word_docids.get(self.rtxn, &word) } - fn query_pair_proximity_docids(&self, left: &Query, right: &Query, proximity: u8) -> anyhow::Result { - let prefix = right.prefix; + fn word_prefix_docids(&self, word: &str) -> heed::Result> { + self.index.word_prefix_docids.get(self.rtxn, &word) + } - match (&left.kind, &right.kind) { - (QueryKind::Exact { word: left, .. }, QueryKind::Exact { word: right, .. }) => { - if prefix && self.in_prefix_cache(&right) { - let key = (left.as_str(), right.as_str(), proximity); - Ok(self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)?.unwrap_or_default()) - } else if prefix { - let r_words = word_typos(&right, true, 0, &self.words_fst)?; - self.all_word_pair_proximity_docids(&[(left, 0)], &r_words, proximity) - } else { - let key = (left.as_str(), right.as_str(), proximity); - Ok(self.index.word_pair_proximity_docids.get(self.rtxn, &key)?.unwrap_or_default()) - } - }, - (QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. 
}) => { - let l_words = word_typos(&left, false, *typo, &self.words_fst)?; - if prefix && self.in_prefix_cache(&right) { - let mut docids = RoaringBitmap::new(); - for (left, _) in l_words { - let key = (left.as_ref(), right.as_ref(), proximity); - let current_docids = self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)?.unwrap_or_default(); - docids.union_with(¤t_docids); - } - Ok(docids) - } else if prefix { - let r_words = word_typos(&right, true, 0, &self.words_fst)?; - self.all_word_pair_proximity_docids(&l_words, &r_words, proximity) - } else { - self.all_word_pair_proximity_docids(&l_words, &[(right, 0)], proximity) - } - }, - (QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => { - let r_words = word_typos(&right, prefix, *typo, &self.words_fst)?; - self.all_word_pair_proximity_docids(&[(left, 0)], &r_words, proximity) - }, - (QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => { - let l_words = word_typos(&left, false, *l_typo, &self.words_fst)?; - let r_words = word_typos(&right, prefix, *r_typo, &self.words_fst)?; - self.all_word_pair_proximity_docids(&l_words, &r_words, proximity) - }, - } + fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result> { + let key = (left, right, proximity); + self.index.word_pair_proximity_docids.get(self.rtxn, &key) + } + + fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result> { + let key = (left, right, proximity); + self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key) } fn words_fst<'t>(&self) -> &'t fst::Set> { &self.words_fst } + + fn in_prefix_cache(&self, word: &str) -> bool { + self.words_prefixes_fst.contains(word) + } } impl<'t> HeedContext<'t> { @@ -149,25 +100,91 @@ impl<'t> HeedContext<'t> { words_prefixes_fst, }) } +} - fn in_prefix_cache(&self, word: &str) -> bool { - self.words_prefixes_fst.contains(word) +fn 
all_word_pair_proximity_docids, U: AsRef>( + ctx: &dyn Context, + left_words: &[(T, u8)], + right_words: &[(U, u8)], + proximity: u8 +) -> anyhow::Result { + let mut docids = RoaringBitmap::new(); + for (left, _l_typo) in left_words { + for (right, _r_typo) in right_words { + let current_docids = ctx.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default(); + docids.union_with(¤t_docids); + } } + Ok(docids) +} - fn all_word_pair_proximity_docids, U: AsRef>( - &self, - left_words: &[(T, u8)], - right_words: &[(U, u8)], - proximity: u8 - ) -> anyhow::Result { - let mut docids = RoaringBitmap::new(); - for (left, _l_typo) in left_words { - for (right, _r_typo) in right_words { - let key = (left.as_ref(), right.as_ref(), proximity); - let current_docids = self.index.word_pair_proximity_docids.get(self.rtxn, &key)?.unwrap_or_default(); +fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result { + match &query.kind { + QueryKind::Exact { word, .. } => { + if query.prefix && ctx.in_prefix_cache(&word) { + Ok(ctx.word_prefix_docids(&word)?.unwrap_or_default()) + } else if query.prefix { + let words = word_typos(&word, true, 0, ctx.words_fst())?; + let mut docids = RoaringBitmap::new(); + for (word, _typo) in words { + let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); + docids.union_with(¤t_docids); + } + Ok(docids) + } else { + Ok(ctx.word_docids(&word)?.unwrap_or_default()) + } + }, + QueryKind::Tolerant { typo, word } => { + let words = word_typos(&word, query.prefix, *typo, ctx.words_fst())?; + let mut docids = RoaringBitmap::new(); + for (word, _typo) in words { + let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); docids.union_with(¤t_docids); } - } - Ok(docids) + Ok(docids) + }, + } +} + +fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, proximity: u8) -> anyhow::Result { + let prefix = right.prefix; + + match (&left.kind, &right.kind) { + (QueryKind::Exact { word: 
left, .. }, QueryKind::Exact { word: right, .. }) => { + if prefix && ctx.in_prefix_cache(&right) { + Ok(ctx.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) + } else if prefix { + let r_words = word_typos(&right, true, 0, ctx.words_fst())?; + all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) + } else { + Ok(ctx.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) + } + }, + (QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => { + let l_words = word_typos(&left, false, *typo, ctx.words_fst())?; + if prefix && ctx.in_prefix_cache(&right) { + let mut docids = RoaringBitmap::new(); + for (left, _) in l_words { + let current_docids = ctx.word_prefix_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default(); + docids.union_with(¤t_docids); + } + Ok(docids) + } else if prefix { + let r_words = word_typos(&right, true, 0, ctx.words_fst())?; + all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) + } else { + all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity) + } + }, + (QueryKind::Exact { word: left, .. 
}, QueryKind::Tolerant { typo, word: right }) => { + let r_words = word_typos(&right, prefix, *typo, ctx.words_fst())?; + all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) + }, + (QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => { + let l_words = word_typos(&left, false, *l_typo, ctx.words_fst())?; + let r_words = word_typos(&right, prefix, *r_typo, ctx.words_fst())?; + all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) + }, } } diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 31b56d700..a1b8e1f16 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use crate::search::query_tree::{Operation, Query, QueryKind}; use crate::search::word_typos; -use super::{Candidates, Criterion, CriterionResult, Context}; +use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; // FIXME we must stop when the number of typos is equal to // the maximum number of typos for this query tree. @@ -206,7 +206,7 @@ fn resolve_candidates<'t>( for slice in ops.windows(2) { match (&slice[0], &slice[1]) { (Operation::Query(left), Operation::Query(right)) => { - match ctx.query_pair_proximity_docids(left, right, 1)? { + match query_pair_proximity_docids(ctx, left, right, 1)? { pair_docids if pair_docids.is_empty() => { return Ok(RoaringBitmap::new()) }, @@ -233,7 +233,7 @@ fn resolve_candidates<'t>( Ok(candidates) }, Query(q) => if q.kind.typo() == number_typos { - Ok(ctx.query_docids(q)?) + Ok(query_docids(ctx, q)?) } else { Ok(RoaringBitmap::new()) },