diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 934d2fd9b..a31a71590 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -1,4 +1,4 @@ -use std::{cmp, fmt, mem}; +use std::{borrow::Cow, cmp, fmt, mem}; use fst::Set; use meilisearch_tokenizer::token::SeparatorKind; @@ -157,6 +157,7 @@ trait Context { } /// Returns the minimum word len for 1 and 2 typos. fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; + fn exact_words(&self) -> crate::Result>>; } /// The query tree builder is the interface to build a query tree. @@ -186,6 +187,10 @@ impl<'a> Context for QueryTreeBuilder<'a> { let two = self.index.min_word_len_two_typos(&self.rtxn)?; Ok((one, two)) } + + fn exact_words(&self) -> crate::Result>> { + self.index.exact_words(self.rtxn) + } } impl<'a> QueryTreeBuilder<'a> { @@ -265,15 +270,16 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result { pub max_typos: u8, pub word_len_one_typo: u8, pub word_len_two_typo: u8, + pub exact_words: fst::Set>, } /// Return the `QueryKind` of a word depending on `authorize_typos` /// and the provided word length. -fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind { +fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { if authorize_typos { let count = word.chars().count().min(u8::MAX as usize) as u8; if count < config.word_len_one_typo { @@ -333,7 +339,9 @@ fn create_query_tree( children.push(child); } let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo }; + let exact_words = ctx.exact_words()?; + let config = + TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words }; children.push(Operation::Query(Query { prefix, kind: typos(word, authorize_typos, config), @@ -385,8 +393,13 @@ fn create_query_tree( let concat = words.concat(); let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let config = - TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo }; + let exact_words = ctx.exact_words()?; + let config = TypoConfig { + max_typos: 1, + word_len_one_typo, + word_len_two_typo, + exact_words, + }; let query = Query { prefix: is_prefix, kind: typos(concat, authorize_typos, config), @@ -605,6 +618,12 @@ mod test { fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } + + fn exact_words(&self) -> crate::Result>> { + let builder = fst::SetBuilder::new(Vec::new()).unwrap(); + let data = builder.into_inner().unwrap(); + Ok(fst::Set::new(Cow::Owned(data)).unwrap()) + } } impl Default for TestContext { @@ -1225,7 +1244,9 @@ mod test { #[test] fn test_min_word_len_typo() { - let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 }; + let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap(); + let config = + TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words }; assert_eq!( typos("hello".to_string(), true, config.clone()),