diff --git a/milli/src/index.rs b/milli/src/index.rs index 81648fe1c..41bd85b93 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1041,10 +1041,10 @@ impl Index { } /// List the words on which typo are not allowed - pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>> { + pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>>> { match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { - Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), - None => Ok(fst::Set::default().map_data(Cow::Owned)?), + Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), + None => Ok(None), } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 979ee1e6e..f3f852a48 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -118,7 +118,7 @@ impl<'a> Search<'a> { let before = Instant::now(); let (query_tree, primitive_query, matching_words) = match self.query.as_ref() { Some(query) => { - let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); + let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?; builder.optional_words(self.optional_words); builder.authorize_typos(self.is_typo_authorized()?); diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 02fc0747a..76748179b 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -152,7 +152,7 @@ trait Context { } /// Returns the minimum word len for 1 and 2 typos. fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; - fn exact_words(&self) -> crate::Result>>; + fn exact_words(&self) -> Option<&fst::Set>>; } /// The query tree builder is the interface to build a query tree. @@ -162,6 +162,7 @@ pub struct QueryTreeBuilder<'a> { optional_words: bool, authorize_typos: bool, words_limit: Option, + exact_words: Option>>, } impl<'a> Context for QueryTreeBuilder<'a> { @@ -183,16 +184,23 @@ impl<'a> Context for QueryTreeBuilder<'a> { Ok((one, two)) } - fn exact_words(&self) -> crate::Result>> { - self.index.exact_words(self.rtxn) + fn exact_words(&self) -> Option<&fst::Set>> { + self.exact_words.as_ref() } } impl<'a> QueryTreeBuilder<'a> { /// Create a `QueryTreeBuilder` from a heed ReadOnly transaction `rtxn` /// and an Index `index`. - pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Self { - Self { rtxn, index, optional_words: true, authorize_typos: true, words_limit: None } + pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Result { + Ok(Self { + rtxn, + index, + optional_words: true, + authorize_typos: true, + words_limit: None, + exact_words: index.exact_words(rtxn)?, + }) } /// if `optional_words` is set to `false` the query tree will be @@ -277,13 +285,13 @@ pub struct TypoConfig<'a> { pub max_typos: u8, pub word_len_one_typo: u8, pub word_len_two_typo: u8, - pub exact_words: fst::Set>, + pub exact_words: Option<&'a fst::Set>>, } /// Return the `QueryKind` of a word depending on `authorize_typos` /// and the provided word length. fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { - if authorize_typos && !config.exact_words.contains(&word) { + if authorize_typos && !config.exact_words.map_or(false, |s| s.contains(&word)) { let count = word.chars().count().min(u8::MAX as usize) as u8; if count < config.word_len_one_typo { QueryKind::exact(word) @@ -342,7 +350,7 @@ fn create_query_tree( children.push(Operation::Phrase(vec![left.to_string(), right.to_string()])); } let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words }; children.push(Operation::Query(Query { @@ -396,7 +404,7 @@ fn create_query_tree( let concat = words.concat(); let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 1, word_len_one_typo, @@ -501,7 +509,7 @@ fn create_matching_words( } let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words }; @@ -579,7 +587,7 @@ fn create_matching_words( let word = words.concat(); let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 1, word_len_one_typo, @@ -742,8 +750,7 @@ mod test { struct TestContext { synonyms: HashMap, Vec>>, postings: HashMap, - // Raw bytes for the exact word fst Set - exact_words: Vec, + exact_words: Option>>, } impl TestContext { @@ -779,8 +786,8 @@ mod test { Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } - fn exact_words(&self) -> crate::Result>> { - Ok(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap()) + fn exact_words(&self) -> Option<&fst::Set>> { + self.exact_words.as_ref() } } @@ -799,6 +806,8 @@ mod test { } let exact_words = fst::SetBuilder::new(Vec::new()).unwrap().into_inner().unwrap(); + let exact_words = + Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap()); TestContext { synonyms: hashmap! { @@ -1406,8 +1415,12 @@ mod test { #[test] fn test_min_word_len_typo() { let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap(); - let config = - TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words }; + let config = TypoConfig { + max_typos: 2, + word_len_one_typo: 5, + word_len_two_typo: 7, + exact_words: Some(&exact_words), + }; assert_eq!( typos("hello".to_string(), true, config.clone()), @@ -1433,6 +1446,7 @@ mod test { let tokens = result.tokens(); let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner(); + let exact_words = Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap()); let context = TestContext { exact_words, ..Default::default() }; let (query_tree, _) = context.build(false, true, Some(2), tokens).unwrap().unwrap(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index bd1495b1c..829932d5c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1495,7 +1495,7 @@ mod tests { let words = btreeset! { S("Ab"), S("ac") }; builder.set_exact_words(words); assert!(builder.execute(|_| ()).is_ok()); - let exact_words = index.exact_words(&txn).unwrap(); + let exact_words = index.exact_words(&txn).unwrap().unwrap(); for word in exact_words.into_fst().stream().into_str_vec().unwrap() { assert!(word.0 == "ac" || word.0 == "ab"); }