From 8b1e5d9c6d654be95159519e8c233e8868694e1b Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 22 Mar 2022 09:55:49 +0100 Subject: [PATCH] add test for exact words --- milli/src/search/query_tree.rs | 25 ++++++++++++++++++++++--- milli/src/update/settings.rs | 9 ++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index a31a71590..0014075d4 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -584,6 +584,8 @@ mod test { struct TestContext { synonyms: HashMap, Vec>>, postings: HashMap, + // Raw bytes for the exact word fst Set + exact_words: Vec, } impl TestContext { @@ -620,9 +622,7 @@ mod test { } fn exact_words(&self) -> crate::Result>> { - let builder = fst::SetBuilder::new(Vec::new()).unwrap(); - let data = builder.into_inner().unwrap(); - Ok(fst::Set::new(Cow::Owned(data)).unwrap()) + Ok(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap()) } } @@ -640,6 +640,8 @@ mod test { RoaringBitmap::from_sorted_iter(values.into_iter()).unwrap() } + let exact_words = fst::SetBuilder::new(Vec::new()).unwrap().into_inner().unwrap(); + TestContext { synonyms: hashmap! { vec![String::from("hello")] => vec![ @@ -679,6 +681,7 @@ mod test { String::from("good") => random_postings(rng, 1250), String::from("morning") => random_postings(rng, 125), }, + exact_words, } } } @@ -1263,4 +1266,20 @@ mod test { QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() } ); } + #[test] + fn disable_typo_on_word() { + let query = "goodbye"; + let analyzer = Analyzer::new(AnalyzerConfig::>::default()); + let result = analyzer.analyze(query); + + let tokens = result.tokens(); + let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner(); + let context = TestContext { exact_words, ..Default::default() }; + let (query_tree, _) = context.build(false, true, Some(2), tokens).unwrap().unwrap(); + + assert!(matches!( + query_tree, + Operation::Query(Query { prefix: true, kind: QueryKind::Exact { .. } }) + )); + } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 513dee42c..503fbd06e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -92,7 +92,7 @@ pub struct Settings<'a, 't, 'u, 'i> { authorize_typos: Setting, min_word_len_two_typos: Setting, min_word_len_one_typo: Setting, - exact_words: Setting>, + exact_words: Setting>, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -115,9 +115,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { primary_key: Setting::NotSet, authorize_typos: Setting::NotSet, exact_words: Setting::NotSet, - indexer_config, min_word_len_two_typos: Setting::Reset, min_word_len_one_typo: Setting::Reset, + indexer_config, } } @@ -218,7 +218,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.min_word_len_one_typo = Setting::Reset; } - pub fn set_exact_words(&mut self, words: Vec) { + pub fn set_exact_words(&mut self, words: BTreeSet) { self.exact_words = Setting::Set(words); } @@ -539,8 +539,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_exact_words(&mut self) -> Result<()> { match self.exact_words { Setting::Set(ref mut words) => { - words.sort_unstable(); - let words = fst::Set::from_iter(words)?; + let words = fst::Set::from_iter(words.iter())?; self.index.put_exact_words(&mut self.wtxn, &words)?; } Setting::Reset => {