From c4653347fd296cbc4fb289bdb448dee313c35d79 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 16 Mar 2022 10:03:18 +0100 Subject: [PATCH 1/4] add authorize typo setting --- milli/src/index.rs | 20 ++++++++++++++++++++ milli/src/search/mod.rs | 6 +++++- milli/src/search/query_tree.rs | 2 -- milli/src/update/settings.rs | 25 +++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 568d50ad8..4e43e404e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -46,6 +46,7 @@ pub mod main_key { pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; pub const CREATED_AT_KEY: &str = "created-at"; pub const UPDATED_AT_KEY: &str = "updated-at"; + pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; } pub mod db_name { @@ -866,6 +867,25 @@ impl Index { ) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson>(wtxn, main_key::UPDATED_AT_KEY, &time) } + + pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result { + // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. + match self.main.get::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS)? { + Some(0) => Ok(false), + _ => Ok(true), + } + } + + pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn, flag: bool) -> heed::Result<()> { + // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. + self.main.put::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; + + Ok(()) + } } #[cfg(test)] diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 40e4bca24..c9eef5a0d 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -112,7 +112,11 @@ impl<'a> Search<'a> { Some(query) => { let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); builder.optional_words(self.optional_words); - builder.authorize_typos(self.authorize_typos); + + // only authorize typos if both the index and the query allow it. + let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; + builder.authorize_typos(self.authorize_typos && index_authorizes_typos); + builder.words_limit(self.words_limit); // We make sure that the analyzer is aware of the stop words // this ensures that the query builder is able to properly remove them. diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index f3ee99d9e..5437199e1 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -191,7 +191,6 @@ impl<'a> QueryTreeBuilder<'a> { /// generated forcing all query words to be present in each matching documents /// (the criterion `words` will be ignored). /// default value if not called: `true` - #[allow(unused)] pub fn optional_words(&mut self, optional_words: bool) -> &mut Self { self.optional_words = optional_words; self @@ -201,7 +200,6 @@ impl<'a> QueryTreeBuilder<'a> { /// forcing all query words to match documents without any typo /// (the criterion `typo` will be ignored). /// default value if not called: `true` - #[allow(unused)] pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self { self.authorize_typos = authorize_typos; self diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c413f81c3..25f3e92a3 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -89,6 +89,7 @@ pub struct Settings<'a, 't, 'u, 'i> { distinct_field: Setting, synonyms: Setting>>, primary_key: Setting, + authorize_typos: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -109,6 +110,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { distinct_field: Setting::NotSet, synonyms: Setting::NotSet, primary_key: Setting::NotSet, + authorize_typos: Setting::NotSet, indexer_config, } } @@ -186,6 +188,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.primary_key = Setting::Set(primary_key); } + pub fn set_autorize_typos(&mut self, val: bool) { + self.authorize_typos = Setting::Set(val); + } + + pub fn reset_authorize_typos(&mut self) { + self.authorize_typos = Setting::Reset; + } + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -450,6 +460,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } + fn update_authorize_typos(&mut self) -> Result<()> { + match self.authorize_typos { + Setting::Set(flag) => { + self.index.put_authorize_typos(self.wtxn, flag)?; + Ok(()) + } + Setting::Reset => { + self.index.put_authorize_typos(self.wtxn, true)?; + Ok(()) + } + Setting::NotSet => Ok(()), + } + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -465,6 +489,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_distinct_field()?; self.update_criteria()?; self.update_primary_key()?; + self.update_authorize_typos()?; // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, From f782fe20625755e78d89b630ccf87fbee9bb8b59 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 09:54:49 +0200 Subject: [PATCH 2/4] add authorize_typo_test --- milli/src/index.rs | 14 ++++++++++++++ milli/src/search/mod.rs | 42 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 4e43e404e..badcac0e5 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1009,4 +1009,18 @@ pub(crate) mod tests { } ); } + + #[test] + fn put_and_retrieve_disable_typo() { + let index = TempIndex::new(); + let mut txn = index.write_txn().unwrap(); + // default value is true + assert!(index.authorize_typos(&txn).unwrap()); + // set to false + index.put_authorize_typos(&mut txn, false).unwrap(); + txn.commit().unwrap(); + + let txn = index.read_txn().unwrap(); + assert!(!index.authorize_typos(&txn).unwrap()); + } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index c9eef5a0d..4f753a607 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -105,6 +105,12 @@ impl<'a> Search<'a> { self } + fn is_typo_authorized(&self) -> Result { + let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; + // only authorize typos if both the index and the query allow it. + Ok(self.authorize_typos && index_authorizes_typos) + } + pub fn execute(&self) -> Result { // We create the query tree by spliting the query into tokens. let before = Instant::now(); @@ -113,9 +119,7 @@ impl<'a> Search<'a> { let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); builder.optional_words(self.optional_words); - // only authorize typos if both the index and the query allow it. - let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; - builder.authorize_typos(self.authorize_typos && index_authorizes_typos); + builder.authorize_typos(self.is_typo_authorized()?); builder.words_limit(self.words_limit); // We make sure that the analyzer is aware of the stop words @@ -364,3 +368,35 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { lev.build_dfa(word) } } + +#[cfg(test)] +mod test { + use crate::index::tests::TempIndex; + + use super::*; + + #[test] + fn test_is_authorized_typos() { + let index = TempIndex::new(); + let mut txn = index.write_txn().unwrap(); + + let mut search = Search::new(&txn, &index); + + // default is authorized + assert!(search.is_typo_authorized().unwrap()); + + search.authorize_typos(false); + assert!(!search.is_typo_authorized().unwrap()); + + index.put_authorize_typos(&mut txn, false).unwrap(); + txn.commit().unwrap(); + + let txn = index.read_txn().unwrap(); + let mut search = Search::new(&txn, &index); + + assert!(!search.is_typo_authorized().unwrap()); + + search.authorize_typos(true); + assert!(!search.is_typo_authorized().unwrap()); + } +} From 6ef3bb9d83382c2797240a1a16a904e743e30081 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 14:06:23 +0200 Subject: [PATCH 3/4] fmt --- milli/src/search/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 4f753a607..614927877 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -371,9 +371,8 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { #[cfg(test)] mod test { - use crate::index::tests::TempIndex; - use super::*; + use crate::index::tests::TempIndex; #[test] fn test_is_authorized_typos() { From 3e34981d9b29e11e02f549c582b015ffad097854 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 14:12:00 +0200 Subject: [PATCH 4/4] add test for authorize_typos in update --- milli/src/update/settings.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 25f3e92a3..17924da8a 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -518,6 +518,7 @@ mod tests { use super::*; use crate::error::Error; + use crate::index::tests::TempIndex; use crate::update::IndexDocuments; use crate::{Criterion, Filter, SearchResult}; @@ -1218,4 +1219,18 @@ mod tests { let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); assert_eq!(line, r#""Star Wars""#); } + + #[test] + fn test_disable_typo() { + let index = TempIndex::new(); + + let mut txn = index.write_txn().unwrap(); + let config = IndexerConfig::default(); + + assert!(index.authorize_typos(&txn).unwrap()); + let mut builder = Settings::new(&mut txn, &index, &config); + builder.set_autorize_typos(false); + builder.execute(|_| ()).unwrap(); + assert!(!index.authorize_typos(&txn).unwrap()); + } }