From 5a24e605728c7c6b2a80b5d90c1dc553ebe3f9ba Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 21 Mar 2022 13:03:06 +0100 Subject: [PATCH 01/12] introduce word len for typo setting --- milli/src/error.rs | 2 ++ milli/src/index.rs | 41 ++++++++++++++++++++++++++++ milli/src/update/settings.rs | 53 ++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/milli/src/error.rs b/milli/src/error.rs index e6fbc0605..3ef6aa81d 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -72,6 +72,7 @@ pub enum UserError { SerdeJson(serde_json::Error), SortError(SortError), UnknownInternalDocumentId { document_id: DocumentId }, + InvalidMinTypoWordSetting(u8, u8), } impl From for Error { @@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.", Self::UnknownInternalDocumentId { document_id } => { write!(f, "An unknown internal document id have been used: `{}`.", document_id) } + Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two), } } } diff --git a/milli/src/index.rs b/milli/src/index.rs index badcac0e5..3c1ba948f 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -23,6 +23,9 @@ use crate::{ Search, StrBEU32Codec, StrStrU8Codec, BEU32, }; +pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5; +pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9; + pub mod main_key { pub const CRITERIA_KEY: &str = "criteria"; pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; @@ -47,6 +50,8 @@ pub mod main_key { pub const CREATED_AT_KEY: &str = "created-at"; pub const UPDATED_AT_KEY: &str = "updated-at"; pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; + pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len"; + pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len"; } pub mod db_name { @@ -886,6 +891,42 @@ impl Index { Ok(()) } + + pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result { + 
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. + Ok(self + .main + .get::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN)? + .unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO)) + } + + pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { + // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. + self.main.put::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; + Ok(()) + } + + pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result { + // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. + Ok(self + .main + .get::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN)? + .unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS)) + } + + pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { + // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We + // identify 0 as being false, and anything else as true. The absence of a value is true, + // because by default, we authorize typos. 
+ self.main.put::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; + Ok(()) + } } #[cfg(test)] diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 17924da8a..72b416b02 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> { synonyms: Setting>>, primary_key: Setting, authorize_typos: Setting, + min_2_typos_word_len: Setting, + min_1_typo_word_len: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { primary_key: Setting::NotSet, authorize_typos: Setting::NotSet, indexer_config, + min_2_typos_word_len: Setting::Reset, + min_1_typo_word_len: Setting::Reset, } } @@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.authorize_typos = Setting::Reset; } + pub fn set_min_2_typos_word_len(&mut self, val: u8) { + self.min_2_typos_word_len = Setting::Set(val); + } + + pub fn reset_min_2_typos_word_len(&mut self) { + self.min_2_typos_word_len = Setting::Reset; + } + + pub fn set_min_1_typo_word_len(&mut self, val: u8) { + self.min_1_typo_word_len = Setting::Set(val); + } + + pub fn reset_min_1_typos_word_len(&mut self) { + self.min_1_typo_word_len = Setting::Reset; + } + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } + fn update_min_typo_word_len(&mut self) -> Result<()> { + match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) { + (Setting::Set(one), Setting::Set(two)) => { + if one < two { + self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; + self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; + } else { + return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); + } + } + (Setting::Set(one), _) => { + let two = self.index.min_word_len_2_typo(&self.wtxn)?; + if *one < two { + 
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; + } else { + return Err(UserError::InvalidMinTypoWordSetting(*one, two).into()); + } + } + (_, Setting::Set(two)) => { + let one = self.index.min_word_len_1_typo(&self.wtxn)?; + if one < *two { + self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; + } else { + return Err(UserError::InvalidMinTypoWordSetting(one, *two).into()); + } + } + _ => (), + } + + Ok(()) + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_criteria()?; self.update_primary_key()?; self.update_authorize_typos()?; + self.update_min_typo_word_len()?; // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, From a1a3a49bc9493c91c38b86d11370d2c66d8d348f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 21 Mar 2022 13:29:59 +0100 Subject: [PATCH 02/12] dynamic minimum word len for typos in query tree builder --- milli/src/search/query_tree.rs | 40 ++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 5437199e1..6db2ce7a7 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -155,6 +155,8 @@ trait Context { None => Ok(None), } } + /// Returns the minimum word len for 1 and 2 typos. + fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; } /// The query tree builder is the interface to build a query tree. 
@@ -178,6 +180,12 @@ impl<'a> Context for QueryTreeBuilder<'a> { fn word_documents_count(&self, word: &str) -> heed::Result> { self.index.word_documents_count(self.rtxn, word) } + + fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { + let one = self.index.min_word_len_1_typo(&self.rtxn)?; + let two = self.index.min_word_len_2_typo(&self.rtxn)?; + Ok((one, two)) + } } impl<'a> QueryTreeBuilder<'a> { @@ -256,14 +264,23 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result QueryKind { +fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind { if authorize_typos { - match word.chars().count() { - 0..=4 => QueryKind::exact(word), - 5..=8 => QueryKind::tolerant(1.min(max_typos), word), - _ => QueryKind::tolerant(2.min(max_typos), word), + let count = word.chars().count().min(u8::MAX as usize) as u8; + if (0..config.word_len_1_typo).contains(&count) { + QueryKind::exact(word) + } else if (config.word_len_1_typo..config.word_len_2_typo).contains(&count) { + QueryKind::tolerant(1.min(config.max_typos), word) + } else { + QueryKind::tolerant(2.min(config.max_typos), word) } } else { QueryKind::exact(word) @@ -314,9 +331,11 @@ fn create_query_tree( if let Some(child) = split_best_frequency(ctx, &word)? 
{ children.push(child); } + let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; + let config = TypoConfig { max_typos: 2, word_len_1_typo, word_len_2_typo }; children.push(Operation::Query(Query { prefix, - kind: typos(word, authorize_typos, 2), + kind: typos(word, authorize_typos, config), })); Ok(Operation::or(false, children)) } @@ -363,9 +382,12 @@ fn create_query_tree( .collect(); let mut operations = synonyms(ctx, &words)?.unwrap_or_default(); let concat = words.concat(); + let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; + let config = + TypoConfig { max_typos: 1, word_len_1_typo, word_len_2_typo }; let query = Query { prefix: is_prefix, - kind: typos(concat, authorize_typos, 1), + kind: typos(concat, authorize_typos, config), }; operations.push(Operation::Query(query)); and_op_children.push(Operation::or(false, operations)); @@ -576,6 +598,10 @@ mod test { let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); Ok(self.synonyms.get(&words).cloned()) } + + fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { + Ok((5, 9)) + } } impl Default for TestContext { From 9102de55003498020e1adf81ce07ec2cec7377ef Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 10:22:39 +0200 Subject: [PATCH 03/12] fix error message --- milli/src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 3ef6aa81d..471952a36 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -292,7 +292,7 @@ ranking rules settings to use the sort parameter at search time.", Self::UnknownInternalDocumentId { document_id } => { write!(f, "An unknown internal document id have been used: `{}`.", document_id) } - Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two), + Self::InvalidMinTypoWordSetting(one, two) => write!(f, 
"`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`."", one, two), } } } From 55af85db3c46b3e9abb896631389521b141f0c48 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 13:50:18 +0200 Subject: [PATCH 04/12] add tests for min_word_len_for_typo --- milli/src/error.rs | 2 +- milli/src/index.rs | 19 +++++++++++++ milli/src/search/query_tree.rs | 21 ++++++++++++++ milli/src/update/settings.rs | 51 ++++++++++++++++++++++++++++------ 4 files changed, 83 insertions(+), 10 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 471952a36..611160319 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -292,7 +292,7 @@ ranking rules settings to use the sort parameter at search time.", Self::UnknownInternalDocumentId { document_id } => { write!(f, "An unknown internal document id have been used: `{}`.", document_id) } - Self::InvalidMinTypoWordSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`."", one, two), + Self::InvalidMinTypoWordSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. 
`oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`.", one, two), } } } diff --git a/milli/src/index.rs b/milli/src/index.rs index 3c1ba948f..0095352e4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -937,6 +937,7 @@ pub(crate) mod tests { use maplit::btreemap; use tempfile::TempDir; + use crate::index::{DEFAULT_MIN_WORD_LEN_1_TYPO, DEFAULT_MIN_WORD_LEN_2_TYPOS}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; use crate::Index; @@ -1064,4 +1065,22 @@ pub(crate) mod tests { let txn = index.read_txn().unwrap(); assert!(!index.authorize_typos(&txn).unwrap()); } + + #[test] + fn set_min_word_len_for_typos() { + let index = TempIndex::new(); + let mut txn = index.write_txn().unwrap(); + + assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_1_TYPO); + assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_2_TYPOS); + + index.put_min_word_len_1_typo(&mut txn, 3).unwrap(); + index.put_min_word_len_2_typo(&mut txn, 15).unwrap(); + + txn.commit().unwrap(); + + let txn = index.read_txn().unwrap(); + assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 3); + assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), 15); + } } diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 6db2ce7a7..acaba680f 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -264,6 +264,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result Settings<'a, 't, 'u, 'i> { fn update_min_typo_word_len(&mut self) -> Result<()> { match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) { (Setting::Set(one), Setting::Set(two)) => { - if one < two { + if one > two { + return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); + } else { self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; self.index.put_min_word_len_2_typo(&mut 
self.wtxn, *two)?; - } else { - return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); } } (Setting::Set(one), _) => { let two = self.index.min_word_len_2_typo(&self.wtxn)?; - if *one < two { - self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; - } else { + if *one > two { return Err(UserError::InvalidMinTypoWordSetting(*one, two).into()); + } else { + self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; } } (_, Setting::Set(two)) => { let one = self.index.min_word_len_1_typo(&self.wtxn)?; - if one < *two { - self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; - } else { + if one > *two { return Err(UserError::InvalidMinTypoWordSetting(one, *two).into()); + } else { + self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; } } _ => (), @@ -1286,4 +1286,37 @@ mod tests { builder.execute(|_| ()).unwrap(); assert!(!index.authorize_typos(&txn).unwrap()); } + + #[test] + fn update_min_word_len_for_typo() { + let index = TempIndex::new(); + let config = IndexerConfig::default(); + + // Set the genres setting + let mut txn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut txn, &index, &config); + builder.set_min_1_typo_word_len(8); + builder.set_min_2_typos_word_len(8); + builder.execute(|_| ()).unwrap(); + + txn.commit().unwrap(); + + let txn = index.read_txn().unwrap(); + + assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), 8); + } + + #[test] + fn update_invalid_min_word_len_for_typo() { + let index = TempIndex::new(); + let config = IndexerConfig::default(); + + // Set the genres setting + let mut txn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut txn, &index, &config); + builder.set_min_1_typo_word_len(10); + builder.set_min_2_typos_word_len(7); + assert!(builder.execute(|_| ()).is_err()); + } } From 286dd7b2e43fdc0ba93931bdf094c1d9573b7954 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 13:51:27 +0200 Subject: 
[PATCH 05/12] rename min_word_len_2_typo --- milli/src/index.rs | 10 +++++----- milli/src/search/query_tree.rs | 2 +- milli/src/update/settings.rs | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 0095352e4..98f0093b8 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -910,7 +910,7 @@ impl Index { Ok(()) } - pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result { + pub fn min_word_len_2_typos(&self, txn: &RoTxn) -> heed::Result { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. @@ -920,7 +920,7 @@ impl Index { .unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS)) } - pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { + pub(crate) fn put_min_word_len_2_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. 
@@ -1072,15 +1072,15 @@ pub(crate) mod tests { let mut txn = index.write_txn().unwrap(); assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_1_TYPO); - assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_2_TYPOS); + assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_2_TYPOS); index.put_min_word_len_1_typo(&mut txn, 3).unwrap(); - index.put_min_word_len_2_typo(&mut txn, 15).unwrap(); + index.put_min_word_len_2_typos(&mut txn, 15).unwrap(); txn.commit().unwrap(); let txn = index.read_txn().unwrap(); assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 3); - assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), 15); + assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 15); } } diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index acaba680f..5d78eb674 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -183,7 +183,7 @@ impl<'a> Context for QueryTreeBuilder<'a> { fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { let one = self.index.min_word_len_1_typo(&self.rtxn)?; - let two = self.index.min_word_len_2_typo(&self.rtxn)?; + let two = self.index.min_word_len_2_typos(&self.rtxn)?; Ok((one, two)) } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 5ccaba9ba..8fd9b9a9a 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -501,11 +501,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); } else { self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; - self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; + self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; } } (Setting::Set(one), _) => { - let two = self.index.min_word_len_2_typo(&self.wtxn)?; + let two = self.index.min_word_len_2_typos(&self.wtxn)?; if *one > two { return Err(UserError::InvalidMinTypoWordSetting(*one, two).into()); } else { 
@@ -517,7 +517,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { if one > *two { return Err(UserError::InvalidMinTypoWordSetting(one, *two).into()); } else { - self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; + self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; } } _ => (), @@ -1304,7 +1304,7 @@ mod tests { let txn = index.read_txn().unwrap(); assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 8); - assert_eq!(index.min_word_len_2_typo(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 8); } #[test] From 4c4b336ecb992c10606f9899535c5c39708bd347 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 14:15:02 +0200 Subject: [PATCH 06/12] rename min word len for typo error --- milli/src/error.rs | 4 ++-- milli/src/update/settings.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 611160319..688977741 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -72,7 +72,7 @@ pub enum UserError { SerdeJson(serde_json::Error), SortError(SortError), UnknownInternalDocumentId { document_id: DocumentId }, - InvalidMinTypoWordSetting(u8, u8), + InvalidMinTypoWordLenSetting(u8, u8), } impl From for Error { @@ -292,7 +292,7 @@ ranking rules settings to use the sort parameter at search time.", Self::UnknownInternalDocumentId { document_id } => { write!(f, "An unknown internal document id have been used: `{}`.", document_id) } - Self::InvalidMinTypoWordSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`.", one, two), + Self::InvalidMinTypoWordLenSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. 
`oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`.", one, two), } } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 8fd9b9a9a..26ed5730a 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -498,7 +498,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) { (Setting::Set(one), Setting::Set(two)) => { if one > two { - return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); + return Err(UserError::InvalidMinTypoWordLenSetting(*one, *two).into()); } else { self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; @@ -507,7 +507,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { (Setting::Set(one), _) => { let two = self.index.min_word_len_2_typos(&self.wtxn)?; if *one > two { - return Err(UserError::InvalidMinTypoWordSetting(*one, two).into()); + return Err(UserError::InvalidMinTypoWordLenSetting(*one, two).into()); } else { self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; } @@ -515,7 +515,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { (_, Setting::Set(two)) => { let one = self.index.min_word_len_1_typo(&self.wtxn)?; if one > *two { - return Err(UserError::InvalidMinTypoWordSetting(one, *two).into()); + return Err(UserError::InvalidMinTypoWordLenSetting(one, *two).into()); } else { self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; } From 66020cd9239c98658ba7824a632d8c1df944edd5 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 18:23:12 +0200 Subject: [PATCH 07/12] rename min_word_len* to use plain letter numbers --- milli/src/index.rs | 30 ++++++++++---------- milli/src/search/query_tree.rs | 23 ++++++++-------- milli/src/update/settings.rs | 50 +++++++++++++++++----------------- 3 files changed, 52 insertions(+), 51 
deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 98f0093b8..853e7537d 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -23,8 +23,8 @@ use crate::{ Search, StrBEU32Codec, StrStrU8Codec, BEU32, }; -pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5; -pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9; +pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; +pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9; pub mod main_key { pub const CRITERIA_KEY: &str = "criteria"; @@ -892,17 +892,17 @@ impl Index { Ok(()) } - pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result { + pub fn min_word_len_one_typo(&self, txn: &RoTxn) -> heed::Result { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. Ok(self .main .get::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN)? - .unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO)) + .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) } - pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { + pub(crate) fn put_min_word_len_one_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. @@ -910,17 +910,17 @@ impl Index { Ok(()) } - pub fn min_word_len_2_typos(&self, txn: &RoTxn) -> heed::Result { + pub fn min_word_len_two_typos(&self, txn: &RoTxn) -> heed::Result { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. Ok(self .main .get::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN)? 
- .unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS)) + .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } - pub(crate) fn put_min_word_len_2_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { + pub(crate) fn put_min_word_len_two_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. @@ -937,7 +937,7 @@ pub(crate) mod tests { use maplit::btreemap; use tempfile::TempDir; - use crate::index::{DEFAULT_MIN_WORD_LEN_1_TYPO, DEFAULT_MIN_WORD_LEN_2_TYPOS}; + use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; use crate::Index; @@ -1071,16 +1071,16 @@ pub(crate) mod tests { let index = TempIndex::new(); let mut txn = index.write_txn().unwrap(); - assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_1_TYPO); - assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_2_TYPOS); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS); - index.put_min_word_len_1_typo(&mut txn, 3).unwrap(); - index.put_min_word_len_2_typos(&mut txn, 15).unwrap(); + index.put_min_word_len_one_typo(&mut txn, 3).unwrap(); + index.put_min_word_len_two_typos(&mut txn, 15).unwrap(); txn.commit().unwrap(); let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 3); - assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 15); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15); } } diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 5d78eb674..c1803f40b 100644 --- 
a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -182,8 +182,8 @@ impl<'a> Context for QueryTreeBuilder<'a> { } fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { - let one = self.index.min_word_len_1_typo(&self.rtxn)?; - let two = self.index.min_word_len_2_typos(&self.rtxn)?; + let one = self.index.min_word_len_one_typo(&self.rtxn)?; + let two = self.index.min_word_len_two_typos(&self.rtxn)?; Ok((one, two)) } } @@ -267,8 +267,8 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result QueryKind { if authorize_typos { let count = word.chars().count().min(u8::MAX as usize) as u8; - if (0..config.word_len_1_typo).contains(&count) { + if (0..config.word_len_one_typo).contains(&count) { QueryKind::exact(word) - } else if (config.word_len_1_typo..config.word_len_2_typo).contains(&count) { + } else if (config.word_len_one_typo..config.word_len_two_typo).contains(&count) { QueryKind::tolerant(1.min(config.max_typos), word) } else { QueryKind::tolerant(2.min(config.max_typos), word) @@ -332,8 +332,8 @@ fn create_query_tree( if let Some(child) = split_best_frequency(ctx, &word)? 
{ children.push(child); } - let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; - let config = TypoConfig { max_typos: 2, word_len_1_typo, word_len_2_typo }; + let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; + let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo }; children.push(Operation::Query(Query { prefix, kind: typos(word, authorize_typos, config), @@ -383,9 +383,10 @@ fn create_query_tree( .collect(); let mut operations = synonyms(ctx, &words)?.unwrap_or_default(); let concat = words.concat(); - let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?; + let (word_len_one_typo, word_len_two_typo) = + ctx.min_word_len_for_typo()?; let config = - TypoConfig { max_typos: 1, word_len_1_typo, word_len_2_typo }; + TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo }; let query = Query { prefix: is_prefix, kind: typos(concat, authorize_typos, config), @@ -1223,7 +1224,7 @@ mod test { #[test] fn test_min_word_len_typo() { - let config = TypoConfig { max_typos: 2, word_len_1_typo: 5, word_len_2_typo: 7 }; + let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 }; assert_eq!( typos("hello".to_string(), true, config.clone()), diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 26ed5730a..94ae29595 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -90,8 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> { synonyms: Setting>>, primary_key: Setting, authorize_typos: Setting, - min_2_typos_word_len: Setting, - min_1_typo_word_len: Setting, + min_word_len_two_typos: Setting, + min_word_len_one_typo: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -114,8 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { primary_key: Setting::NotSet, authorize_typos: Setting::NotSet, indexer_config, - min_2_typos_word_len: Setting::Reset, - min_1_typo_word_len: Setting::Reset, + 
min_word_len_two_typos: Setting::Reset, + min_word_len_one_typo: Setting::Reset, } } @@ -200,20 +200,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.authorize_typos = Setting::Reset; } - pub fn set_min_2_typos_word_len(&mut self, val: u8) { - self.min_2_typos_word_len = Setting::Set(val); + pub fn set_min_word_len_two_typos(&mut self, val: u8) { + self.min_word_len_two_typos = Setting::Set(val); } - pub fn reset_min_2_typos_word_len(&mut self) { - self.min_2_typos_word_len = Setting::Reset; + pub fn reset_min_word_len_two_typos(&mut self) { + self.min_word_len_two_typos = Setting::Reset; } - pub fn set_min_1_typo_word_len(&mut self, val: u8) { - self.min_1_typo_word_len = Setting::Set(val); + pub fn set_min_word_len_one_typo(&mut self, val: u8) { + self.min_word_len_one_typo = Setting::Set(val); } - pub fn reset_min_1_typos_word_len(&mut self) { - self.min_1_typo_word_len = Setting::Reset; + pub fn reset_min_word_len_one_typo(&mut self) { + self.min_word_len_one_typo = Setting::Reset; } fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> @@ -495,29 +495,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn update_min_typo_word_len(&mut self) -> Result<()> { - match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) { + match (&self.min_word_len_one_typo, &self.min_word_len_two_typos) { (Setting::Set(one), Setting::Set(two)) => { if one > two { return Err(UserError::InvalidMinTypoWordLenSetting(*one, *two).into()); } else { - self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; - self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; + self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?; + self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?; } } (Setting::Set(one), _) => { - let two = self.index.min_word_len_2_typos(&self.wtxn)?; + let two = self.index.min_word_len_two_typos(&self.wtxn)?; if *one > two { return Err(UserError::InvalidMinTypoWordLenSetting(*one, two).into()); } else { - 
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; + self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?; } } (_, Setting::Set(two)) => { - let one = self.index.min_word_len_1_typo(&self.wtxn)?; + let one = self.index.min_word_len_one_typo(&self.wtxn)?; if one > *two { return Err(UserError::InvalidMinTypoWordLenSetting(one, *two).into()); } else { - self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?; + self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?; } } _ => (), @@ -1295,16 +1295,16 @@ mod tests { // Set the genres setting let mut txn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut txn, &index, &config); - builder.set_min_1_typo_word_len(8); - builder.set_min_2_typos_word_len(8); + builder.set_min_word_len_one_typo(8); + builder.set_min_word_len_two_typos(8); builder.execute(|_| ()).unwrap(); txn.commit().unwrap(); let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 8); - assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8); } #[test] @@ -1315,8 +1315,8 @@ mod tests { // Set the genres setting let mut txn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut txn, &index, &config); - builder.set_min_1_typo_word_len(10); - builder.set_min_2_typos_word_len(7); + builder.set_min_word_len_one_typo(10); + builder.set_min_word_len_two_typos(7); assert!(builder.execute(|_| ()).is_err()); } } From 950a740bd46c5e06ff2b4f6d2fefb24fe44b3cd1 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 18:37:43 +0200 Subject: [PATCH 08/12] refactor typos for readability --- milli/src/search/query_tree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index c1803f40b..b1c846324 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ 
-276,9 +276,9 @@ pub struct TypoConfig { fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind { if authorize_typos { let count = word.chars().count().min(u8::MAX as usize) as u8; - if (0..config.word_len_one_typo).contains(&count) { + if count < config.word_len_one_typo { QueryKind::exact(word) - } else if (config.word_len_one_typo..config.word_len_two_typo).contains(&count) { + } else if count < config.word_len_two_typo { QueryKind::tolerant(1.min(config.max_typos), word) } else { QueryKind::tolerant(2.min(config.max_typos), word) From fdaf45aab2e898b2f730ec81c2d469f055fe0452 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 18:42:10 +0200 Subject: [PATCH 09/12] replace hardcoded value with constant in TestContext --- milli/src/search/query_tree.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index b1c846324..1bb4c9516 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -564,6 +564,8 @@ mod test { use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; + use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; + use super::*; #[derive(Debug)] @@ -602,7 +604,7 @@ mod test { } fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { - Ok((5, 9)) + Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } } From 1941072bb29ba7763519d85ef59797e634d0f82e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 31 Mar 2022 18:44:51 +0200 Subject: [PATCH 10/12] implement Copy on Setting --- milli/src/update/settings.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 94ae29595..c03d6e0ae 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -14,7 +14,7 @@ use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{ClearDocuments, 
IndexDocuments, UpdateIndexingStep}; use crate::{FieldsIdsMap, Index, Result}; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Copy)] pub enum Setting { Set(T), Reset, @@ -495,29 +495,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn update_min_typo_word_len(&mut self) -> Result<()> { - match (&self.min_word_len_one_typo, &self.min_word_len_two_typos) { + match (self.min_word_len_one_typo, self.min_word_len_two_typos) { (Setting::Set(one), Setting::Set(two)) => { if one > two { - return Err(UserError::InvalidMinTypoWordLenSetting(*one, *two).into()); + return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into()); } else { - self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?; - self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?; + self.index.put_min_word_len_one_typo(&mut self.wtxn, one)?; + self.index.put_min_word_len_two_typos(&mut self.wtxn, two)?; } } (Setting::Set(one), _) => { let two = self.index.min_word_len_two_typos(&self.wtxn)?; - if *one > two { - return Err(UserError::InvalidMinTypoWordLenSetting(*one, two).into()); + if one > two { + return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into()); } else { - self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?; + self.index.put_min_word_len_one_typo(&mut self.wtxn, one)?; } } (_, Setting::Set(two)) => { let one = self.index.min_word_len_one_typo(&self.wtxn)?; - if one > *two { - return Err(UserError::InvalidMinTypoWordLenSetting(one, *two).into()); + if one > two { + return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into()); } else { - self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?; + self.index.put_min_word_len_two_typos(&mut self.wtxn, two)?; } } _ => (), From 2cb71dff4afdc7b4f16f9692066dbb3bc4096896 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 1 Apr 2022 10:50:01 +0200 Subject: [PATCH 11/12] add typo integration tests --- milli/tests/search/mod.rs | 1 + milli/tests/search/typo_tolerance.rs | 97 
++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 milli/tests/search/typo_tolerance.rs diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 31d53b666..52b4c7114 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -16,6 +16,7 @@ mod distinct; mod filters; mod query_criteria; mod sort; +mod typo_tolerance; pub const TEST_QUERY: &'static str = "hello world america"; diff --git a/milli/tests/search/typo_tolerance.rs b/milli/tests/search/typo_tolerance.rs new file mode 100644 index 000000000..8898fb353 --- /dev/null +++ b/milli/tests/search/typo_tolerance.rs @@ -0,0 +1,97 @@ +use milli::{ + update::{IndexerConfig, Settings}, + Criterion, Search, +}; +use Criterion::*; + +#[test] +fn test_typo_tolerance_one_typo() { + let criteria = [Typo]; + let index = super::setup_search_index_with_criteria(&criteria); + + // basic typo search with default typo settings + { + let txn = index.read_txn().unwrap(); + + let mut search = Search::new(&txn, &index); + search.query("zeal"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + + let mut search = Search::new(&txn, &index); + search.query("zean"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 0); + } + + let mut txn = index.write_txn().unwrap(); + + let config = IndexerConfig::default(); + let mut builder = Settings::new(&mut txn, &index, &config); + builder.set_min_word_len_one_typo(4); + builder.execute(|_| ()).unwrap(); + + // typo is now supported for 4 letters words + let mut search = Search::new(&txn, &index); + search.query("zean"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); +} + 
+#[test] +fn test_typo_tolerance_two_typo() { + let criteria = [Typo]; + let index = super::setup_search_index_with_criteria(&criteria); + + // basic typo search with default typo settings + { + let txn = index.read_txn().unwrap(); + + let mut search = Search::new(&txn, &index); + search.query("zealand"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + + let mut search = Search::new(&txn, &index); + search.query("zealemd"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 0); + } + + let mut txn = index.write_txn().unwrap(); + + let config = IndexerConfig::default(); + let mut builder = Settings::new(&mut txn, &index, &config); + builder.set_min_word_len_two_typos(7); + builder.execute(|_| ()).unwrap(); + + // two typos are now supported for 7 letters words + let mut search = Search::new(&txn, &index); + search.query("zealemd"); + search.limit(10); + search.authorize_typos(true); + search.optional_words(true); + + let result = search.execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); +} From 853b4a520fb2a6fd10909b085b16460b23c6e249 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 1 Apr 2022 11:21:51 +0200 Subject: [PATCH 12/12] fmt --- milli/src/search/query_tree.rs | 3 +-- milli/tests/search/typo_tolerance.rs | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 1bb4c9516..934d2fd9b 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -564,9 +564,8 @@ mod test { use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; - use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; - use super::*; + use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
#[derive(Debug)] struct TestContext { diff --git a/milli/tests/search/typo_tolerance.rs b/milli/tests/search/typo_tolerance.rs index 8898fb353..00e6853cc 100644 --- a/milli/tests/search/typo_tolerance.rs +++ b/milli/tests/search/typo_tolerance.rs @@ -1,7 +1,5 @@ -use milli::{ - update::{IndexerConfig, Settings}, - Criterion, Search, -}; +use milli::update::{IndexerConfig, Settings}; +use milli::{Criterion, Search}; use Criterion::*; #[test]