From 27a88bcd47cd47593b9531ec5f13f76c592f5744 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 22 Mar 2022 16:28:00 +0100 Subject: [PATCH 1/3] feat(all): introduce minWordLengthForTypo fix typo in setting skip serializing not set typo settings --- meilisearch-error/src/lib.rs | 4 +++ meilisearch-lib/src/error.rs | 4 ++- meilisearch-lib/src/index/index.rs | 8 ++++- meilisearch-lib/src/index/updates.rs | 47 +++++++++++++++++++++++++--- 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 81371eb6d..11613497c 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -120,6 +120,7 @@ pub enum Code { IndexAlreadyExists, IndexNotFound, InvalidIndexUid, + InvalidMinWordLengthForTypo, // invalid state error InvalidState, @@ -271,6 +272,9 @@ impl Code { InvalidApiKeyDescription => { ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) } + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } } } diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index f30c698e5..c3e7b8313 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -41,7 +41,9 @@ impl ErrorCode for MilliError<'_> { UserError::CriterionError(_) => Code::InvalidRankingRule, UserError::InvalidGeoField { .. 
} => Code::InvalidGeoField, UserError::SortError(_) => Code::Sort, - UserError::InvalidMinTypoWordLenSetting(_, _) => unreachable!(), + UserError::InvalidMinTypoWordLenSetting(_, _) => { + Code::InvalidMinWordLengthForTypo + } } } } diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 33b3f4ad2..e95c64485 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -17,7 +17,7 @@ use crate::EnvSizer; use super::error::IndexError; use super::error::Result; -use super::updates::TypoSettings; +use super::updates::{MinWordLengthTypoSetting, TypoSettings}; use super::{Checked, Settings}; pub type Document = Map; @@ -169,8 +169,14 @@ impl Index { }) .collect(); + let min_typo_word_len = MinWordLengthTypoSetting { + one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), + two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), + }; + let typo_tolerance = TypoSettings { enabled: Setting::Set(self.authorize_typos(txn)?), + min_word_length_for_typo: Setting::Set(min_typo_word_len), }; Ok(Settings { diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 95145f92c..cd985476c 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -37,14 +37,30 @@ pub struct Checked; #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)] pub struct Unchecked; +#[cfg_attr(test, derive(proptest_derive::Arbitrary))] #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] +pub struct MinWordLengthTypoSetting { + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub one_typo: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub two_typos: Setting, +} + #[cfg_attr(test, 
derive(proptest_derive::Arbitrary))] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] pub struct TypoSettings { #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub enabled: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub min_word_length_for_typo: Setting, } /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a @@ -352,11 +368,32 @@ pub fn apply_settings_to_builder( } match settings.typo { - Setting::Set(ref value) => match value.enabled { - Setting::Set(val) => builder.set_autorize_typos(val), - Setting::Reset => builder.reset_authorize_typos(), - Setting::NotSet => (), - }, + Setting::Set(ref value) => { + match value.enabled { + Setting::Set(val) => builder.set_autorize_typos(val), + Setting::Reset => builder.reset_authorize_typos(), + Setting::NotSet => (), + } + match value.min_word_length_for_typo { + Setting::Set(ref setting) => { + match setting.one_typo { + Setting::Set(val) => builder.set_min_word_len_one_typo(val), + Setting::Reset => builder.reset_min_word_len_one_typo(), + Setting::NotSet => (), + } + match setting.two_typos { + Setting::Set(val) => builder.set_min_word_len_two_typos(val), + Setting::Reset => builder.reset_min_word_len_two_typos(), + Setting::NotSet => (), + } + } + Setting::Reset => { + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); + } + Setting::NotSet => (), + } + } Setting::Reset => { // all typo settings need to be reset here. 
builder.reset_authorize_typos(); From dd43ba62341b68de47c751814613ad7945ed711f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 17 Mar 2022 11:59:35 +0100 Subject: [PATCH 2/3] feat(all): introduce disable typos --- meilisearch-lib/src/index/updates.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index cd985476c..2c4b7c1c5 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -397,6 +397,8 @@ pub fn apply_settings_to_builder( Setting::Reset => { // all typo settings need to be reset here. builder.reset_authorize_typos(); + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); } Setting::NotSet => (), } From e9f66b876687c8f29464d2724f92c3ab04634a65 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 22 Mar 2022 18:17:33 +0100 Subject: [PATCH 3/3] feat(all): introduce disable typo on words --- meilisearch-lib/src/index/index.rs | 9 +++++++++ meilisearch-lib/src/index/updates.rs | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index e95c64485..778205dbb 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -5,6 +5,7 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use fst::IntoStreamer; use milli::heed::{EnvOpenOptions, RoTxn}; use milli::update::{IndexerConfig, Setting}; use milli::{obkv_to_json, FieldDistribution, FieldId}; @@ -174,9 +175,17 @@ impl Index { two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), }; + let disabled_words = self + .exact_words(txn)? + .into_stream() + .into_strs()? 
+ .into_iter() + .collect(); + let typo_tolerance = TypoSettings { enabled: Setting::Set(self.authorize_typos(txn)?), min_word_length_for_typo: Setting::Set(min_typo_word_len), + disable_on_words: Setting::Set(disabled_words), }; Ok(Settings { diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 2c4b7c1c5..f9bc990de 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -61,6 +61,9 @@ pub struct TypoSettings { #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub min_word_length_for_typo: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_words: Setting>, } /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a @@ -393,6 +396,13 @@ pub fn apply_settings_to_builder( } Setting::NotSet => (), } + match value.disable_on_words { + Setting::Set(ref words) => { + builder.set_exact_words(words.clone()); + } + Setting::Reset => builder.reset_exact_words(), + Setting::NotSet => (), + } } Setting::Reset => { // all typo settings need to be reset here.