mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
introduce word len for typo setting
This commit is contained in:
parent
9fe40df960
commit
5a24e60572
@ -72,6 +72,7 @@ pub enum UserError {
|
||||
SerdeJson(serde_json::Error),
|
||||
SortError(SortError),
|
||||
UnknownInternalDocumentId { document_id: DocumentId },
|
||||
InvalidMinTypoWordSetting(u8, u8),
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.",
|
||||
Self::UnknownInternalDocumentId { document_id } => {
|
||||
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
|
||||
}
|
||||
Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,9 @@ use crate::{
|
||||
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
||||
};
|
||||
|
||||
/// Fallback returned by `Index::min_word_len_1_typo` when no value has been stored yet.
pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5;
|
||||
/// Fallback returned by `Index::min_word_len_2_typo` when no value has been stored yet.
pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9;
|
||||
|
||||
pub mod main_key {
|
||||
pub const CRITERIA_KEY: &str = "criteria";
|
||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||
@ -47,6 +50,8 @@ pub mod main_key {
|
||||
pub const CREATED_AT_KEY: &str = "created-at";
|
||||
pub const UPDATED_AT_KEY: &str = "updated-at";
|
||||
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
||||
/// Key under which `Index` reads and writes the 1-typo minimum word length (a `u8`).
pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
|
||||
/// Key under which `Index` reads and writes the 2-typos minimum word length (a `u8`).
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
@ -886,6 +891,42 @@ impl Index {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO))
|
||||
}
|
||||
|
||||
/// Stores `val` as the 1-typo minimum word length under `main_key::ONE_TYPO_WORD_LEN`.
///
/// No validation is performed here; the value is written as-is as a `u8`.
///
/// # Errors
///
/// Propagates any `heed` error raised while writing the entry.
pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
    self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)
}
|
||||
|
||||
pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS))
|
||||
}
|
||||
|
||||
/// Stores `val` as the 2-typos minimum word length under `main_key::TWO_TYPOS_WORD_LEN`.
///
/// No validation is performed here; the value is written as-is as a `u8`.
///
/// # Errors
///
/// Propagates any `heed` error raised while writing the entry.
pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
    self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||
primary_key: Setting<String>,
|
||||
authorize_typos: Setting<bool>,
|
||||
min_2_typos_word_len: Setting<u8>,
|
||||
min_1_typo_word_len: Setting<u8>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
primary_key: Setting::NotSet,
|
||||
authorize_typos: Setting::NotSet,
|
||||
indexer_config,
|
||||
min_2_typos_word_len: Setting::Reset,
|
||||
min_1_typo_word_len: Setting::Reset,
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.authorize_typos = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_min_2_typos_word_len(&mut self, val: u8) {
|
||||
self.min_2_typos_word_len = Setting::Set(val);
|
||||
}
|
||||
|
||||
pub fn reset_min_2_typos_word_len(&mut self) {
|
||||
self.min_2_typos_word_len = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_min_1_typo_word_len(&mut self, val: u8) {
|
||||
self.min_1_typo_word_len = Setting::Set(val);
|
||||
}
|
||||
|
||||
pub fn reset_min_1_typos_word_len(&mut self) {
|
||||
self.min_1_typo_word_len = Setting::Reset;
|
||||
}
|
||||
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
fn update_min_typo_word_len(&mut self) -> Result<()> {
|
||||
match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) {
|
||||
(Setting::Set(one), Setting::Set(two)) => {
|
||||
if one < two {
|
||||
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
||||
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
|
||||
} else {
|
||||
return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into());
|
||||
}
|
||||
}
|
||||
(Setting::Set(one), _) => {
|
||||
let two = self.index.min_word_len_2_typo(&self.wtxn)?;
|
||||
if *one < two {
|
||||
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
||||
} else {
|
||||
return Err(UserError::InvalidMinTypoWordSetting(*one, two).into());
|
||||
}
|
||||
}
|
||||
(_, Setting::Set(two)) => {
|
||||
let one = self.index.min_word_len_1_typo(&self.wtxn)?;
|
||||
if one < *two {
|
||||
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
|
||||
} else {
|
||||
return Err(UserError::InvalidMinTypoWordSetting(one, *two).into());
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.update_criteria()?;
|
||||
self.update_primary_key()?;
|
||||
self.update_authorize_typos()?;
|
||||
self.update_min_typo_word_len()?;
|
||||
|
||||
// If there is new faceted fields we indicate that we must reindex as we must
|
||||
// index new fields as facets. It means that the distinct attribute,
|
||||
|
Loading…
Reference in New Issue
Block a user