add tests for min_word_len_for_typo

This commit is contained in:
ad hoc 2022-03-31 13:50:18 +02:00
parent 9102de5500
commit 55af85db3c
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
4 changed files with 83 additions and 10 deletions

View File

@ -292,7 +292,7 @@ ranking rules settings to use the sort parameter at search time.",
Self::UnknownInternalDocumentId { document_id } => {
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
}
Self::InvalidMinTypoWordSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`."", one, two),
Self::InvalidMinTypoWordSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`.", one, two),
}
}
}

View File

@ -937,6 +937,7 @@ pub(crate) mod tests {
use maplit::btreemap;
use tempfile::TempDir;
use crate::index::{DEFAULT_MIN_WORD_LEN_1_TYPO, DEFAULT_MIN_WORD_LEN_2_TYPOS};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
use crate::Index;
@ -1064,4 +1065,22 @@ pub(crate) mod tests {
let txn = index.read_txn().unwrap();
assert!(!index.authorize_typos(&txn).unwrap());
}
/// The minimum word lengths for one and two typos start at their defaults
/// and, once written and committed, are read back unchanged.
#[test]
fn set_min_word_len_for_typos() {
    let index = TempIndex::new();

    // Write phase: a fresh index must expose the default thresholds.
    let mut wtxn = index.write_txn().unwrap();
    let initial_one_typo = index.min_word_len_1_typo(&wtxn).unwrap();
    assert_eq!(initial_one_typo, DEFAULT_MIN_WORD_LEN_1_TYPO);
    let initial_two_typos = index.min_word_len_2_typo(&wtxn).unwrap();
    assert_eq!(initial_two_typos, DEFAULT_MIN_WORD_LEN_2_TYPOS);

    // Override both thresholds and persist them.
    index.put_min_word_len_1_typo(&mut wtxn, 3).unwrap();
    index.put_min_word_len_2_typo(&mut wtxn, 15).unwrap();
    wtxn.commit().unwrap();

    // Read phase: a new read transaction must observe the committed values.
    let rtxn = index.read_txn().unwrap();
    let stored_one_typo = index.min_word_len_1_typo(&rtxn).unwrap();
    assert_eq!(stored_one_typo, 3);
    let stored_two_typos = index.min_word_len_2_typo(&rtxn).unwrap();
    assert_eq!(stored_two_typos, 15);
}
}

View File

@ -264,6 +264,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
}
#[derive(Clone)]
pub struct TypoConfig {
pub max_typos: u8,
pub word_len_1_typo: u8,
@ -1219,4 +1220,24 @@ mod test {
assert_eq!(expected, query_tree);
}
/// `typos` must pick the query kind from the configured length thresholds:
/// with `word_len_1_typo: 5` and `word_len_2_typo: 7`, a 4-letter word is
/// exact, a 5-letter word tolerates one typo and a 12-letter word two.
#[test]
fn test_min_word_len_typo() {
    let config = TypoConfig { max_typos: 2, word_len_1_typo: 5, word_len_2_typo: 7 };

    // (input word, expected query kind), checked in this order.
    let cases = [
        ("hello", QueryKind::Tolerant { typo: 1, word: "hello".to_string() }),
        ("hell", QueryKind::exact("hell".to_string())),
        ("verylongword", QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() }),
    ];

    for (word, expected) in cases {
        // The second argument stays `true` for every case, as in each call.
        assert_eq!(typos(word.to_string(), true, config.clone()), expected);
    }
}
}

View File

@ -497,27 +497,27 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_min_typo_word_len(&mut self) -> Result<()> {
match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) {
(Setting::Set(one), Setting::Set(two)) => {
if one < two {
if one > two {
return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into());
} else {
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
} else {
return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into());
}
}
(Setting::Set(one), _) => {
let two = self.index.min_word_len_2_typo(&self.wtxn)?;
if *one < two {
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
} else {
if *one > two {
return Err(UserError::InvalidMinTypoWordSetting(*one, two).into());
} else {
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
}
}
(_, Setting::Set(two)) => {
let one = self.index.min_word_len_1_typo(&self.wtxn)?;
if one < *two {
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
} else {
if one > *two {
return Err(UserError::InvalidMinTypoWordSetting(one, *two).into());
} else {
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
}
}
_ => (),
@ -1286,4 +1286,37 @@ mod tests {
builder.execute(|_| ()).unwrap();
assert!(!index.authorize_typos(&txn).unwrap());
}
/// Setting both typo thresholds to the same value through the settings
/// builder is accepted, and the values are stored in the index.
#[test]
fn update_min_word_len_for_typo() {
    let index = TempIndex::new();
    let config = IndexerConfig::default();
    // Both thresholds share this value, so `oneTypo == twoTypos`.
    let word_len = 8;

    // Apply the min-word-length-for-typos settings and commit them.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, &config);
    settings.set_min_1_typo_word_len(word_len);
    settings.set_min_2_typos_word_len(word_len);
    settings.execute(|_| ()).unwrap();
    wtxn.commit().unwrap();

    // The committed values must be visible from a new read transaction.
    let rtxn = index.read_txn().unwrap();
    assert_eq!(index.min_word_len_1_typo(&rtxn).unwrap(), word_len);
    assert_eq!(index.min_word_len_2_typo(&rtxn).unwrap(), word_len);
}
/// A one-typo threshold (10) greater than the two-typos threshold (7) must
/// make the settings update fail.
#[test]
fn update_invalid_min_word_len_for_typo() {
    let index = TempIndex::new();
    let config = IndexerConfig::default();

    // Request an inconsistent pair of thresholds: oneTypo > twoTypos.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, &config);
    settings.set_min_1_typo_word_len(10);
    settings.set_min_2_typos_word_len(7);

    // The update is expected to be rejected; the transaction is never committed.
    let outcome = settings.execute(|_| ());
    assert!(outcome.is_err());
}
}