// Source: meilisearch/milli/tests/search/typo_tolerance.rs (Rust, 209 lines, 6.3 KiB)

use std::collections::BTreeSet;
use heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use serde_json::json;
use tempfile::tempdir;
use Criterion::*;
/// Checks that the minimum word length for tolerating one typo is configurable.
///
/// With the default settings, `zeal` matches one document while the misspelled
/// `zean` matches none. After `min_word_len_one_typo` is set to 4, `zean` is
/// expected to match one document.
#[test]
fn test_typo_tolerance_one_typo() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        // the correctly spelled word is found
        let mut search = Search::new(&txn, &index);
        search.query("zeal");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);

        // the same word with one typo is not found with the default settings
        let mut search = Search::new(&txn, &index);
        search.query("zean");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 0);
    }

    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    builder.set_min_word_len_one_typo(4);
    builder.execute(|_| (), || false).unwrap();

    // typo is now supported for 4 letters words
    // (the search reuses the write transaction, which is never committed here)
    let mut search = Search::new(&txn, &index);
    search.query("zean");
    search.limit(10);
    search.terms_matching_strategy(TermsMatchingStrategy::default());
    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}
/// Checks that the minimum word length for tolerating two typos is configurable.
///
/// With the default settings, `zealand` matches one document while `zealemd`
/// (two typos) matches none. After `min_word_len_two_typos` is set to 7,
/// `zealemd` is expected to match one document.
#[test]
fn test_typo_tolerance_two_typo() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        // the correctly spelled word is found
        let mut search = Search::new(&txn, &index);
        search.query("zealand");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);

        // the same word with two typos is not found with the default settings
        let mut search = Search::new(&txn, &index);
        search.query("zealemd");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 0);
    }

    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    builder.set_min_word_len_two_typos(7);
    builder.execute(|_| (), || false).unwrap();

    // two typos are now supported for 7 letters words
    // (the search reuses the write transaction, which is never committed here)
    let mut search = Search::new(&txn, &index);
    search.query("zealemd");
    search.limit(10);
    search.terms_matching_strategy(TermsMatchingStrategy::default());
    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}
/// Checks that a word registered as an exact word no longer tolerates typos.
///
/// A temporary index is filled with two documents whose `data` fields are
/// `zealand` and `zearand`. With the default settings, a `zealand` query
/// matches both documents. After `zealand` is added to the exact words, the
/// same query is expected to match a single document.
#[test]
fn test_typo_disabled_on_word() {
    let tmp = tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 100);
    let index = Index::new(options, tmp.path()).unwrap();

    // Build an in-memory documents batch holding two near-identical words.
    let mut batch_builder = milli::documents::DocumentsBatchBuilder::new(Vec::new());
    let doc1 = json!({
        "id": 1usize,
        "data": "zealand",
    });
    let doc2 = json!({
        "id": 2usize,
        "data": "zearand",
    });
    batch_builder.append_json_object(doc1.as_object().unwrap()).unwrap();
    batch_builder.append_json_object(doc2.as_object().unwrap()).unwrap();
    let vector = batch_builder.into_inner().unwrap();
    let documents =
        milli::documents::DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap();

    // Index the batch and commit so the read transaction below can see it.
    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();
    let indexer =
        IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false).unwrap();
    let (indexer, user_error) = indexer.add_documents(documents).unwrap();
    user_error.unwrap();
    indexer.execute().unwrap();
    txn.commit().unwrap();

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();
        let mut search = Search::new(&txn, &index);
        search.query("zealand");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 2);
    }

    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut settings = Settings::new(&mut txn, &index, &config);
    let mut exact_words = BTreeSet::new();
    // `zealand` doesn't allow typos anymore
    exact_words.insert("zealand".to_string());
    settings.set_exact_words(exact_words);
    settings.execute(|_| (), || false).unwrap();

    // The search reuses the write transaction, which is never committed here.
    let mut search = Search::new(&txn, &index);
    search.query("zealand");
    search.limit(10);
    search.terms_matching_strategy(TermsMatchingStrategy::default());
    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}
/// Checks that typo tolerance can be disabled for a given attribute.
///
/// With the default settings, the misspelled query `antebelum` matches one
/// document. After `description` is registered as an exact attribute, the
/// same query is expected to match no document.
#[test]
fn test_disable_typo_on_attribute() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();
        let mut search = Search::new(&txn, &index);
        // typo in `antebel(l)um`
        search.query("antebelum");
        search.limit(10);
        search.terms_matching_strategy(TermsMatchingStrategy::default());
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);
    }

    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    // disable typos on `description`
    builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
    builder.execute(|_| (), || false).unwrap();

    // The search reuses the write transaction, which is never committed here.
    let mut search = Search::new(&txn, &index);
    search.query("antebelum");
    search.limit(10);
    search.terms_matching_strategy(TermsMatchingStrategy::default());
    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 0);
}