mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 09:35:51 +08:00
Add more search tests
This commit is contained in:
parent
4c8a0179ba
commit
6e50f23896
@ -3,8 +3,10 @@ pub mod distinct;
|
||||
pub mod language;
|
||||
pub mod ngram_split_words;
|
||||
pub mod proximity;
|
||||
pub mod proximity_typo;
|
||||
pub mod sort;
|
||||
pub mod typo;
|
||||
pub mod typo_proximity;
|
||||
pub mod words_tms;
|
||||
|
||||
fn collect_field_values(
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*!
|
||||
This module tests the Proximity ranking rule:
|
||||
|
||||
1. A sprximity of >7 always has the same cost.
|
||||
1. A proximity of >7 always has the same cost.
|
||||
|
||||
2. Phrase terms can be in proximity to other terms via their start and end words,
|
||||
but we need to make sure that the phrase exists in the document that meets this
|
||||
|
68
milli/src/search/new/tests/proximity_typo.rs
Normal file
68
milli/src/search/new/tests/proximity_typo.rs
Normal file
@ -0,0 +1,68 @@
|
||||
/*!
|
||||
This module tests the interactions between the proximity and typo ranking rules.
|
||||
|
||||
The proximity ranking rule should transform the query graph such that it
|
||||
only contains the word pairs that it used to compute its bucket.
|
||||
*/
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
/// Builds the `TempIndex` used by the tests of this module.
///
/// The settings make `id` the primary key, `text` the only searchable field, and
/// order the ranking rules as Words, then Proximity, then Typo. The index is then
/// filled with the two documents of the "basic trap" described inline below.
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity, Criterion::Typo]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
// Basic trap.
|
||||
//
|
||||
// We have one document with the perfect word pair: `sommer holiday`
|
||||
// and another with the perfect word pair: `sommer holidty`.
|
||||
//
|
||||
// The proximity ranking rule will put them both in the same bucket, and it
|
||||
// should minify the query graph to make it represent:
|
||||
// EITHER:
|
||||
// sommer + holiday
|
||||
// OR:
|
||||
// sommer + holidty
|
||||
//
|
||||
// Such that the child typo ranking rule does not find any match
|
||||
// for its zero-typo bucket `summer + holiday`, even though both documents
|
||||
// contain these two exact words.
|
||||
{
|
||||
"id": 0,
|
||||
"text": "summer. holiday. sommer holidty"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"text": "summer. holiday. sommer holiday"
|
||||
},
|
||||
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
/// Runs the query `summer holiday` with `TermsMatchingStrategy::All` against the
/// index from `create_index` and snapshots the returned document ids and their
/// `text` field values.
#[test]
|
||||
fn test_trap_basic() {
|
||||
let index = create_index();
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("summer holiday");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
// NOTE(review): `create_index` in this module only adds documents with ids 0 and 1,
// yet this inline snapshot lists four ids. It looks copy-pasted from the
// typo_proximity tests; regenerate it (e.g. with `cargo insta review`) and confirm
// the expected order.
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
// NOTE(review): the inline snapshot below is empty and still has to be filled in
// with the `text` values of the returned documents.
insta::assert_debug_snapshot!(texts, @r###"
|
||||
"###);
|
||||
}
|
126
milli/src/search/new/tests/typo_proximity.rs
Normal file
126
milli/src/search/new/tests/typo_proximity.rs
Normal file
@ -0,0 +1,126 @@
|
||||
/*!
|
||||
This module tests the interactions between the typo and proximity ranking rules.
|
||||
|
||||
The typo ranking rule should transform the query graph such that it only contains
|
||||
the combinations of word derivations that it used to compute its bucket.
|
||||
|
||||
The proximity ranking rule should then look for proximities only between those specific derivations.
|
||||
For example, given the search query `beautiful summer` and the dataset:
|
||||
```text
|
||||
{ "id": 0, "text": "beautigul summer...... beautiful day in the summer" }
|
||||
{ "id": 1, "text": "beautiful summer" }
|
||||
```
|
||||
Then the document with id `1` should be returned before `0`.
|
||||
The proximity ranking rule is not allowed to look for the proximity between `beautigul` and `summer`
|
||||
because the typo ranking rule before it only used the derivation `beautiful`.
|
||||
*/
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
/// Builds the `TempIndex` used by the tests of this module.
///
/// The settings make `id` the primary key, `text` the only searchable field, and
/// order the ranking rules as Words, then Typo, then Proximity. The index is then
/// filled with three pairs of documents (ids 0-1, 2-3 and 4-5), each pair setting up
/// one of the traps described inline below.
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
// trap explained in the module documentation
|
||||
{
|
||||
"id": 0,
|
||||
"text": "beautigul summer. beautiful x y z summer"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"text": "beautiful summer"
|
||||
},
|
||||
// the next 2 documents set up a more complicated trap
|
||||
// with the query `beautiful summer`, we will have:
|
||||
// 1. documents with no typos, id 0 and 1
|
||||
// 2. documents with 1 typo: id 2 and 3, those are interpreted as EITHER
|
||||
// - id 2: "beautigul + summer" ; OR
|
||||
// - id 3: "beautiful + sommer"
|
||||
// To sort these two documents, the proximity ranking rule must use only the
|
||||
// word pairs: `beautigul -- summer` and `beautiful -- sommer` even though
|
||||
// all variations of `beautiful` and `summer` were used by the typo ranking rule.
|
||||
{
|
||||
"id": 2,
|
||||
"text": "beautigul sommer. beautigul x summer"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"text": "beautiful sommer"
|
||||
},
|
||||
// The next two documents lay out an even more complex trap, which the current implementation
|
||||
// fails to handle properly.
|
||||
// With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of:
|
||||
// - id 4: delicitous + sweet + dessert
|
||||
// - id 5: delicious + sweet + desgert
|
||||
// The word pairs that the proximity ranking rule is allowed to use are
|
||||
// EITHER:
|
||||
// delicitous -- sweet AND sweet -- dessert
|
||||
// OR
|
||||
// delicious -- sweet AND sweet -- desgert
|
||||
// So the word pair to use for the terms `sweet` and `dessert` depends on the
|
||||
// word pairs explored before them.
|
||||
{
|
||||
"id": 4,
|
||||
"text": "delicitous. sweet. dessert. delicitous sweet desgert",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"text": "delicious. sweet desgert. delicious sweet desgert",
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
/// Runs the query `beautiful summer` with `TermsMatchingStrategy::All` against the
/// index from `create_index` and snapshots the returned document ids (1, 0, 3, 2)
/// together with their `text` field values.
#[test]
|
||||
fn test_trap_basic_and_complex1() {
|
||||
let index = create_index();
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("beautiful summer");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"beautiful summer\"",
|
||||
"\"beautigul summer. beautiful x y z summer\"",
|
||||
"\"beautiful sommer\"",
|
||||
"\"beautigul sommer. beautigul x summer\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
/// Runs the query `delicious sweet dessert` with `TermsMatchingStrategy::All` against
/// the index from `create_index` and snapshots the returned document ids together
/// with their `text` field values.
///
/// The snapshots record the current output, which the TODO below marks as incorrect.
#[test]
|
||||
fn test_trap_complex2() {
|
||||
let index = create_index();
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("delicious sweet dessert");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
// TODO: this order is incorrect, document 5 should appear before document 4
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"delicitous. sweet. dessert. delicitous sweet desgert\"",
|
||||
"\"delicious. sweet desgert. delicious sweet desgert\"",
|
||||
]
|
||||
"###);
|
||||
}
|
Loading…
Reference in New Issue
Block a user