Add more tests for the attribute ranking rule

This commit is contained in:
Loïc Lecrenier 2023-04-26 11:18:15 +02:00
parent 899baa0ea5
commit bc4efca611
3 changed files with 104 additions and 6 deletions

View File

@ -77,6 +77,10 @@ impl RankingRuleGraphTrait for PositionGraph {
let cost = {
let mut cost = 0;
for i in 0..term.term_ids.len() {
// This is actually not fully correct and slightly penalises ngrams unfairly.
// Because if two words are in the same bucketed position (e.g. 32) and consecutive,
// then their position cost will be 32+32=64, but an ngram of these two words at the
// same position will have a cost of 32+32+1=65
cost += position as u32 + i as u32;
}
cost

View File

@ -88,7 +88,25 @@ fn create_index() -> TempIndex {
"title": "the quick",
"description": "",
"plot": "brown fox jumps over the lazy dog",
}
},
{
"id": 12,
"title": "",
"description": "the quickbrownfox",
"plot": "jumps over the lazy dog",
},
{
"id": 13,
"title": "",
"description": "the quick brown fox",
"plot": "jumps over the lazy dog",
},
{
"id": 14,
"title": "",
"description": "the quickbrownfox",
"plot": "jumps overthelazy dog",
},
]))
.unwrap();
index
@ -104,5 +122,18 @@ fn test_attribute_fid_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 0]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
}
#[test]
fn test_attribute_fid_ngrams() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
}

View File

@ -8,7 +8,11 @@ fn create_index() -> TempIndex {
index
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned(), "other".to_owned()]);
s.set_searchable_fields(vec![
"text".to_owned(),
"text2".to_owned(),
"other".to_owned(),
]);
s.set_criteria(vec![Criterion::Attribute]);
})
.unwrap();
@ -83,8 +87,41 @@ fn create_index() -> TempIndex {
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quickbrown",
},
{
"id": 8,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quick brown",
},
{
"id": 9,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quickbrown",
},
{
"id": 10,
"text": "quick brown",
"text2": "brown quick",
},
{
"id": 11,
"text": "quickbrown",
},
{
"id": 12,
"text": "quick brown",
},
{
"id": 13,
"text": "quickbrown",
},
]))
.unwrap();
index
@ -94,7 +131,7 @@ fn create_index() -> TempIndex {
fn test_attribute_position_simple() {
let index = create_index();
db_snap!(index, word_position_docids, @"fe86911166fa4c0903c512fd86ec65e4");
db_snap!(index, word_position_docids, @"1ad58847d772924d8aab5e92be8cf0cc");
let txn = index.read_txn().unwrap();
@ -102,7 +139,7 @@ fn test_attribute_position_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 4, 2, 1, 0, 6, 7, 5]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
fn test_attribute_position_repeated() {
@ -114,5 +151,31 @@ fn test_attribute_position_repeated() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("a a a a a");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 7, 6]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 7, 8, 9, 6]");
}
#[test]
fn test_attribute_position_different_fields() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
fn test_attribute_position_ngrams() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
}