meilisearch/milli/tests/search/distinct.rs

237 lines
5.4 KiB
Rust
Raw Normal View History

2021-06-17 20:24:59 +08:00
use std::collections::HashSet;
use big_s::S;
use milli::update::Settings;
use milli::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use RankingRule::*;
2021-06-17 20:24:59 +08:00
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
/// Generates a `#[test]` function named `$func` that checks search results
/// when `$distinct` is configured as the index's distinct field.
///
/// Arguments, in order:
/// - `$func`: name of the generated test function.
/// - `$distinct`: field identifier; it is stringified for the settings update
///   and also read as a struct field on the documents returned by
///   `search::expected_order`.
/// - `$exhaustive`: `true` or `false`, forwarded to `exhaustive_number_hits`.
/// - `$limit`, `$offset`: pagination forwarded to the search and mirrored with
///   `take`/`skip` on the expected list.
/// - `$criteria`: ranking rules the test index is set up with.
/// - `$n_res`: expected value of `candidates.len()`.
///
/// A trailing comma after `$n_res` is accepted.
macro_rules! test_distinct {
    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr $(,)?) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
            let index = search::setup_search_index_with_criteria(&criteria);

            // Update the distinct attribute in its own write transaction.
            let mut wtxn = index.write_txn().unwrap();
            let config = milli::update::IndexerConfig::default();
            let mut builder = Settings::new(&mut wtxn, &index, &config);
            builder.set_distinct_field(S(stringify!($distinct)));
            builder.execute(|_| (), || false).unwrap();
            wtxn.commit().unwrap();

            // Run the query under test with the requested pagination.
            let rtxn = index.read_txn().unwrap();
            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit($limit);
            search.offset($offset);
            search.exhaustive_number_hits($exhaustive);
            search.terms_matching_strategy(TermsMatchingStrategy::default());

            let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();
            assert_eq!(candidates.len(), $n_res);

            // Build the expected ids: walk the expected ordering, keep only the
            // first document seen for each distinct value, then paginate.
            // `HashSet::insert` returns `false` for an already-present value,
            // so it doubles as the "seen before?" check.
            let mut distinct_values = HashSet::new();
            let expected_external_ids: Vec<_> =
                search::expected_order(&criteria, TermsMatchingStrategy::default(), &[])
                    .into_iter()
                    .filter(|d| distinct_values.insert(d.$distinct.to_owned()))
                    .map(|d| d.id)
                    .skip($offset)
                    .take($limit)
                    .collect();

            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(documents_ids, expected_external_ids);
        }
    };
}
test_distinct!(
exhaustive_distinct_string_default_criteria,
tag,
true,
1,
0,
2022-12-07 23:41:23 +08:00
vec![Words, Typo, Proximity, Attribute, Exactness],
3
);
test_distinct!(
exhaustive_distinct_number_default_criteria,
asc_desc_rank,
true,
1,
0,
2022-12-07 23:41:23 +08:00
vec![Words, Typo, Proximity, Attribute, Exactness],
7
);
2022-12-13 22:54:43 +08:00
test_distinct!(
exhaustive_distinct_number_weird_order_criteria,
asc_desc_rank,
true,
0,
0,
2022-12-13 22:54:43 +08:00
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
7
);
2022-12-07 23:41:23 +08:00
2021-06-17 21:19:03 +08:00
test_distinct!(
distinct_string_default_criteria,
tag,
2022-12-07 23:41:23 +08:00
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
2022-04-09 20:50:43 +08:00
vec![Words, Typo, Proximity, Attribute, Exactness],
3
2021-06-17 21:19:03 +08:00
);
test_distinct!(
distinct_number_default_criteria,
asc_desc_rank,
2022-12-07 23:41:23 +08:00
false,
EXTERNAL_DOCUMENTS_IDS.len(),
0,
2022-04-09 20:50:43 +08:00
vec![Words, Typo, Proximity, Attribute, Exactness],
7
2021-06-17 21:19:03 +08:00
);
2022-12-07 23:41:23 +08:00
// Tests with `exhaustive_number_hits(false)` where the criteria are `Words`
// alone or `Words` followed by one other ranking rule. Each criteria list is
// run twice: distinct on `tag` (3 candidates expected) and distinct on
// `asc_desc_rank` (7 candidates expected).

// Criteria: [Words]
test_distinct!(
    distinct_string_criterion_words,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words],
    3
);
test_distinct!(
    distinct_number_criterion_words,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words],
    7
);

// Criteria: [Words, Typo]
test_distinct!(
    distinct_string_criterion_words_typo,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo],
    3
);
test_distinct!(
    distinct_number_criterion_words_typo,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo],
    7
);

// Criteria: [Words, Proximity]
test_distinct!(
    distinct_string_criterion_words_proximity,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Proximity],
    3
);
test_distinct!(
    distinct_number_criterion_words_proximity,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Proximity],
    7
);

// Criteria: [Words, Attribute]
test_distinct!(
    distinct_string_criterion_words_attribute,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Attribute],
    3
);
test_distinct!(
    distinct_number_criterion_words_attribute,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Attribute],
    7
);

// Criteria: [Words, Exactness]
test_distinct!(
    distinct_string_criterion_words_exactness,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Exactness],
    3
);
test_distinct!(
    distinct_number_criterion_words_exactness,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Exactness],
    7
);
// Regression tests for https://github.com/meilisearch/meilisearch/issues/4078:
// distinct combined with a non-zero offset, using an empty criteria list.
// Argument order: name, distinct field, exhaustive, limit, offset, criteria,
// expected `candidates.len()`.

// Non-exhaustive, distinct on `tag`, offset 1.
test_distinct!(
    distinct_string_limit_and_offset,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    1,
    Vec::new(),
    2
);

// Exhaustive, distinct on `tag`, limit 1, offset 2.
test_distinct!(
    exhaustive_distinct_string_limit_and_offset,
    tag,
    true,
    1,
    2,
    Vec::new(),
    1
);

// Non-exhaustive, distinct on `asc_desc_rank`, offset 2.
test_distinct!(
    distinct_number_limit_and_offset,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    2,
    Vec::new(),
    5
);

// Exhaustive, distinct on `asc_desc_rank`, limit 2, offset 4.
test_distinct!(
    exhaustive_distinct_number_limit_and_offset,
    asc_desc_rank,
    true,
    2,
    4,
    Vec::new(),
    3
);