mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Merge #238
238: Integration tests on filters and distinct r=Kerollmops a=ManyTheFish Fix #216 Fix #120 Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
a9e552ab18
@ -2,7 +2,7 @@
|
||||
{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""}
|
||||
{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""}
|
||||
{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""}
|
||||
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
|
||||
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
|
||||
{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""}
|
||||
{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""}
|
||||
{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""}
|
||||
|
72
milli/tests/search/distinct.rs
Normal file
72
milli/tests/search/distinct.rs
Normal file
@ -0,0 +1,72 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use milli::update::Settings;
|
||||
use milli::{Criterion, Search, SearchResult};
|
||||
use Criterion::*;
|
||||
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
/// Generates a `#[test]` function named `$func` that checks search results
/// when `$distinct` is configured as the index's distinct field.
///
/// * `$func` - name of the generated test function.
/// * `$distinct` - field identifier; it is used both as the distinct field
///   name (through `stringify!`) and as the `TestDocument` field read when
///   computing the expected result.
/// * `$criteria` - expression producing the criteria handed to
///   `search::setup_search_index_with_criteria` and `search::expected_order`.
macro_rules! test_distinct {
    ($func:ident, $distinct:ident, $criteria:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
            let index = search::setup_search_index_with_criteria(&criteria);

            // Register the distinct field in its own write transaction and
            // commit it before any read transaction is opened.
            let mut wtxn = index.write_txn().unwrap();
            let mut settings = Settings::new(&mut wtxn, &index, 0);
            settings.set_distinct_field(S(stringify!($distinct)));
            settings.execute(|_, _| ()).unwrap();
            wtxn.commit().unwrap();

            let rtxn = index.read_txn().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());
            search.authorize_typos(true);
            search.optional_words(true);

            let SearchResult { documents_ids, .. } = search.execute().unwrap();

            // Walk the expected ordering and keep only the first document seen
            // for each distinct value; `insert` returns `false` for repeats.
            let mut seen_values = HashSet::new();
            let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true)
                .into_iter()
                .filter(|document| seen_values.insert(document.$distinct.to_owned()))
                .map(|document| document.id)
                .collect();

            let actual_external_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(actual_external_ids, expected_external_ids);
        }
    };
}
|
||||
|
||||
test_distinct!(
|
||||
distinct_string_default_criteria,
|
||||
tag,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness]
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_default_criteria,
|
||||
asc_desc_rank,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness]
|
||||
);
|
||||
test_distinct!(distinct_string_criterion_words, tag, vec![Words]);
|
||||
test_distinct!(distinct_number_criterion_words, asc_desc_rank, vec![Words]);
|
||||
test_distinct!(distinct_string_criterion_words_typo, tag, vec![Words, Typo]);
|
||||
test_distinct!(distinct_number_criterion_words_typo, asc_desc_rank, vec![Words, Typo]);
|
||||
test_distinct!(distinct_string_criterion_words_proximity, tag, vec![Words, Proximity]);
|
||||
test_distinct!(distinct_number_criterion_words_proximity, asc_desc_rank, vec![Words, Proximity]);
|
||||
test_distinct!(distinct_string_criterion_words_attribute, tag, vec![Words, Attribute]);
|
||||
test_distinct!(distinct_number_criterion_words_attribute, asc_desc_rank, vec![Words, Attribute]);
|
||||
test_distinct!(distinct_string_criterion_words_exactness, tag, vec![Words, Exactness]);
|
||||
test_distinct!(distinct_number_criterion_words_exactness, asc_desc_rank, vec![Words, Exactness]);
|
78
milli/tests/search/filters.rs
Normal file
78
milli/tests/search/filters.rs
Normal file
@ -0,0 +1,78 @@
|
||||
use either::{Either, Left, Right};
|
||||
use milli::{Criterion, FilterCondition, Search, SearchResult};
|
||||
use Criterion::*;
|
||||
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
/// Generates a `#[test]` function named `$func` that runs the shared test
/// query with `$filter` applied and compares the returned documents with the
/// expected ordering restricted to `search::expected_filtered_ids($filter)`.
///
/// * `$func` - name of the generated test function.
/// * `$filter` - expression of type `Vec<Either<Vec<&str>, &str>>`. It is
///   expanded twice (once for the engine, once for the expected result), so
///   it should be a cheap, side-effect-free expression such as a `vec!`
///   literal.
macro_rules! test_filter {
    ($func:ident, $filter:expr) => {
        #[test]
        fn $func() {
            let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
            let index = search::setup_search_index_with_criteria(&criteria);
            let rtxn = index.read_txn().unwrap();

            // First `unwrap` is the parsing `Result`, second is the `Option`
            // holding the resulting condition.
            let condition = FilterCondition::from_array::<Vec<Either<Vec<&str>, &str>>, _, _, _>(
                &rtxn, &index, $filter,
            )
            .unwrap()
            .unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());
            search.authorize_typos(true);
            search.optional_words(true);
            search.filter(condition);

            let SearchResult { documents_ids, .. } = search.execute().unwrap();

            // Expected result: the unfiltered expected ordering, keeping only
            // the ids that pass the filter.
            let allowed_ids = search::expected_filtered_ids($filter);
            let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true)
                .into_iter()
                .map(|document| document.id)
                .filter(|id| allowed_ids.contains(id))
                .collect();

            let actual_external_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(actual_external_ids, expected_external_ids);
        }
    };
}
|
||||
|
||||
test_filter!(eq_simple_string_filter, vec![Right("tag=red")]);
|
||||
test_filter!(eq_simple_number_filter, vec![Right("asc_desc_rank=1")]);
|
||||
test_filter!(eq_string_and_filter_return_empty, vec![Right("tag=red"), Right("tag=green")]);
|
||||
test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]);
|
||||
test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
|
||||
test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
|
||||
test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
|
||||
test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
|
||||
test_filter!(
|
||||
eq_complex_filter_2,
|
||||
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]
|
||||
);
|
||||
test_filter!(greater_simple_number_filter, vec![Right("asc_desc_rank>1")]);
|
||||
test_filter!(greater_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank>1")]);
|
||||
test_filter!(greater_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank>1"])]);
|
||||
test_filter!(greater_number_or_filter, vec![Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]);
|
||||
test_filter!(
|
||||
greater_complex_filter,
|
||||
vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank>3")]
|
||||
);
|
||||
test_filter!(
|
||||
greater_complex_filter_2,
|
||||
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]
|
||||
);
|
||||
test_filter!(lower_simple_number_filter, vec![Right("asc_desc_rank<1")]);
|
||||
test_filter!(lower_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank<1")]);
|
||||
test_filter!(lower_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank<1"])]);
|
||||
test_filter!(lower_number_or_filter, vec![Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]);
|
||||
test_filter!(
|
||||
lower_complex_filter,
|
||||
vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank<3")]
|
||||
);
|
||||
test_filter!(
|
||||
lower_complex_filter_2,
|
||||
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]
|
||||
);
|
@ -1,4 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use either::{Either, Left, Right};
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{hashmap, hashset};
|
||||
use milli::update::{IndexDocuments, Settings, UpdateFormat};
|
||||
@ -6,6 +9,8 @@ use milli::{Criterion, DocumentId, Index};
|
||||
use serde::Deserialize;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
mod distinct;
|
||||
mod filters;
|
||||
mod query_criteria;
|
||||
|
||||
/// Query string passed to `Search::query` by the search integration tests.
// `'static` is implied for references in `const` items, so it is not spelled out.
pub const TEST_QUERY: &str = "hello world america";
|
||||
@ -120,7 +125,58 @@ pub fn expected_order(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
|
||||
let mut id = None;
|
||||
if let Some((field, filter)) = filter.split_once("=") {
|
||||
if field == "tag" && document.tag == filter {
|
||||
id = Some(document.id.clone())
|
||||
} else if field == "asc_desc_rank"
|
||||
&& document.asc_desc_rank == filter.parse::<u32>().unwrap()
|
||||
{
|
||||
id = Some(document.id.clone())
|
||||
}
|
||||
} else if let Some(("asc_desc_rank", filter)) = filter.split_once("<") {
|
||||
if document.asc_desc_rank < filter.parse().unwrap() {
|
||||
id = Some(document.id.clone())
|
||||
}
|
||||
} else if let Some(("asc_desc_rank", filter)) = filter.split_once(">") {
|
||||
if document.asc_desc_rank > filter.parse().unwrap() {
|
||||
id = Some(document.id.clone())
|
||||
}
|
||||
}
|
||||
id
|
||||
}
|
||||
|
||||
pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
|
||||
let dataset: HashSet<TestDocument> =
|
||||
serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();
|
||||
|
||||
let mut filtered_ids: HashSet<_> = dataset.iter().map(|d| d.id.clone()).collect();
|
||||
for either in filters {
|
||||
let ids = match either {
|
||||
Left(array) => array
|
||||
.into_iter()
|
||||
.map(|f| {
|
||||
let ids: HashSet<String> =
|
||||
dataset.iter().filter_map(|d| execute_filter(f, d)).collect();
|
||||
ids
|
||||
})
|
||||
.reduce(|a, b| a.union(&b).cloned().collect())
|
||||
.unwrap(),
|
||||
Right(filter) => {
|
||||
let ids: HashSet<String> =
|
||||
dataset.iter().filter_map(|d| execute_filter(filter, d)).collect();
|
||||
ids
|
||||
}
|
||||
};
|
||||
|
||||
filtered_ids = filtered_ids.intersection(&ids).cloned().collect();
|
||||
}
|
||||
|
||||
filtered_ids
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct TestDocument {
|
||||
pub id: String,
|
||||
pub word_rank: u32,
|
||||
|
@ -11,14 +11,14 @@ const ALLOW_OPTIONAL_WORDS: bool = true;
|
||||
const DISALLOW_OPTIONAL_WORDS: bool = false;
|
||||
|
||||
macro_rules! test_criterion {
|
||||
($func:ident, $optional_word:ident, $authorize_typos:ident $(, $criterion:expr)?) => {
|
||||
($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr) => {
|
||||
#[test]
|
||||
fn $func() {
|
||||
let criteria = vec![$($criterion)?];
|
||||
let criteria = $criteria;
|
||||
let index = search::setup_search_index_with_criteria(&criteria);
|
||||
let mut rtxn = index.read_txn().unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&mut rtxn, &index);
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query(search::TEST_QUERY);
|
||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||
search.authorize_typos($authorize_typos);
|
||||
@ -26,49 +26,80 @@ macro_rules! test_criterion {
|
||||
|
||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||
|
||||
let expected_external_ids: Vec<_> = search::expected_order(&criteria, $authorize_typos, $optional_word)
|
||||
.into_iter()
|
||||
.map(|d| d.id).collect();
|
||||
let expected_external_ids: Vec<_> =
|
||||
search::expected_order(&criteria, $authorize_typos, $optional_word)
|
||||
.into_iter()
|
||||
.map(|d| d.id)
|
||||
.collect();
|
||||
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
||||
assert_eq!(documents_ids, expected_external_ids);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Words);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Attribute);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Attribute);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Exactness);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Exactness);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Proximity);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Proximity);
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(asc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("asc_desc_rank")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(asc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("asc_desc_rank")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(desc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("asc_desc_rank")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(desc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("asc_desc_rank")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(asc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("unexisting_field")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(asc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("unexisting_field")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(desc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("unexisting_field")));
|
||||
#[rustfmt::skip]
|
||||
test_criterion!(desc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("unexisting_field")));
|
||||
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![]);
|
||||
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![]);
|
||||
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words]);
|
||||
test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Attribute]);
|
||||
test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Attribute]);
|
||||
test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Exactness]);
|
||||
test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Exactness]);
|
||||
test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Proximity]);
|
||||
test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Proximity]);
|
||||
test_criterion!(
|
||||
asc_allow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Asc(S("asc_desc_rank"))]
|
||||
);
|
||||
test_criterion!(
|
||||
asc_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Asc(S("asc_desc_rank"))]
|
||||
);
|
||||
test_criterion!(
|
||||
desc_allow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Desc(S("asc_desc_rank"))]
|
||||
);
|
||||
test_criterion!(
|
||||
desc_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Desc(S("asc_desc_rank"))]
|
||||
);
|
||||
test_criterion!(
|
||||
asc_unexisting_field_allow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Asc(S("unexisting_field"))]
|
||||
);
|
||||
test_criterion!(
|
||||
asc_unexisting_field_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Asc(S("unexisting_field"))]
|
||||
);
|
||||
test_criterion!(
|
||||
desc_unexisting_field_allow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Desc(S("unexisting_field"))]
|
||||
);
|
||||
test_criterion!(
|
||||
desc_unexisting_field_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Desc(S("unexisting_field"))]
|
||||
);
|
||||
test_criterion!(
|
||||
default_criteria_order,
|
||||
ALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness]
|
||||
);
|
||||
|
||||
#[test]
|
||||
fn criteria_mixup() {
|
||||
|
Loading…
Reference in New Issue
Block a user