diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index f219ab7e9..599d479ed 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -2,7 +2,7 @@ {"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""} {"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""} {"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""} -{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""} +{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""} {"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""} {"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""} {"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""} diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs new file mode 100644 index 000000000..790bd4a58 --- /dev/null +++ b/milli/tests/search/filters.rs @@ -0,0 +1,84 @@ +use either::{Either, Left, Right}; +use milli::{Criterion, FilterCondition, Search, SearchResult}; +use Criterion::*; + +use crate::search::{self, EXTERNAL_DOCUMENTS_IDS}; + +macro_rules! test_filter { + ($func:ident, $filter:expr) => { + #[test] + fn $func() { + let criteria = vec![Words, Typo, Proximity, Attribute, Exactness]; + let index = search::setup_search_index_with_criteria(&criteria); + let mut rtxn = index.read_txn().unwrap(); + + let filter_conditions = + FilterCondition::from_array::, &str>>, _, _, _>( + &rtxn, &index, $filter, + ) + .unwrap() + .unwrap(); + + let mut search = Search::new(&mut rtxn, &index); + search.query(search::TEST_QUERY); + search.limit(EXTERNAL_DOCUMENTS_IDS.len()); + search.authorize_typos(true); + search.optional_words(true); + search.filter(filter_conditions); + + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + + let filtered_ids = search::expected_filtered_ids($filter); + let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true) + .into_iter() + .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None }) + .collect(); + + let documents_ids = search::internal_to_external_ids(&index, &documents_ids); + assert_eq!(documents_ids, expected_external_ids); + } + }; +} + +#[rustfmt::skip] +test_filter!(eq_simple_string_filter, vec![Right("tag=red")]); +#[rustfmt::skip] +test_filter!(eq_simple_number_filter, vec![Right("asc_desc_rank=1")]); +#[rustfmt::skip] +test_filter!(eq_string_and_filter_return_empty, vec![Right("tag=red"), Right("tag=green")]); +#[rustfmt::skip] +test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]); +#[rustfmt::skip] +test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]); +#[rustfmt::skip] +test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]); +#[rustfmt::skip] +test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]); +#[rustfmt::skip] +test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]); +#[rustfmt::skip] +test_filter!(eq_complex_filter_2, vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]); +#[rustfmt::skip] +test_filter!(greater_simple_number_filter, vec![Right("asc_desc_rank>1")]); +#[rustfmt::skip] +test_filter!(greater_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank>1")]); +#[rustfmt::skip] +test_filter!(greater_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank>1"])]); +#[rustfmt::skip] +test_filter!(greater_number_or_filter, vec![Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]); +#[rustfmt::skip] +test_filter!(greater_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank>3")]); +#[rustfmt::skip] +test_filter!(greater_complex_filter_2, vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]); +#[rustfmt::skip] +test_filter!(lower_simple_number_filter, vec![Right("asc_desc_rank<1")]); +#[rustfmt::skip] +test_filter!(lower_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank<1")]); +#[rustfmt::skip] +test_filter!(lower_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank<1"])]); +#[rustfmt::skip] +test_filter!(lower_number_or_filter, vec![Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]); +#[rustfmt::skip] +test_filter!(lower_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank<3")]); +#[rustfmt::skip] +test_filter!(lower_complex_filter_2, vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 7842b6c13..e48d7704d 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -1,4 +1,7 @@ +use std::collections::HashSet; + use big_s::S; +use either::{Either, Left, Right}; use heed::EnvOpenOptions; use maplit::{hashmap, hashset}; use milli::update::{IndexDocuments, Settings, UpdateFormat}; @@ -6,6 +9,7 @@ use milli::{Criterion, DocumentId, Index}; use serde::Deserialize; use slice_group_by::GroupBy; +mod filters; mod query_criteria; pub const TEST_QUERY: &'static str = "hello world america"; @@ -120,7 +124,61 @@ pub fn expected_order( } } -#[derive(Debug, Clone, Deserialize)] +fn execute_filter(filter: &str, document: &TestDocument) -> Option { + let mut id = None; + if let Some((field, filter)) = filter.split_once("=") { + println!("eq on field {} with filter {}", field, filter); + if field == "tag" && document.tag == filter { + id = Some(document.id.clone()) + } else if field == "asc_desc_rank" + && document.asc_desc_rank == filter.parse::().unwrap() + { + id = Some(document.id.clone()) + } + } else if let Some(("asc_desc_rank", filter)) = filter.split_once("<") { + println!("lower on field asc_desc_rank with filter {}", filter); + if document.asc_desc_rank < filter.parse().unwrap() { + id = Some(document.id.clone()) + } + } else if let Some(("asc_desc_rank", filter)) = filter.split_once(">") { + println!("higher on field asc_desc_rank with filter {}", filter); + if document.asc_desc_rank > filter.parse().unwrap() { + id = Some(document.id.clone()) + } + } + id +} + +pub fn expected_filtered_ids(filters: Vec, &str>>) -> HashSet { + let dataset: HashSet = + serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); + + let mut filtered_ids: HashSet<_> = dataset.iter().map(|d| d.id.clone()).collect(); + for either in filters { + let ids = match either { + Left(array) => array + .into_iter() + .map(|f| { + let ids: HashSet = + dataset.iter().filter_map(|d| execute_filter(f, d)).collect(); + ids + }) + .reduce(|a, b| a.union(&b).cloned().collect()) + .unwrap(), + Right(filter) => { + let ids: HashSet = + dataset.iter().filter_map(|d| execute_filter(filter, d)).collect(); + ids + } + }; + + filtered_ids = filtered_ids.intersection(&ids).cloned().collect(); + } + + filtered_ids +} + +#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Hash)] pub struct TestDocument { pub id: String, pub word_rank: u32, diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index 19173bc72..7ab9897b3 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -11,10 +11,10 @@ const ALLOW_OPTIONAL_WORDS: bool = true; const DISALLOW_OPTIONAL_WORDS: bool = false; macro_rules! test_criterion { - ($func:ident, $optional_word:ident, $authorize_typos:ident $(, $criterion:expr)?) => { + ($func:ident, $optional_word:ident, $authorize_typos:ident , $criterion:expr) => { #[test] fn $func() { - let criteria = vec![$($criterion)?]; + let criteria = $criterion; let index = search::setup_search_index_with_criteria(&criteria); let mut rtxn = index.read_txn().unwrap(); @@ -26,49 +26,53 @@ macro_rules! test_criterion { let SearchResult { documents_ids, .. } = search.execute().unwrap(); - let expected_external_ids: Vec<_> = search::expected_order(&criteria, $authorize_typos, $optional_word) - .into_iter() - .map(|d| d.id).collect(); + let expected_external_ids: Vec<_> = + search::expected_order(&criteria, $authorize_typos, $optional_word) + .into_iter() + .map(|d| d.id) + .collect(); let documents_ids = search::internal_to_external_ids(&index, &documents_ids); assert_eq!(documents_ids, expected_external_ids); } - } + }; } #[rustfmt::skip] -test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS); +test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![]); #[rustfmt::skip] -test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS); +test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![]); #[rustfmt::skip] -test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Words); +test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words]); #[rustfmt::skip] -test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Attribute); +test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Attribute]); #[rustfmt::skip] -test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Attribute); +test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Attribute]); #[rustfmt::skip] -test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Exactness); +test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Exactness]); #[rustfmt::skip] -test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Exactness); +test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Exactness]); #[rustfmt::skip] -test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Proximity); +test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Proximity]); #[rustfmt::skip] -test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Proximity); +test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Proximity]); #[rustfmt::skip] -test_criterion!(asc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("asc_desc_rank"))); +test_criterion!(asc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Asc(S("asc_desc_rank"))]); #[rustfmt::skip] -test_criterion!(asc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("asc_desc_rank"))); +test_criterion!(asc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Asc(S("asc_desc_rank"))]); #[rustfmt::skip] -test_criterion!(desc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("asc_desc_rank"))); +test_criterion!(desc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Desc(S("asc_desc_rank"))]); #[rustfmt::skip] -test_criterion!(desc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("asc_desc_rank"))); +test_criterion!(desc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Desc(S("asc_desc_rank"))]); #[rustfmt::skip] -test_criterion!(asc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("unexisting_field"))); +test_criterion!(asc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Asc(S("unexisting_field"))]); #[rustfmt::skip] -test_criterion!(asc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("unexisting_field"))); +test_criterion!(asc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Asc(S("unexisting_field"))]); #[rustfmt::skip] -test_criterion!(desc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("unexisting_field"))); +test_criterion!(desc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Desc(S("unexisting_field"))]); #[rustfmt::skip] -test_criterion!(desc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("unexisting_field"))); +test_criterion!(desc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Desc(S("unexisting_field"))]); +#[rustfmt::skip] +test_criterion!(default_criteria_order, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words, Typo, Proximity, Attribute, Exactness]); #[test] fn criteria_mixup() {