meilisearch/milli/tests/search/mod.rs

use std::cmp::Reverse;
use std::collections::HashSet;
use std::io::Cursor;

use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;

mod distinct;
mod facet_distribution;
mod filters;
mod query_criteria;
mod sort;
mod typo_tolerance;

/// Query string made available to the search tests.
///
/// Each of its three words has a synonym registered by
/// `setup_search_index_with_criteria`.
pub const TEST_QUERY: &str = "hello world america";

/// External document ids that `internal_to_external_ids` is able to resolve.
///
/// Every id listed here must exist in the index, since the lookup unwraps.
/// Presumably this is one id per document of the test set; verify against
/// `test_set.ndjson`.
pub const EXTERNAL_DOCUMENTS_IDS: &[&str; 17] =
    &["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q"];

/// Raw text of the test set, embedded at compile time from `../assets/test_set.ndjson`.
///
/// The helpers of this module parse it as a stream of JSON values, both to index the
/// documents and to compute the expected results as `TestDocument`s.
pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");

/// Creates an index in a temporary directory, configures it with `criteria` and fills it
/// with every document found in `CONTENT`.
///
/// Settings and documents are written through one write transaction, which is committed
/// before the index is returned.
///
/// # Panics
///
/// Panics as soon as any step fails: every fallible call is unwrapped.
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    // NOTE(review): this temporary-directory guard is dropped when the function returns
    // while the returned `Index` lives on -- presumably the environment stays usable
    // after that; confirm.
    let path = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &path).unwrap();

    let mut wtxn = index.write_txn().unwrap();

    // Step 1: apply the settings.
    let settings_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &settings_config);

    let criteria_names = criteria.iter().map(|criterion| criterion.to_string()).collect();
    settings.set_criteria(criteria_names);
    settings.set_filterable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
        S("_geo"),
        S("opt1"),
        S("opt1.opt2")
    });
    settings.set_sortable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
    });
    settings.set_synonyms(hashmap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
    });
    settings.set_searchable_fields(vec![S("title"), S("description")]);
    settings.execute(|_| ()).unwrap();

    // Step 2: prepare the indexing operation.
    let indexer_config =
        IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let documents_config =
        IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };

    let mut indexing =
        IndexDocuments::new(&mut wtxn, &index, &indexer_config, documents_config, |_| ())
            .unwrap();

    // Step 3: re-encode the test set, one JSON value at a time, into an in-memory batch.
    let mut batch = Cursor::new(Vec::new());
    let mut batch_builder = DocumentBatchBuilder::new(&mut batch).unwrap();
    let values = serde_json::Deserializer::from_reader(Cursor::new(CONTENT.as_bytes()))
        .into_iter::<serde_json::Value>();

    for value in values {
        let encoded = serde_json::to_vec(&value.unwrap()).unwrap();
        batch_builder.extend_from_json(Cursor::new(encoded)).unwrap();
    }

    batch_builder.finish().unwrap();

    // Rewind so that the reader starts from the beginning of what was just written.
    batch.set_position(0);

    // Step 4: feed the batch to the indexer and run it.
    let batch_reader = DocumentBatchReader::from_reader(batch).unwrap();
    indexing.add_documents(batch_reader).unwrap();
    indexing.execute().unwrap();

    wtxn.commit().unwrap();

    index
}

/// Translates internal document ids into their external counterparts, preserving order.
///
/// Only the ids listed in `EXTERNAL_DOCUMENTS_IDS` can be resolved.
///
/// # Panics
///
/// Panics if one of the known external ids is missing from the index, or if an entry of
/// `internal_ids` does not correspond to any known external id.
pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> Vec<String> {
    let mut rtxn = index.read_txn().unwrap();
    let external_ids = index.external_documents_ids(&mut rtxn).unwrap();

    // Reverse the index mapping: internal id -> external id.
    let mut external_by_internal = std::collections::HashMap::new();
    for external in EXTERNAL_DOCUMENTS_IDS.iter() {
        external_by_internal.insert(external_ids.get(external).unwrap(), external);
    }

    let mut resolved = Vec::with_capacity(internal_ids.len());
    for internal in internal_ids {
        resolved.push(external_by_internal.get(internal).unwrap().to_string());
    }
    resolved
}

/// Computes the order in which the documents of `CONTENT` are expected to be returned.
///
/// The whole data set starts as a single bucket. Each criterion, applied in order, sorts
/// every bucket by the matching `*_rank` field and splits it into runs of equal rank, so
/// that the next criterion only breaks ties left by the previous ones.
///
/// - `authorize_typo`: when `false`, documents whose `typo_rank` is not `0` are dropped.
/// - `optional_words`: when `false`, documents whose `word_rank` is not `0` are dropped.
/// - `sort_by`: only an ascending or descending sort on `tag` is honoured by
///   `Criterion::Sort`; anything else leaves the buckets untouched.
///
/// # Panics
///
/// Panics if `CONTENT` cannot be deserialized into `TestDocument`s.
pub fn expected_order(
    criteria: &[Criterion],
    authorize_typo: bool,
    optional_words: bool,
    sort_by: &[AscDesc],
) -> Vec<TestDocument> {
    /// Stable-sorts `bucket` by `rank`, then appends every run of equal ranks to `out`.
    fn split_by_rank<K, F>(bucket: &mut [TestDocument], out: &mut Vec<Vec<TestDocument>>, rank: F)
    where
        K: Ord,
        F: Fn(&TestDocument) -> K,
    {
        bucket.sort_by_key(&rank);
        out.extend(bucket.linear_group_by_key(&rank).map(Vec::from));
    }

    let documents: Vec<TestDocument> = serde_json::Deserializer::from_str(CONTENT)
        .into_iter()
        .map(|parsed| parsed.unwrap())
        .collect();
    let mut buckets = vec![documents];

    for criterion in criteria {
        let mut refined = Vec::new();
        for bucket in buckets.iter_mut() {
            match criterion {
                Criterion::Words => split_by_rank(bucket, &mut refined, |d| d.word_rank),
                Criterion::Typo => split_by_rank(bucket, &mut refined, |d| d.typo_rank),
                Criterion::Proximity => {
                    split_by_rank(bucket, &mut refined, |d| d.proximity_rank)
                }
                Criterion::Attribute => {
                    split_by_rank(bucket, &mut refined, |d| d.attribute_rank)
                }
                Criterion::Exactness => split_by_rank(bucket, &mut refined, |d| d.exact_rank),
                Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
                    split_by_rank(bucket, &mut refined, |d| d.sort_by_rank)
                }
                // Descending variants sort on the reversed key; the runs of equal ranks
                // are the same whether or not the key is wrapped in `Reverse`.
                Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
                    split_by_rank(bucket, &mut refined, |d| Reverse(d.sort_by_rank))
                }
                Criterion::Asc(field_name) if field_name == "asc_desc_rank" => {
                    split_by_rank(bucket, &mut refined, |d| d.asc_desc_rank)
                }
                Criterion::Desc(field_name) if field_name == "asc_desc_rank" => {
                    split_by_rank(bucket, &mut refined, |d| Reverse(d.asc_desc_rank))
                }
                // Anything this helper does not model keeps the bucket as it is.
                Criterion::Asc(_) | Criterion::Desc(_) | Criterion::Sort => {
                    refined.push(bucket.clone())
                }
            }
        }
        buckets = refined;
    }

    buckets
        .into_iter()
        .flatten()
        .filter(|d| authorize_typo || d.typo_rank == 0)
        .filter(|d| optional_words || d.word_rank == 0)
        .collect()
}

/// Evaluates one textual `filter` against `document`.
///
/// Returns the id of the document when it satisfies the filter, `None` otherwise. A
/// filter that none of the branches below recognises never matches.
///
/// # Panics
///
/// Panics when the value of an `=`, `<` or `>` filter on `asc_desc_rank` is not a valid
/// `u32`.
fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
    // `!=` has to be tried before `=`, otherwise it would be split on its `=` sign.
    let is_match = if let Some((field, value)) = filter.split_once("!=") {
        match field {
            "tag" => document.tag != value,
            // A value that is not a number can never equal the rank: the document matches.
            "asc_desc_rank" => Ok(&document.asc_desc_rank) != value.parse::<u32>().as_ref(),
            _ => false,
        }
    } else if let Some((field, value)) = filter.split_once("=") {
        match field {
            "tag" => document.tag == value,
            "asc_desc_rank" => document.asc_desc_rank == value.parse::<u32>().unwrap(),
            _ => false,
        }
    } else if let Some(("asc_desc_rank", bound)) = filter.split_once("<") {
        document.asc_desc_rank < bound.parse().unwrap()
    } else if let Some(("asc_desc_rank", bound)) = filter.split_once(">") {
        document.asc_desc_rank > bound.parse().unwrap()
    } else if filter.starts_with("_geoRadius") {
        // NOTE(review): the arguments of the filter are ignored and the two geo branches
        // use thresholds that differ by a factor of ten -- confirm against the data set.
        document.geo_rank < 100000
    } else if filter.starts_with("NOT _geoRadius") {
        document.geo_rank > 1000000
    } else {
        match filter {
            "opt1 EXISTS" | "NOT opt1 NOT EXISTS" => document.opt1.is_some(),
            "NOT opt1 EXISTS" | "opt1 NOT EXISTS" => document.opt1.is_none(),
            // Either the flattened `opt1.opt2` field is present, or an `opt2` key is
            // found somewhere inside the `opt1` value.
            "opt1.opt2 EXISTS" => {
                document.opt1opt2.is_some()
                    || document.opt1.as_ref().map_or(false, |opt1| contains_key_rec(opt1, "opt2"))
            }
            _ => false,
        }
    };

    is_match.then(|| document.id.clone())
}

/// Tells whether `key` names a field of any object nested, at any depth, inside `v`.
///
/// Arrays are traversed element by element; scalars never contain a key.
pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
    match v {
        serde_json::Value::Array(items) => items.iter().any(|item| contains_key_rec(item, key)),
        serde_json::Value::Object(fields) => {
            fields.iter().any(|(name, value)| name == key || contains_key_rec(value, key))
        }
        _ => false,
    }
}

/// Computes the ids of the documents of `CONTENT` expected to pass all the `filters`.
///
/// The entries of `filters` are combined with a logical AND. A `Left` entry is a group
/// of alternatives combined with a logical OR; a `Right` entry is a single filter.
///
/// # Panics
///
/// Panics if `CONTENT` cannot be deserialized, if a `Left` group is empty, or if
/// `execute_filter` panics on one of the filters.
pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
    let dataset: Vec<TestDocument> = serde_json::Deserializer::from_str(CONTENT)
        .into_iter()
        .map(|parsed| parsed.unwrap())
        .collect();

    // Ids of all the documents satisfying one single filter.
    let ids_matching = |filter: &str| -> HashSet<String> {
        dataset.iter().filter_map(|doc| execute_filter(filter, doc)).collect()
    };

    let mut remaining: HashSet<String> = dataset.iter().map(|doc| doc.id.clone()).collect();
    for clause in filters {
        let matching = match clause {
            Left(alternatives) => {
                let mut per_filter =
                    alternatives.into_iter().map(|alternative| ids_matching(alternative));
                // A group without any alternative is not supported.
                let mut union = per_filter.next().unwrap();
                for ids in per_filter {
                    union.extend(ids);
                }
                union
            }
            Right(filter) => ids_matching(filter),
        };

        remaining.retain(|id| matching.contains(id));
    }

    remaining
}

/// One document of the test set, as deserialized from `CONTENT`.
///
/// The `*_rank` fields are the values the helpers of this module sort and filter on in
/// order to compute the results the engine is expected to return.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct TestDocument {
    /// Identifier returned by the filter helpers when the document matches.
    pub id: String,
    /// Sort key for `Criterion::Words`; documents with a non-zero value are dropped by
    /// `expected_order` when optional words are disabled.
    pub word_rank: u32,
    /// Sort key for `Criterion::Typo`; documents with a non-zero value are dropped by
    /// `expected_order` when typos are not authorized.
    pub typo_rank: u32,
    /// Sort key for `Criterion::Proximity`.
    pub proximity_rank: u32,
    /// Sort key for `Criterion::Attribute`.
    pub attribute_rank: u32,
    /// Sort key for `Criterion::Exactness`.
    pub exact_rank: u32,
    /// Sort key for `Criterion::Asc`/`Criterion::Desc` on the `asc_desc_rank` field, also
    /// compared by the `asc_desc_rank` filters.
    pub asc_desc_rank: u32,
    /// Sort key for `Criterion::Sort` when sorting on `tag`.
    pub sort_by_rank: u32,
    /// Value compared against fixed thresholds by the `_geoRadius` filters.
    pub geo_rank: u32,
    /// Declared as a searchable field when the index is set up.
    pub title: String,
    /// Declared as a searchable field when the index is set up.
    pub description: String,
    /// Value compared by the `tag` filters.
    pub tag: String,
    /// `None` when the field is absent from the document (`default`); when present, the
    /// value is wrapped in `Some` by `some_option`.
    #[serde(default, deserialize_with = "some_option")]
    pub opt1: Option<serde_json::Value>,
    /// Same as `opt1`, but read from the field literally named `opt1.opt2`.
    #[serde(default, deserialize_with = "some_option", rename = "opt1.opt2")]
    pub opt1opt2: Option<serde_json::Value>,
}

/// Deserializes a JSON value and wraps it in `Some`, whatever the value is.
///
/// Used through `deserialize_with` so that a field which is present always ends up as
/// `Some(..)`.
fn some_option<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>
where
    D: Deserializer<'de>,
{
    serde_json::Value::deserialize(deserializer).map(Some)
}
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`use std::cmp::Reverse;`
Add test on filters 2021-06-17 19:56:09 +08:00			`use std::collections::HashSet;`
Implement documents format document reader transform remove update format support document sequences fix document transform clean transform improve error handling add documents! macro fix transform bug fix tests remove csv dependency Add comments on the transform process replace search cli fmt review edits fix http ui fix clippy warnings Revert "fix clippy warnings" This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620. fix review comments remove smallvec in transform loop review edits 2021-08-31 17:44:15 +08:00			`use std::io::Cursor;`
Add test on filters 2021-06-17 19:56:09 +08:00
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`use big_s::S;`
Add test on filters 2021-06-17 19:56:09 +08:00			`use either::{Either, Left, Right};`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`use heed::EnvOpenOptions;`
			`use maplit::{hashmap, hashset};`
Implement documents format document reader transform remove update format support document sequences fix document transform clean transform improve error handling add documents! macro fix transform bug fix tests remove csv dependency Add comments on the transform process replace search cli fmt review edits fix http ui fix clippy warnings Revert "fix clippy warnings" This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620. fix review comments remove smallvec in transform loop review edits 2021-08-31 17:44:15 +08:00			`use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};`
document batch support reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt 2021-12-08 21:12:07 +08:00			`use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};`
fix all the tests 2021-09-01 23:43:18 +08:00			`use milli::{AscDesc, Criterion, DocumentId, Index, Member};`
Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`use serde::{Deserialize, Deserializer};`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`use slice_group_by::GroupBy;`

Add distinct integration tests 2021-06-17 20:24:59 +08:00			`mod distinct;`
Add a test to check for the returned facet distribution 2022-04-27 00:12:58 +08:00			`mod facet_distribution;`
Add test on filters 2021-06-17 19:56:09 +08:00			`mod filters;`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`mod query_criteria;`
Add a test to make sure we throw the right error message 2021-09-07 17:01:37 +08:00			`mod sort;`
add typo integration tests 2022-04-01 16:50:01 +08:00			`mod typo_tolerance;`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
			`pub const TEST_QUERY: &'static str = "hello world america";`

format the whole project 2021-06-17 00:33:33 +08:00			`pub const EXTERNAL_DOCUMENTS_IDS: &[&str; 17] =`
			`&["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q"];`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
			`pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");`

			`pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {`
			`let path = tempfile::tempdir().unwrap();`
			`let mut options = EnvOpenOptions::new();`
			`options.map_size(10 * 1024 * 1024); // 10 MB`
			`let index = Index::new(options, &path).unwrap();`

			`let mut wtxn = index.write_txn().unwrap();`
document batch support reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt 2021-12-08 21:12:07 +08:00			`let config = IndexerConfig::default();`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
document batch support reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt 2021-12-08 21:12:07 +08:00			`let mut builder = Settings::new(&mut wtxn, &index, &config);`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
			`let criteria = criteria.iter().map(\|c\| c.to_string()).collect();`
			`builder.set_criteria(criteria);`
format the whole project 2021-06-17 00:33:33 +08:00			`builder.set_filterable_fields(hashset! {`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`S("tag"),`
			`S("asc_desc_rank"),`
tests the geo filters 2021-09-08 19:08:48 +08:00			`S("_geo"),`
Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`S("opt1"),`
			`S("opt1.opt2")`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`});`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`builder.set_sortable_fields(hashset! {`
			`S("tag"),`
			`S("asc_desc_rank"),`
			`});`
format the whole project 2021-06-17 00:33:33 +08:00			`builder.set_synonyms(hashmap! {`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`S("hello") => vec![S("good morning")],`
			`S("world") => vec![S("earth")],`
			`S("america") => vec![S("the united states")],`
			`});`
format the whole project 2021-06-17 00:33:33 +08:00			`builder.set_searchable_fields(vec![S("title"), S("description")]);`
remove update_id in UpdateBuilder 2021-11-03 20:12:01 +08:00			`builder.execute(\|_\| ()).unwrap();`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
			`// index documents`
document batch support reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt 2021-12-08 21:12:07 +08:00			`let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };`
			`let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };`

nested fields 2022-03-24 00:28:41 +08:00			`let mut builder =`
			`IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, \|_\| ()).unwrap();`
Implement documents format document reader transform remove update format support document sequences fix document transform clean transform improve error handling add documents! macro fix transform bug fix tests remove csv dependency Add comments on the transform process replace search cli fmt review edits fix http ui fix clippy warnings Revert "fix clippy warnings" This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620. fix review comments remove smallvec in transform loop review edits 2021-08-31 17:44:15 +08:00			`let mut cursor = Cursor::new(Vec::new());`
			`let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();`
			`let reader = Cursor::new(CONTENT.as_bytes());`
fix tests 2021-10-24 20:41:36 +08:00
			`for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {`
			`let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());`
			`documents_builder.extend_from_json(doc).unwrap();`
			`}`

Implement documents format document reader transform remove update format support document sequences fix document transform clean transform improve error handling add documents! macro fix transform bug fix tests remove csv dependency Add comments on the transform process replace search cli fmt review edits fix http ui fix clippy warnings Revert "fix clippy warnings" This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620. fix review comments remove smallvec in transform loop review edits 2021-08-31 17:44:15 +08:00			`documents_builder.finish().unwrap();`

			`cursor.set_position(0);`

			`// index documents`
			`let content = DocumentBatchReader::from_reader(cursor).unwrap();`
document batch support reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt 2021-12-08 21:12:07 +08:00			`builder.add_documents(content).unwrap();`
			`builder.execute().unwrap();`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00
			`wtxn.commit().unwrap();`

			`index`
			`}`

			`pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> Vec<String> {`
			`let mut rtxn = index.read_txn().unwrap();`
			`let docid_map = index.external_documents_ids(&mut rtxn).unwrap();`
format the whole project 2021-06-17 00:33:33 +08:00			`let docid_map: std::collections::HashMap<_, _> =`
			`EXTERNAL_DOCUMENTS_IDS.iter().map(\|id\| (docid_map.get(id).unwrap(), id)).collect();`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`internal_ids.iter().map(\|id\| docid_map.get(id).unwrap().to_string()).collect()`
			`}`

format the whole project 2021-06-17 00:33:33 +08:00			`pub fn expected_order(`
			`criteria: &[Criterion],`
			`authorize_typo: bool,`
			`optional_words: bool,`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`sort_by: &[AscDesc],`
format the whole project 2021-06-17 00:33:33 +08:00			`) -> Vec<TestDocument> {`
			`let dataset =`
			`serde_json::Deserializer::from_str(CONTENT).into_iter().map(\|r\| r.unwrap()).collect();`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`let mut groups: Vec<Vec<TestDocument>> = vec![dataset];`

			`for criterion in criteria {`
			`let mut new_groups = Vec::new();`
			`for group in groups.iter_mut() {`
			`match criterion {`
			`Criterion::Attribute => {`
			`group.sort_by_key(\|d\| d.attribute_rank);`
format the whole project 2021-06-17 00:33:33 +08:00			`new_groups`
			`.extend(group.linear_group_by_key(\|d\| d.attribute_rank).map(Vec::from));`
			`}`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`Criterion::Exactness => {`
			`group.sort_by_key(\|d\| d.exact_rank);`
			`new_groups.extend(group.linear_group_by_key(\|d\| d.exact_rank).map(Vec::from));`
format the whole project 2021-06-17 00:33:33 +08:00			`}`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`Criterion::Proximity => {`
			`group.sort_by_key(\|d\| d.proximity_rank);`
format the whole project 2021-06-17 00:33:33 +08:00			`new_groups`
			`.extend(group.linear_group_by_key(\|d\| d.proximity_rank).map(Vec::from));`
			`}`
fix all the tests 2021-09-01 23:43:18 +08:00			`Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`group.sort_by_key(\|d\| d.sort_by_rank);`
			`new_groups.extend(group.linear_group_by_key(\|d\| d.sort_by_rank).map(Vec::from));`
			`}`
fix all the tests 2021-09-01 23:43:18 +08:00			`Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`group.sort_by_key(\|d\| Reverse(d.sort_by_rank));`
			`new_groups.extend(group.linear_group_by_key(\|d\| d.sort_by_rank).map(Vec::from));`
			`}`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`Criterion::Typo => {`
			`group.sort_by_key(\|d\| d.typo_rank);`
			`new_groups.extend(group.linear_group_by_key(\|d\| d.typo_rank).map(Vec::from));`
format the whole project 2021-06-17 00:33:33 +08:00			`}`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`Criterion::Words => {`
			`group.sort_by_key(\|d\| d.word_rank);`
			`new_groups.extend(group.linear_group_by_key(\|d\| d.word_rank).map(Vec::from));`
format the whole project 2021-06-17 00:33:33 +08:00			`}`
Factorize tests using macro_rules 2021-06-08 18:33:02 +08:00			`Criterion::Asc(field_name) if field_name == "asc_desc_rank" => {`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`group.sort_by_key(\|d\| d.asc_desc_rank);`
format the whole project 2021-06-17 00:33:33 +08:00			`new_groups`
			`.extend(group.linear_group_by_key(\|d\| d.asc_desc_rank).map(Vec::from));`
			`}`
			`Criterion::Desc(field_name) if field_name == "asc_desc_rank" => {`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`group.sort_by_key(\|d\| Reverse(d.asc_desc_rank));`
format the whole project 2021-06-17 00:33:33 +08:00			`new_groups`
			`.extend(group.linear_group_by_key(\|d\| d.asc_desc_rank).map(Vec::from));`
			`}`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`Criterion::Asc(_) \| Criterion::Desc(_) \| Criterion::Sort => {`
			`new_groups.push(group.clone())`
			`}`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`}`
			`}`
			`groups = std::mem::take(&mut new_groups);`
			`}`

Resolve PR comments 2021-06-08 20:11:00 +08:00			`if authorize_typo && optional_words {`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`groups.into_iter().flatten().collect()`
			`} else if optional_words {`
			`groups.into_iter().flatten().filter(\|d\| d.typo_rank == 0).collect()`
Resolve PR comments 2021-06-08 20:11:00 +08:00			`} else if authorize_typo {`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`groups.into_iter().flatten().filter(\|d\| d.word_rank == 0).collect()`
			`} else {`
			`groups.into_iter().flatten().filter(\|d\| d.word_rank == 0 && d.typo_rank == 0).collect()`
			`}`
			`}`

Add test on filters 2021-06-17 19:56:09 +08:00			`fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {`
			`let mut id = None;`
Fix not equal filter when field contains both number and strings 2022-06-27 20:55:17 +08:00			`if let Some((field, filter)) = filter.split_once("!=") {`
			`if field == "tag" && document.tag != filter {`
			`id = Some(document.id.clone())`
			`} else if field == "asc_desc_rank"`
			`&& Ok(&document.asc_desc_rank) != filter.parse::<u32>().as_ref()`
			`{`
			`id = Some(document.id.clone())`
			`}`
			`} else if let Some((field, filter)) = filter.split_once("=") {`
Add test on filters 2021-06-17 19:56:09 +08:00			`if field == "tag" && document.tag == filter {`
			`id = Some(document.id.clone())`
			`} else if field == "asc_desc_rank"`
			`&& document.asc_desc_rank == filter.parse::<u32>().unwrap()`
			`{`
			`id = Some(document.id.clone())`
			`}`
			`} else if let Some(("asc_desc_rank", filter)) = filter.split_once("<") {`
			`if document.asc_desc_rank < filter.parse().unwrap() {`
			`id = Some(document.id.clone())`
			`}`
			`} else if let Some(("asc_desc_rank", filter)) = filter.split_once(">") {`
			`if document.asc_desc_rank > filter.parse().unwrap() {`
			`id = Some(document.id.clone())`
			`}`
tests the geo filters 2021-09-08 19:08:48 +08:00			`} else if filter.starts_with("_geoRadius") {`
			`id = (document.geo_rank < 100000).then(\|\| document.id.clone());`
			`} else if filter.starts_with("NOT _geoRadius") {`
			`id = (document.geo_rank > 1000000).then(\|\| document.id.clone());`
Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`} else if matches!(filter, "opt1 EXISTS" \| "NOT opt1 NOT EXISTS") {`
			`id = document.opt1.is_some().then(\|\| document.id.clone());`
			`} else if matches!(filter, "NOT opt1 EXISTS" \| "opt1 NOT EXISTS") {`
			`id = document.opt1.is_none().then(\|\| document.id.clone());`
			`} else if matches!(filter, "opt1.opt2 EXISTS") {`
			`if document.opt1opt2.is_some() {`
			`id = Some(document.id.clone());`
			`} else if let Some(opt1) = &document.opt1 {`
			`id = contains_key_rec(opt1, "opt2").then(\|\| document.id.clone());`
			`}`
Add test on filters 2021-06-17 19:56:09 +08:00			`}`
			`id`
			`}`

Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {`
			`match v {`
			`serde_json::Value::Array(v) => {`
			`for v in v.iter() {`
			`if contains_key_rec(v, key) {`
			`return true;`
			`}`
			`}`
			`false`
			`}`
			`serde_json::Value::Object(v) => {`
			`for (k, v) in v.iter() {`
			`if k == key \|\| contains_key_rec(v, key) {`
			`return true;`
			`}`
			`}`
			`false`
			`}`
			`_ => false,`
			`}`
			`}`

Add test on filters 2021-06-17 19:56:09 +08:00			`pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {`
Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`let dataset: Vec<TestDocument> =`
Add test on filters 2021-06-17 19:56:09 +08:00			`serde_json::Deserializer::from_str(CONTENT).into_iter().map(\|r\| r.unwrap()).collect();`

			`let mut filtered_ids: HashSet<_> = dataset.iter().map(\|d\| d.id.clone()).collect();`
			`for either in filters {`
			`let ids = match either {`
			`Left(array) => array`
			`.into_iter()`
			`.map(\|f\| {`
			`let ids: HashSet<String> =`
			`dataset.iter().filter_map(\|d\| execute_filter(f, d)).collect();`
			`ids`
			`})`
			`.reduce(\|a, b\| a.union(&b).cloned().collect())`
			`.unwrap(),`
			`Right(filter) => {`
			`let ids: HashSet<String> =`
			`dataset.iter().filter_map(\|d\| execute_filter(filter, d)).collect();`
			`ids`
			`}`
			`};`

			`filtered_ids = filtered_ids.intersection(&ids).cloned().collect();`
			`}`

			`filtered_ids`
			`}`

Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`pub struct TestDocument {`
			`pub id: String,`
			`pub word_rank: u32,`
			`pub typo_rank: u32,`
			`pub proximity_rank: u32,`
			`pub attribute_rank: u32,`
			`pub exact_rank: u32,`
			`pub asc_desc_rank: u32,`
Revert "Revert "Sort at query time"" 2021-08-23 17:37:18 +08:00			`pub sort_by_rank: u32,`
tests the geo filters 2021-09-08 19:08:48 +08:00			`pub geo_rank: u32,`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`pub title: String,`
			`pub description: String,`
			`pub tag: String,`
Add integration tests for the EXISTS filter 2022-07-04 15:28:23 +08:00			`#[serde(default, deserialize_with = "some_option")]`
			`pub opt1: Option<serde_json::Value>,`
			`#[serde(default, deserialize_with = "some_option", rename = "opt1.opt2")]`
			`pub opt1opt2: Option<serde_json::Value>,`
			`}`

			`fn some_option<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>`
			`where`
			`D: Deserializer<'de>,`
			`{`
			`let result = serde_json::Value::deserialize(deserializer)?;`
			`Ok(Some(result))`
Introduce integration test on criteria 2021-06-03 20:44:53 +08:00			`}`