From 3f1a510069a616026db45329784142e3e7d60930 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Thu, 30 May 2024 12:02:42 +0200
Subject: [PATCH] Add tests and fix matching strategy

---
Review notes (text between the `---` line and the first `diff --git` line is
not part of the commit message):

* errors.rs: the two expected error messages for an invalid
  `matchingStrategy` now list `frequency` next to `last` and `all`.
* matching_strategy.rs (new file): three tests run the same queries with the
  `last`, `all` and `frequency` strategies against a seven-document fixture
  and snapshot the returned ids.
* query_graph.rs: `term_with_frequency` is now sorted with
  `Reverse(*frequency)` (highest frequency first), and `weight` is incremented
  whenever the next entry's frequency differs from the current one
  (previously only when the next one was greater).
* NOTE(review): the `From:` header carries no e-mail address; presumably it
  was stripped in transit -- confirm and restore before applying.

 meilisearch/tests/search/errors.rs            |   4 +-
 meilisearch/tests/search/matching_strategy.rs | 128 ++++++++++++++++++
 meilisearch/tests/search/mod.rs               |   1 +
 milli/src/search/new/query_graph.rs           |   6 +-
 4 files changed, 134 insertions(+), 5 deletions(-)
 create mode 100644 meilisearch/tests/search/matching_strategy.rs

diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs
index 8be70d162..cce1a86e7 100644
--- a/meilisearch/tests/search/errors.rs
+++ b/meilisearch/tests/search/errors.rs
@@ -505,7 +505,7 @@ async fn search_bad_matching_strategy() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
+      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
       "code": "invalid_search_matching_strategy",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -527,7 +527,7 @@ async fn search_bad_matching_strategy() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
+      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
       "code": "invalid_search_matching_strategy",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
diff --git a/meilisearch/tests/search/matching_strategy.rs b/meilisearch/tests/search/matching_strategy.rs
new file mode 100644
index 000000000..a4cb19f62
--- /dev/null
+++ b/meilisearch/tests/search/matching_strategy.rs
@@ -0,0 +1,128 @@
+use meili_snap::snapshot;
+use once_cell::sync::Lazy;
+
+use crate::common::index::Index;
+use crate::common::{Server, Value};
+use crate::json;
+
+async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
+    let index = server.index("test");
+
+    index.add_documents(documents.clone(), None).await;
+    index.wait_task(0).await;
+    index
+}
+
+static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+    {
+        "title": "Shazam!",
+        "id": "1",
+    },
+    {
+        "title": "Captain Planet",
+        "id": "2",
+    },
+    {
+        "title": "Captain Marvel",
+        "id": "3",
+    },
+    {
+        "title": "a Captain Marvel ersatz",
+        "id": "4"
+    },
+    {
+        "title": "He's not part of the Marvel Cinematic Universe",
+        "id": "5"
+    },
+    {
+        "title": "a Shazam ersatz, but better than Captain Planet",
+        "id": "6"
+    },
+    {
+        "title": "Capitain CAAAAAVEEERNE!!!!",
+        "id": "7"
+    }
+    ])
+});
+
+#[actix_rt::test]
+async fn simple_search() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+
+    index
+        .search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn search_with_typo() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+
+    index
+        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn search_with_unknown_word() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+
+    index
+        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @"[]");
+        })
+        .await;
+
+    index
+        .search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
+        })
+        .await;
+}
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 56fa226b2..284b68a15 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -7,6 +7,7 @@ mod facet_search;
 mod formatted;
 mod geo;
 mod hybrid;
+mod matching_strategy;
 mod multi;
 mod pagination;
 mod restrict_searchable;
diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs
index 9cbe55aff..cda767d75 100644
--- a/milli/src/search/new/query_graph.rs
+++ b/milli/src/search/new/query_graph.rs
@@ -1,4 +1,4 @@
-use std::cmp::Ordering;
+use std::cmp::{Ordering, Reverse};
 use std::collections::BTreeMap;
 use std::hash::{Hash, Hasher};
 
@@ -321,13 +321,13 @@ impl QueryGraph {
                 })
                 .collect()
         };
-        term_with_frequency.sort_by_key(|(_, frequency)| *frequency);
+        term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
         let mut term_weight = BTreeMap::new();
         let mut weight: u16 = 1;
         let mut peekable = term_with_frequency.into_iter().peekable();
         while let Some((idx, frequency)) = peekable.next() {
             term_weight.insert(idx, weight);
-            if peekable.peek().map_or(false, |(_, f)| frequency < *f) {
+            if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
                 weight += 1;
             }
         }