From b744f335306afe43a8988610de33a780809cc447 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 29 Mar 2023 10:58:05 +0200 Subject: [PATCH 1/3] Add test --- meilisearch/tests/search/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 1e5c23a71..045cfde2c 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -30,7 +30,7 @@ pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { "id": "166428", }, { - "title": "Glass", + "title": "Gläss", "id": "450465", } ]) @@ -52,7 +52,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { "age": 4, }, ], - "cattos": "pesti", + "cattos": "pésti", }, { "id": 654, @@ -142,7 +142,7 @@ async fn simple_search() { index.wait_task(1).await; index - .search(json!({"q": "pesti"}), |response, code| { + .search(json!({"q": "pésti"}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!(response["hits"].as_array().unwrap().len(), 2); }) @@ -250,7 +250,7 @@ async fn search_multiple_params() { index .search( json!({ - "q": "pesti", + "q": "pésti", "attributesToCrop": ["catto:2"], "attributesToHighlight": ["catto"], "limit": 2, @@ -281,7 +281,7 @@ async fn search_with_filter_string_notation() { index .search( json!({ - "filter": "title = Glass" + "filter": "title = Gläss" }), |response, code| { assert_eq!(code, 200, "{}", response); @@ -305,7 +305,7 @@ async fn search_with_filter_string_notation() { index .search( json!({ - "filter": "cattos = pesti" + "filter": "cattos = pésti" }), |response, code| { assert_eq!(code, 200, "{}", response); @@ -343,7 +343,7 @@ async fn search_with_filter_array_notation() { let (response, code) = index .search_post(json!({ - "filter": ["title = Glass"] + "filter": ["title = Gläss"] })) .await; assert_eq!(code, 200, "{}", response); @@ -351,7 +351,7 @@ async fn search_with_filter_array_notation() { let (response, code) = index .search_post(json!({ - "filter": [["title = Glass", "title = \"Shazam!\"", "title = \"Escape Room\""]] + "filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]] })) .await; assert_eq!(code, 200, "{}", response); From efea1e5837571e7af4822b12b2a25af2530f0178 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 29 Mar 2023 10:57:02 +0200 Subject: [PATCH 2/3] Fix facet normalization --- milli/src/lib.rs | 5 +++++ milli/src/search/facet/filter.rs | 2 +- .../extract/extract_fid_docid_facet_values.rs | 5 +---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 865195df5..e49e49d9c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -22,6 +22,7 @@ use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; use std::hash::BuildHasherDefault; +use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; @@ -252,6 +253,10 @@ pub fn is_faceted_by(field: &str, facet: &str) -> bool { && field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true) } +pub fn normalize_facet(original: &str) -> String { + CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase() +} + #[cfg(test)] mod tests { use serde_json::json; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index f67219494..c24abe6a5 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -230,7 +230,7 @@ impl<'a> Filter<'a> { &FacetGroupKey { field_id, level: 0, - left_bound: &val.value().to_lowercase(), + left_bound: &crate::normalize_facet(val.value()), }, )? .map(|v| v.bitmap) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 71ac330e2..f0bd78792 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -4,7 +4,6 @@ use std::fs::File; use std::io; use std::mem::size_of; -use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; use heed::zerocopy::AsBytes; use heed::BytesEncode; use roaring::RoaringBitmap; @@ -136,9 +135,7 @@ fn extract_facet_values(value: &Value) -> (Vec, Vec<(String, String)>) { } } Value::String(original) => { - let normalized = CompatibilityDecompositionNormalizer - .normalize_str(original.trim()) - .to_lowercase(); + let normalized = crate::normalize_facet(original); output_strings.push((normalized, original.clone())); } Value::Array(values) => { From 6592746337fd8be9ad2540d3c7c60cc75fe3d2a3 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 29 Mar 2023 14:36:17 +0200 Subject: [PATCH 3/3] Fix other unrelated tests --- meilisearch/tests/search/formatted.rs | 38 +++++++++++++-------------- meilisearch/tests/search/multi.rs | 18 ++++++------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/meilisearch/tests/search/formatted.rs b/meilisearch/tests/search/formatted.rs index 076c14fe0..8a40616a3 100644 --- a/meilisearch/tests/search/formatted.rs +++ b/meilisearch/tests/search/formatted.rs @@ -14,7 +14,7 @@ async fn formatted_contain_wildcard() { index.add_documents(documents, None).await; index.wait_task(1).await; - index.search(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }), + index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }), |response, code| { assert_eq!(code, 200, "{}", response); @@ -23,7 +23,7 @@ async fn formatted_contain_wildcard() { json!({ "_formatted": { "id": "852", - "cattos": "pesti", + "cattos": "pésti", }, "_matchesPosition": {"cattos": [{"start": 0, "length": 5}]}, }) @@ -33,13 +33,13 @@ async fn formatted_contain_wildcard() { .await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), |response, code| { + .search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"][0], json!({ "id": 852, - "cattos": "pesti", + "cattos": "pésti", }) ); }) @@ -47,17 +47,17 @@ async fn formatted_contain_wildcard() { index .search( - json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }), + json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"][0], json!({ "id": 852, - "cattos": "pesti", + "cattos": "pésti", "_formatted": { "id": "852", - "cattos": "pesti", + "cattos": "pésti", }, "_matchesPosition": {"cattos": [{"start": 0, "length": 5}]}, }) @@ -68,17 +68,17 @@ async fn formatted_contain_wildcard() { index .search( - json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), + json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"][0], json!({ "id": 852, - "cattos": "pesti", + "cattos": "pésti", "_formatted": { "id": "852", - "cattos": "pesti", + "cattos": "pésti", } }) ); @@ -87,16 +87,16 @@ async fn formatted_contain_wildcard() { .await; index - .search(json!({ "q": "pesti", "attributesToCrop": ["*"] }), |response, code| { + .search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"][0], json!({ "id": 852, - "cattos": "pesti", + "cattos": "pésti", "_formatted": { "id": "852", - "cattos": "pesti", + "cattos": "pésti", } }) ); @@ -114,7 +114,7 @@ async fn format_nested() { index.wait_task(0).await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), |response, code| { + .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"][0], @@ -136,7 +136,7 @@ async fn format_nested() { index .search( - json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] }), + json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -180,7 +180,7 @@ async fn format_nested() { .await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }), + .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -202,7 +202,7 @@ async fn format_nested() { .await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }), + .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -224,7 +224,7 @@ async fn format_nested() { .await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }), + .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( @@ -256,7 +256,7 @@ async fn format_nested() { .await; index - .search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }), + .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 01751ff62..b00ddf3de 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -71,7 +71,7 @@ async fn simple_search_single_index() { "indexUid": "test", "hits": [ { - "title": "Glass", + "title": "Gläss", "id": "450465" } ], @@ -166,7 +166,7 @@ async fn simple_search_two_indexes() { let (response, code) = server .multi_search(json!({"queries": [ {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pesti"}, + {"indexUid": "nested", "q": "pésti"}, ]})) .await; snapshot!(code, @"200 OK"); @@ -176,7 +176,7 @@ async fn simple_search_two_indexes() { "indexUid": "test", "hits": [ { - "title": "Glass", + "title": "Gläss", "id": "450465" } ], @@ -203,7 +203,7 @@ async fn simple_search_two_indexes() { "age": 4 } ], - "cattos": "pesti" + "cattos": "pésti" }, { "id": 654, @@ -221,7 +221,7 @@ async fn simple_search_two_indexes() { ] } ], - "query": "pesti", + "query": "pésti", "processingTimeMs": "[time]", "limit": 20, "offset": 0, @@ -243,7 +243,7 @@ async fn search_one_index_doesnt_exist() { let (response, code) = server .multi_search(json!({"queries": [ {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pesti"}, + {"indexUid": "nested", "q": "pésti"}, ]})) .await; snapshot!(code, @"400 Bad Request"); @@ -264,7 +264,7 @@ async fn search_multiple_indexes_dont_exist() { let (response, code) = server .multi_search(json!({"queries": [ {"indexUid" : "test", "q": "glass"}, - {"indexUid": "nested", "q": "pesti"}, + {"indexUid": "nested", "q": "pésti"}, ]})) .await; snapshot!(code, @"400 Bad Request"); @@ -296,7 +296,7 @@ async fn search_one_query_error() { let (response, code) = server .multi_search(json!({"queries": [ {"indexUid" : "test", "q": "glass", "facets": ["title"]}, - {"indexUid": "nested", "q": "pesti"}, + {"indexUid": "nested", "q": "pésti"}, ]})) .await; snapshot!(code, @"400 Bad Request"); @@ -328,7 +328,7 @@ async fn search_multiple_query_errors() { let (response, code) = server .multi_search(json!({"queries": [ {"indexUid" : "test", "q": "glass", "facets": ["title"]}, - {"indexUid": "nested", "q": "pesti", "facets": ["doggos"]}, + {"indexUid": "nested", "q": "pésti", "facets": ["doggos"]}, ]})) .await; snapshot!(code, @"400 Bad Request");