mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
Merge #3621
3621: Fix facet normalization r=Kerollmops a=ManyTheFish # Pull Request Make sure the facet normalization is the same between indexing and search. ## Related issue Fixes #3599 Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
37a24a4a05
@ -14,7 +14,7 @@ async fn formatted_contain_wildcard() {
|
|||||||
index.add_documents(documents, None).await;
|
index.add_documents(documents, None).await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(1).await;
|
||||||
|
|
||||||
index.search(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
|
index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
|
||||||
|response, code|
|
|response, code|
|
||||||
{
|
{
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
@ -23,7 +23,7 @@ async fn formatted_contain_wildcard() {
|
|||||||
json!({
|
json!({
|
||||||
"_formatted": {
|
"_formatted": {
|
||||||
"id": "852",
|
"id": "852",
|
||||||
"cattos": "<em>pesti</em>",
|
"cattos": "<em>pésti</em>",
|
||||||
},
|
},
|
||||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
||||||
})
|
})
|
||||||
@ -33,13 +33,13 @@ async fn formatted_contain_wildcard() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["hits"][0],
|
response["hits"][0],
|
||||||
json!({
|
json!({
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
@ -47,17 +47,17 @@ async fn formatted_contain_wildcard() {
|
|||||||
|
|
||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["hits"][0],
|
response["hits"][0],
|
||||||
json!({
|
json!({
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
"_formatted": {
|
"_formatted": {
|
||||||
"id": "852",
|
"id": "852",
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
},
|
},
|
||||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
||||||
})
|
})
|
||||||
@ -68,17 +68,17 @@ async fn formatted_contain_wildcard() {
|
|||||||
|
|
||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["hits"][0],
|
response["hits"][0],
|
||||||
json!({
|
json!({
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
"_formatted": {
|
"_formatted": {
|
||||||
"id": "852",
|
"id": "852",
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
@ -87,16 +87,16 @@ async fn formatted_contain_wildcard() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToCrop": ["*"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["hits"][0],
|
response["hits"][0],
|
||||||
json!({
|
json!({
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
"_formatted": {
|
"_formatted": {
|
||||||
"id": "852",
|
"id": "852",
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
@ -114,7 +114,7 @@ async fn format_nested() {
|
|||||||
index.wait_task(0).await;
|
index.wait_task(0).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["hits"][0],
|
response["hits"][0],
|
||||||
@ -136,7 +136,7 @@ async fn format_nested() {
|
|||||||
|
|
||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -180,7 +180,7 @@ async fn format_nested() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -202,7 +202,7 @@ async fn format_nested() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -224,7 +224,7 @@ async fn format_nested() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -256,7 +256,7 @@ async fn format_nested() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -30,7 +30,7 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
|||||||
"id": "166428",
|
"id": "166428",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"title": "Glass",
|
"title": "Gläss",
|
||||||
"id": "450465",
|
"id": "450465",
|
||||||
}
|
}
|
||||||
])
|
])
|
||||||
@ -52,7 +52,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
|||||||
"age": 4,
|
"age": 4,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"cattos": "pesti",
|
"cattos": "pésti",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 654,
|
"id": 654,
|
||||||
@ -142,7 +142,7 @@ async fn simple_search() {
|
|||||||
index.wait_task(1).await;
|
index.wait_task(1).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({"q": "pesti"}), |response, code| {
|
.search(json!({"q": "pésti"}), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
|
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
|
||||||
})
|
})
|
||||||
@ -250,7 +250,7 @@ async fn search_multiple_params() {
|
|||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({
|
json!({
|
||||||
"q": "pesti",
|
"q": "pésti",
|
||||||
"attributesToCrop": ["catto:2"],
|
"attributesToCrop": ["catto:2"],
|
||||||
"attributesToHighlight": ["catto"],
|
"attributesToHighlight": ["catto"],
|
||||||
"limit": 2,
|
"limit": 2,
|
||||||
@ -281,7 +281,7 @@ async fn search_with_filter_string_notation() {
|
|||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({
|
json!({
|
||||||
"filter": "title = Glass"
|
"filter": "title = Gläss"
|
||||||
}),
|
}),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
@ -305,7 +305,7 @@ async fn search_with_filter_string_notation() {
|
|||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({
|
json!({
|
||||||
"filter": "cattos = pesti"
|
"filter": "cattos = pésti"
|
||||||
}),
|
}),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
@ -343,7 +343,7 @@ async fn search_with_filter_array_notation() {
|
|||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"filter": ["title = Glass"]
|
"filter": ["title = Gläss"]
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
@ -351,7 +351,7 @@ async fn search_with_filter_array_notation() {
|
|||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"filter": [["title = Glass", "title = \"Shazam!\"", "title = \"Escape Room\""]]
|
"filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]]
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
|
@ -71,7 +71,7 @@ async fn simple_search_single_index() {
|
|||||||
"indexUid": "test",
|
"indexUid": "test",
|
||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
"title": "Glass",
|
"title": "Gläss",
|
||||||
"id": "450465"
|
"id": "450465"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -166,7 +166,7 @@ async fn simple_search_two_indexes() {
|
|||||||
let (response, code) = server
|
let (response, code) = server
|
||||||
.multi_search(json!({"queries": [
|
.multi_search(json!({"queries": [
|
||||||
{"indexUid" : "test", "q": "glass"},
|
{"indexUid" : "test", "q": "glass"},
|
||||||
{"indexUid": "nested", "q": "pesti"},
|
{"indexUid": "nested", "q": "pésti"},
|
||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -176,7 +176,7 @@ async fn simple_search_two_indexes() {
|
|||||||
"indexUid": "test",
|
"indexUid": "test",
|
||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
"title": "Glass",
|
"title": "Gläss",
|
||||||
"id": "450465"
|
"id": "450465"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -203,7 +203,7 @@ async fn simple_search_two_indexes() {
|
|||||||
"age": 4
|
"age": 4
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"cattos": "pesti"
|
"cattos": "pésti"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 654,
|
"id": 654,
|
||||||
@ -221,7 +221,7 @@ async fn simple_search_two_indexes() {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"query": "pesti",
|
"query": "pésti",
|
||||||
"processingTimeMs": "[time]",
|
"processingTimeMs": "[time]",
|
||||||
"limit": 20,
|
"limit": 20,
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
@ -243,7 +243,7 @@ async fn search_one_index_doesnt_exist() {
|
|||||||
let (response, code) = server
|
let (response, code) = server
|
||||||
.multi_search(json!({"queries": [
|
.multi_search(json!({"queries": [
|
||||||
{"indexUid" : "test", "q": "glass"},
|
{"indexUid" : "test", "q": "glass"},
|
||||||
{"indexUid": "nested", "q": "pesti"},
|
{"indexUid": "nested", "q": "pésti"},
|
||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -264,7 +264,7 @@ async fn search_multiple_indexes_dont_exist() {
|
|||||||
let (response, code) = server
|
let (response, code) = server
|
||||||
.multi_search(json!({"queries": [
|
.multi_search(json!({"queries": [
|
||||||
{"indexUid" : "test", "q": "glass"},
|
{"indexUid" : "test", "q": "glass"},
|
||||||
{"indexUid": "nested", "q": "pesti"},
|
{"indexUid": "nested", "q": "pésti"},
|
||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -296,7 +296,7 @@ async fn search_one_query_error() {
|
|||||||
let (response, code) = server
|
let (response, code) = server
|
||||||
.multi_search(json!({"queries": [
|
.multi_search(json!({"queries": [
|
||||||
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
||||||
{"indexUid": "nested", "q": "pesti"},
|
{"indexUid": "nested", "q": "pésti"},
|
||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -328,7 +328,7 @@ async fn search_multiple_query_errors() {
|
|||||||
let (response, code) = server
|
let (response, code) = server
|
||||||
.multi_search(json!({"queries": [
|
.multi_search(json!({"queries": [
|
||||||
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
||||||
{"indexUid": "nested", "q": "pesti", "facets": ["doggos"]},
|
{"indexUid": "nested", "q": "pésti", "facets": ["doggos"]},
|
||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
@ -22,6 +22,7 @@ use std::collections::{BTreeMap, HashMap};
|
|||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
|
||||||
|
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||||
use fxhash::{FxHasher32, FxHasher64};
|
use fxhash::{FxHasher32, FxHasher64};
|
||||||
pub use grenad::CompressionType;
|
pub use grenad::CompressionType;
|
||||||
@ -252,6 +253,10 @@ pub fn is_faceted_by(field: &str, facet: &str) -> bool {
|
|||||||
&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
|
&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn normalize_facet(original: &str) -> String {
|
||||||
|
CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
@ -230,7 +230,7 @@ impl<'a> Filter<'a> {
|
|||||||
&FacetGroupKey {
|
&FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: 0,
|
level: 0,
|
||||||
left_bound: &val.value().to_lowercase(),
|
left_bound: &crate::normalize_facet(val.value()),
|
||||||
},
|
},
|
||||||
)?
|
)?
|
||||||
.map(|v| v.bitmap)
|
.map(|v| v.bitmap)
|
||||||
|
@ -4,7 +4,6 @@ use std::fs::File;
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
|
||||||
use heed::zerocopy::AsBytes;
|
use heed::zerocopy::AsBytes;
|
||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -136,9 +135,7 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Value::String(original) => {
|
Value::String(original) => {
|
||||||
let normalized = CompatibilityDecompositionNormalizer
|
let normalized = crate::normalize_facet(original);
|
||||||
.normalize_str(original.trim())
|
|
||||||
.to_lowercase();
|
|
||||||
output_strings.push((normalized, original.clone()));
|
output_strings.push((normalized, original.clone()));
|
||||||
}
|
}
|
||||||
Value::Array(values) => {
|
Value::Array(values) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user