mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
Fix facet normalization
This commit is contained in:
parent
b744f33530
commit
efea1e5837
@ -22,6 +22,7 @@ use std::collections::{BTreeMap, HashMap};
|
|||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
|
||||||
|
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||||
use fxhash::{FxHasher32, FxHasher64};
|
use fxhash::{FxHasher32, FxHasher64};
|
||||||
pub use grenad::CompressionType;
|
pub use grenad::CompressionType;
|
||||||
@ -252,6 +253,10 @@ pub fn is_faceted_by(field: &str, facet: &str) -> bool {
|
|||||||
&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
|
&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn normalize_facet(original: &str) -> String {
|
||||||
|
CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
@ -230,7 +230,7 @@ impl<'a> Filter<'a> {
|
|||||||
&FacetGroupKey {
|
&FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: 0,
|
level: 0,
|
||||||
left_bound: &val.value().to_lowercase(),
|
left_bound: &crate::normalize_facet(val.value()),
|
||||||
},
|
},
|
||||||
)?
|
)?
|
||||||
.map(|v| v.bitmap)
|
.map(|v| v.bitmap)
|
||||||
|
@ -4,7 +4,6 @@ use std::fs::File;
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
|
||||||
use heed::zerocopy::AsBytes;
|
use heed::zerocopy::AsBytes;
|
||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -136,9 +135,7 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Value::String(original) => {
|
Value::String(original) => {
|
||||||
let normalized = CompatibilityDecompositionNormalizer
|
let normalized = crate::normalize_facet(original);
|
||||||
.normalize_str(original.trim())
|
|
||||||
.to_lowercase();
|
|
||||||
output_strings.push((normalized, original.clone()));
|
output_strings.push((normalized, original.clone()));
|
||||||
}
|
}
|
||||||
Value::Array(values) => {
|
Value::Array(values) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user