Only detect language for a facet if several locales have been specified by the user in the settings

This commit is contained in:
ManyTheFish 2024-08-14 11:33:17 +02:00
parent 0e68718027
commit ade54493ab
3 changed files with 26 additions and 7 deletions

View File

@ -1369,12 +1369,18 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};
// In the faceted search context, we want to use the intersection between the locales provided by the user
// and the locales of the facet string.
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
// If the user does not provide locales, we use the locales of the facet string.
let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let locales = locales.or_else(|| {
localized_attributes
let localized_attributes_locales =
localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name));
let locales = localized_attributes_locales.map(|attr| {
attr.locales
.into_iter()
.find(|attr| attr.match_str(&facet_name))
.map(|attr| attr.locales)
.filter(|locale| locales.as_ref().map_or(true, |locales| locales.contains(locale)))
.collect()
});
let (search, _, _, _) =

View File

@ -339,10 +339,18 @@ impl ValuesCollection {
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
let options = NormalizerOption { lossy: true, ..Default::default() };
let mut detection = StrDetection::new(facet_string, locales);
// Detect the language of the facet string only if several locales are explicitly provided.
let language = match locales {
Some(&[language]) => Some(language),
Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
_ => None,
};
let token = Token {
lemma: std::borrow::Cow::Borrowed(facet_string),
script: detection.script(),
language: detection.language(),
language,
..Default::default()
};

View File

@ -271,7 +271,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
/// Normalizes the facet string and truncates it to the max length.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
let options = NormalizerOption { lossy: true, ..Default::default() };
let options: NormalizerOption = NormalizerOption { lossy: true, ..Default::default() };
let mut detection = StrDetection::new(facet_string, locales);
let script = {
@ -285,7 +285,12 @@ fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> S
let span = tracing::trace_span!(target: "indexing::extract::extract_facet_string_docids", "detect_language");
let _entered = span.enter();
detection.language()
// Detect the language of the facet string only if several locales are explicitly provided.
match locales {
Some(&[language]) => Some(language),
Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
_ => None,
}
};
let token = Token {