3981: Truncate the normalized long facets used in the search for facet value r=irevoire a=ManyTheFish

# Pull Request
Truncate long normalized facet values used in the facet-value search

## Targeted release

v1.3.1

## Related issue
Fixes #3978


Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-08-08 15:07:07 +00:00 committed by GitHub
commit 04671d0751
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 4 deletions

View File

@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
/// ///
/// This number is determined by the keys of the different facet databases /// This number is determined by the keys of the different facet databases
/// and adding a margin of safety. /// and adding a margin of safety.
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20; pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
/// The maximum length a word can be /// The maximum length a word can be
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;

View File

@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::ByteSliceRefCodec;
use crate::update::index_documents::create_sorter; use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string; use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16}; use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
pub mod bulk; pub mod bulk;
pub mod delete; pub mod delete;
@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> {
for result in database.iter(wtxn)? { for result in database.iter(wtxn)? {
let (facet_group_key, ()) = result?; let (facet_group_key, ()) = result?;
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
let normalized_facet = left_bound.normalize(&options); let mut normalized_facet = left_bound.normalize(&options);
let normalized_truncated_facet: String;
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
normalized_truncated_facet = normalized_facet
.char_indices()
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
normalized_facet = normalized_truncated_facet.into();
}
let set = BTreeSet::from_iter(std::iter::once(left_bound)); let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref()); let key = (field_id, normalized_facet.as_ref());
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;

View File

@ -44,7 +44,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
if normalised_value.len() > MAX_FACET_VALUE_LENGTH { if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
normalised_truncated_value = normalised_value normalised_truncated_value = normalised_value
.char_indices() .char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH) .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c) .map(|(_, c)| c)
.collect(); .collect();
normalised_value = normalised_truncated_value.as_str(); normalised_value = normalised_truncated_value.as_str();