Facet distribution: fix issue where a truncated facet distribution would have the wrong order

This commit is contained in:
Louis Dureuil 2024-09-12 17:40:33 +02:00
parent 02c2b660f8
commit e44325683a
No known key found for this signature in database

View File

@ -100,7 +100,6 @@ impl<'a> FacetDistribution<'a> {
let mut lexicographic_distribution = BTreeMap::new(); let mut lexicographic_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let distribution_prelength = distribution.len();
let db = self.index.field_id_docid_facet_f64s; let db = self.index.field_id_docid_facet_f64s;
for docid in candidates { for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
@ -113,23 +112,21 @@ impl<'a> FacetDistribution<'a> {
for result in iter { for result in iter {
let ((_, _, value), ()) = result?; let ((_, _, value), ()) = result?;
*lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
if lexicographic_distribution.len() - distribution_prelength
== self.max_values_per_facet
{
break;
}
} }
} }
distribution.extend(lexicographic_distribution); distribution.extend(
lexicographic_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len())),
);
} }
FacetType::String => { FacetType::String => {
let mut normalized_distribution = BTreeMap::new(); let mut normalized_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings; let db = self.index.field_id_docid_facet_strings;
'outer: for docid in candidates { for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db let iter = db
@ -144,14 +141,14 @@ impl<'a> FacetDistribution<'a> {
.or_insert_with(|| (original_value, 0)); .or_insert_with(|| (original_value, 0));
*count += 1; *count += 1;
if normalized_distribution.len() == self.max_values_per_facet { // We would like to break here once we have enough facet values, but values are collected by increasing docid,
break 'outer; // so facet values that sort earlier (and must survive truncation) could still appear in later docids.
}
} }
} }
let iter = normalized_distribution let iter = normalized_distribution
.into_iter() .into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len()))
.map(|(_normalized, (original, count))| (original.to_string(), count)); .map(|(_normalized, (original, count))| (original.to_string(), count));
distribution.extend(iter); distribution.extend(iter);
} }
@ -467,7 +464,7 @@ mod tests {
.execute() .execute()
.unwrap(); .unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
let map = FacetDistribution::new(&txn, &index) let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::Count))) .facets(iter::once(("colour", OrderBy::Count)))