From e9ada44509152dcfd006451ea80d5924db2fb431 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Aug 2021 11:40:07 +0200 Subject: [PATCH] AscDesc criterion returns documents ordered by numbers then by strings --- milli/src/search/criteria/asc_desc.rs | 102 +++++++++++++++++++++----- 1 file changed, 83 insertions(+), 19 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 4a664d042..6d50c1bb5 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; -use crate::search::facet::FacetNumberIter; +use crate::search::facet::{FacetNumberIter, FacetStringIter}; use crate::search::query_tree::Operation; use crate::{FieldId, Index, Result}; @@ -20,7 +20,7 @@ pub struct AscDesc<'t> { rtxn: &'t heed::RoTxn<'t>, field_name: String, field_id: Option, - ascending: bool, + is_ascending: bool, query_tree: Option, candidates: Box> + 't>, allowed_candidates: RoaringBitmap, @@ -53,12 +53,16 @@ impl<'t> AscDesc<'t> { rtxn: &'t heed::RoTxn, parent: Box, field_name: String, - ascending: bool, + is_ascending: bool, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let field_id = fields_ids_map.id(&field_name); let faceted_candidates = match field_id { - Some(field_id) => index.number_faceted_documents_ids(rtxn, field_id)?, + Some(field_id) => { + let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?; + let string_faceted = index.string_faceted_documents_ids(rtxn, field_id)?; + number_faceted | string_faceted + } None => RoaringBitmap::default(), }; @@ -67,7 +71,7 @@ impl<'t> AscDesc<'t> { rtxn, field_name, field_id, - ascending, + is_ascending, query_tree: None, candidates: Box::new(std::iter::empty()), allowed_candidates: RoaringBitmap::new(), @@ -87,7 +91,7 @@ impl<'t> Criterion for AscDesc<'t> { loop { debug!( "Facet {}({}) iteration", - if self.ascending { "Asc" } else { "Desc" }, + if self.is_ascending { "Asc" } else { "Desc" }, self.field_name ); @@ -136,7 +140,7 @@ impl<'t> Criterion for AscDesc<'t> { self.index, self.rtxn, field_id, - self.ascending, + self.is_ascending, candidates & &self.faceted_candidates, )?, None => Box::new(std::iter::empty()), @@ -167,31 +171,49 @@ fn facet_ordered<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, field_id: FieldId, - ascending: bool, + is_ascending: bool, candidates: RoaringBitmap, ) -> Result> + 't>> { if candidates.len() <= CANDIDATES_THRESHOLD { - let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; - Ok(Box::new(iter.map(Ok)) as Box>) + let number_iter = iterative_facet_number_ordered_iter( + index, + rtxn, + field_id, + is_ascending, + candidates.clone(), + )?; + let string_iter = + iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?; + Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box>) } else { - let facet_fn = if ascending { + let facet_number_fn = if is_ascending { FacetNumberIter::new_reducing } else { FacetNumberIter::new_reverse_reducing }; - let iter = facet_fn(rtxn, index, field_id, candidates)?; - Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) + let number_iter = facet_number_fn(rtxn, index, field_id, candidates.clone())? + .map(|res| res.map(|(_, docids)| docids)); + + let facet_string_fn = if is_ascending { + FacetStringIter::new_reducing + } else { + FacetStringIter::new_reverse_reducing + }; + let string_iter = facet_string_fn(rtxn, index, field_id, candidates)? + .map(|res| res.map(|(_, _, docids)| docids)); + + Ok(Box::new(number_iter.chain(string_iter))) } } -/// Fetch the whole list of candidates facet values one by one and order them by it. +/// Fetch the whole list of candidates facet number values one by one and order them by it. /// /// This function is fast when the amount of candidates to rank is small. -fn iterative_facet_ordered_iter<'t>( +fn iterative_facet_number_ordered_iter<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, field_id: FieldId, - ascending: bool, + is_ascending: bool, candidates: RoaringBitmap, ) -> Result + 't> { let mut docids_values = Vec::with_capacity(candidates.len() as usize); @@ -199,14 +221,14 @@ fn iterative_facet_ordered_iter<'t>( let left = (field_id, docid, f64::MIN); let right = (field_id, docid, f64::MAX); let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?; - let entry = if ascending { iter.next() } else { iter.last() }; + let entry = if is_ascending { iter.next() } else { iter.last() }; if let Some(((_, _, value), ())) = entry.transpose()? { docids_values.push((docid, OrderedFloat(value))); } } docids_values.sort_unstable_by_key(|(_, v)| *v); let iter = docids_values.into_iter(); - let iter = if ascending { + let iter = if is_ascending { Box::new(iter) as Box> } else { Box::new(iter.rev()) @@ -216,7 +238,49 @@ fn iterative_facet_ordered_iter<'t>( // required to collect the result into an owned collection (a Vec). // https://github.com/rust-itertools/itertools/issues/499 let vec: Vec<_> = iter - .group_by(|(_, v)| v.clone()) + .group_by(|(_, v)| *v) + .into_iter() + .map(|(_, ids)| ids.map(|(id, _)| id).collect()) + .collect(); + + Ok(vec.into_iter()) +} + +/// Fetch the whole list of candidates facet string values one by one and order them by it. +/// +/// This function is fast when the amount of candidates to rank is small. +fn iterative_facet_string_ordered_iter<'t>( + index: &'t Index, + rtxn: &'t heed::RoTxn, + field_id: FieldId, + is_ascending: bool, + candidates: RoaringBitmap, +) -> Result + 't> { + let mut docids_values = Vec::with_capacity(candidates.len() as usize); + for docid in candidates.iter() { + let left = (field_id, docid, ""); + let right = (field_id, docid.saturating_add(1), ""); + // FIXME Doing this means that it will never be possible to retrieve + // the document with id 2^32, not sure this is a real problem. + let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?; + let entry = if is_ascending { iter.next() } else { iter.last() }; + if let Some(((_, _, value), _)) = entry.transpose()? { + docids_values.push((docid, value)); + } + } + docids_values.sort_unstable_by_key(|(_, v)| *v); + let iter = docids_values.into_iter(); + let iter = if is_ascending { + Box::new(iter) as Box> + } else { + Box::new(iter.rev()) + }; + + // The itertools GroupBy iterator doesn't provide an owned version, we are therefore + // required to collect the result into an owned collection (a Vec). + // https://github.com/rust-itertools/itertools/issues/499 + let vec: Vec<_> = iter + .group_by(|(_, v)| *v) .into_iter() .map(|(_, ids)| ids.map(|(id, _)| id).collect()) .collect();