From 14f9f85c4b2d30ff79ab31562ddade10c571d849 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 19 Feb 2021 15:45:15 +0100
Subject: [PATCH] Introduce the AscDesc criterion

---
 milli/src/search/criteria/asc_desc.rs | 309 ++++++++++++++++++++++++++
 milli/src/search/criteria/mod.rs      |   2 +
 milli/src/search/mod.rs               |  15 +-
 3 files changed, 322 insertions(+), 4 deletions(-)
 create mode 100644 milli/src/search/criteria/asc_desc.rs

diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs
new file mode 100644
index 000000000..bf75ada7e
--- /dev/null
+++ b/milli/src/search/criteria/asc_desc.rs
@@ -0,0 +1,309 @@
+use std::mem::take;
+
+use anyhow::bail;
+use itertools::Itertools;
+use ordered_float::OrderedFloat;
+use roaring::RoaringBitmap;
+
+use crate::facet::FacetType;
+use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec};
+use crate::heed_codec::facet::{FieldDocIdFacetI64Codec, FieldDocIdFacetF64Codec};
+use crate::search::facet::FacetIter;
+use crate::search::query_tree::Operation;
+use crate::{FieldId, Index};
+use super::{Candidates, Criterion, CriterionResult};
+
+/// Buckets documents by the value of one numeric facet: lowest value first
+/// when ascending, highest value first when descending.
+///
+/// It is either the first criterion of the chain (`initial_asc`, `initial_desc`)
+/// or it refines the buckets returned by a parent criterion (`asc`, `desc`).
+pub struct AscDesc<'t> {
+    index: &'t Index,
+    rtxn: &'t heed::RoTxn<'t>,
+    field_id: FieldId,
+    facet_type: FacetType,
+    ascending: bool,
+    /// The query tree returned along with the buckets being produced.
+    query_tree: Option<Operation>,
+    /// The documents that still have to be split into buckets.
+    candidates: Candidates,
+    /// The bucket candidates of the parent, forwarded with the first bucket only.
+    bucket_candidates: Option<RoaringBitmap>,
+    /// The result of `Index::faceted_documents_ids` for `field_id`.
+    faceted_candidates: RoaringBitmap,
+    parent: Option<Box<dyn Criterion + 't>>,
+}
+
+impl<'t> AscDesc<'t> {
+    /// Creates an ascending criterion that is the first one of the chain.
+    pub fn initial_asc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        query_tree: Option<Operation>,
+        candidates: Option<RoaringBitmap>,
+        field_id: FieldId,
+        facet_type: FacetType,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Self::initial(index, rtxn, query_tree, candidates, field_id, facet_type, true)
+    }
+
+    /// Creates a descending criterion that is the first one of the chain.
+    pub fn initial_desc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        query_tree: Option<Operation>,
+        candidates: Option<RoaringBitmap>,
+        field_id: FieldId,
+        facet_type: FacetType,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Self::initial(index, rtxn, query_tree, candidates, field_id, facet_type, false)
+    }
+
+    /// Creates an ascending criterion that refines the buckets of `parent`.
+    pub fn asc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        parent: Box<dyn Criterion + 't>,
+        field_id: FieldId,
+        facet_type: FacetType,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Self::new(index, rtxn, parent, field_id, facet_type, true)
+    }
+
+    /// Creates a descending criterion that refines the buckets of `parent`.
+    pub fn desc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        parent: Box<dyn Criterion + 't>,
+        field_id: FieldId,
+        facet_type: FacetType,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Self::new(index, rtxn, parent, field_id, facet_type, false)
+    }
+
+    /// Builds a criterion without parent; a missing `candidates` becomes
+    /// `Candidates::default()`.
+    fn initial(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        query_tree: Option<Operation>,
+        candidates: Option<RoaringBitmap>,
+        field_id: FieldId,
+        facet_type: FacetType,
+        ascending: bool,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Ok(AscDesc {
+            index,
+            rtxn,
+            field_id,
+            facet_type,
+            ascending,
+            query_tree,
+            candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
+            faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
+            bucket_candidates: None,
+            parent: None,
+        })
+    }
+
+    /// Builds a criterion fed by `parent`; it starts without query tree nor candidates.
+    fn new(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn,
+        parent: Box<dyn Criterion + 't>,
+        field_id: FieldId,
+        facet_type: FacetType,
+        ascending: bool,
+    ) -> anyhow::Result<Self> where Self: Sized
+    {
+        Ok(AscDesc {
+            index,
+            rtxn,
+            field_id,
+            facet_type,
+            ascending,
+            query_tree: None,
+            candidates: Candidates::default(),
+            faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
+            bucket_candidates: None,
+            parent: Some(parent),
+        })
+    }
+}
+
+impl<'t> Criterion for AscDesc<'t> {
+    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
+        use Candidates::{Allowed, Forbidden};
+
+        loop {
+            match (&mut self.query_tree, &mut self.candidates) {
+                // Every candidate was returned, a new bucket must be fetched.
+                (_, Allowed(candidates)) if candidates.is_empty() => {
+                    self.query_tree = None;
+                    self.candidates = Candidates::default();
+                },
+                (Some(qt), Allowed(candidates)) => {
+                    let bucket_candidates = match self.parent {
+                        Some(_) => self.bucket_candidates.take(),
+                        None => Some(candidates.clone()),
+                    };
+
+                    let mut found_candidates = facet_ordered(
+                        self.index,
+                        self.rtxn,
+                        self.field_id,
+                        self.facet_type,
+                        self.ascending,
+                        candidates.clone(),
+                    )?;
+
+                    found_candidates.intersect_with(&candidates);
+                    if found_candidates.is_empty() {
+                        // No remaining candidate has a value for this facet, nothing
+                        // would ever be removed from them: drop them now instead of
+                        // returning the same empty bucket on every call.
+                        *candidates = RoaringBitmap::new();
+                    } else {
+                        candidates.difference_with(&found_candidates);
+                    }
+
+                    return Ok(Some(CriterionResult {
+                        query_tree: Some(qt.clone()),
+                        candidates: found_candidates,
+                        bucket_candidates,
+                    }));
+                },
+                (Some(_qt), Forbidden(_candidates)) => {
+                    // Resolving the query tree into candidates is not done here yet,
+                    // report it instead of panicking in the middle of a search.
+                    bail!("AscDesc cannot resolve a query tree without allowed candidates")
+                },
+                // Without query tree the remaining candidates are returned as a single bucket.
+                (None, Allowed(_)) => {
+                    let candidates = take(&mut self.candidates).into_inner();
+                    return Ok(Some(CriterionResult {
+                        query_tree: None,
+                        candidates: candidates.clone(),
+                        bucket_candidates: Some(candidates),
+                    }));
+                },
+                (None, Forbidden(_)) => {
+                    match self.parent.as_mut() {
+                        Some(parent) => {
+                            match parent.next()? {
+                                Some(CriterionResult { query_tree, mut candidates, bucket_candidates }) => {
+                                    self.query_tree = query_tree;
+                                    // Only the documents known as faceted are kept for ordering.
+                                    candidates.intersect_with(&self.faceted_candidates);
+                                    self.candidates = Candidates::Allowed(candidates);
+                                    self.bucket_candidates = bucket_candidates;
+                                },
+                                None => return Ok(None),
+                            }
+                        },
+                        None => return Ok(None),
+                    }
+                },
+            }
+        }
+    }
+}
+
+/// Returns the candidates that share the first facet value of `field_id` in
+/// the requested order: the lowest one when `ascending`, the highest otherwise.
+///
+/// Up to 1000 candidates the value of every document is fetched and sorted,
+/// above that the facet values are walked with a `FacetIter`.
+fn facet_ordered(
+    index: &Index,
+    rtxn: &heed::RoTxn,
+    field_id: FieldId,
+    facet_type: FacetType,
+    ascending: bool,
+    candidates: RoaringBitmap,
+) -> anyhow::Result<RoaringBitmap>
+{
+    match facet_type {
+        FacetType::Float => {
+            if candidates.len() <= 1000 {
+                let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
+                let mut docids_values = Vec::with_capacity(candidates.len() as usize);
+                for docid in candidates.iter() {
+                    let left = (field_id, docid, f64::MIN);
+                    let right = (field_id, docid, f64::MAX);
+                    let mut iter = db.range(rtxn, &(left..=right))?;
+                    // A document may hold several values: keep the first or the last of its range.
+                    let entry = if ascending { iter.next() } else { iter.last() };
+                    if let Some(((_, _, value), ())) = entry.transpose()? {
+                        docids_values.push((docid, OrderedFloat(value)));
+                    }
+                }
+                docids_values.sort_unstable_by_key(|(_, value)| *value);
+                let iter = docids_values.into_iter();
+                let iter = if ascending {
+                    Box::new(iter) as Box<dyn Iterator<Item = _>>
+                } else {
+                    Box::new(iter.rev())
+                };
+                // Only the first group of equal values makes up the bucket.
+                match iter.group_by(|(_, v)| *v).into_iter().next() {
+                    Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
+                    None => Ok(RoaringBitmap::new())
+                }
+            } else {
+                let facet_fn = if ascending {
+                    FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
+                } else {
+                    FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
+                };
+
+                let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
+                Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
+            }
+        },
+        FacetType::Integer => {
+            if candidates.len() <= 1000 {
+                let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>();
+                let mut docids_values = Vec::with_capacity(candidates.len() as usize);
+                for docid in candidates.iter() {
+                    let left = (field_id, docid, i64::MIN);
+                    let right = (field_id, docid, i64::MAX);
+                    let mut iter = db.range(rtxn, &(left..=right))?;
+                    // A document may hold several values: keep the first or the last of its range.
+                    let entry = if ascending { iter.next() } else { iter.last() };
+                    if let Some(((_, _, value), ())) = entry.transpose()? {
+                        docids_values.push((docid, value));
+                    }
+                }
+                docids_values.sort_unstable_by_key(|(_, value)| *value);
+                let iter = docids_values.into_iter();
+                let iter = if ascending {
+                    Box::new(iter) as Box<dyn Iterator<Item = _>>
+                } else {
+                    Box::new(iter.rev())
+                };
+                // Only the first group of equal values makes up the bucket.
+                match iter.group_by(|(_, v)| *v).into_iter().next() {
+                    Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
+                    None => Ok(RoaringBitmap::new())
+                }
+            } else {
+                let facet_fn = if ascending {
+                    FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
+                } else {
+                    FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
+                };
+
+                let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
+                Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
+            }
+        },
+        FacetType::String => bail!("criteria facet type must be a number"),
+    }
+}
diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs
index 5cc803dee..34d06dce3 100644
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@@ -9,6 +9,7 @@ use super::query_tree::{Operation, Query, QueryKind};
 
 pub mod typo;
 pub mod words;
+pub mod asc_desc;
 
 pub trait Criterion {
     fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>;
@@ -28,6 +29,7 @@ pub struct CriterionResult {
 /// Either a set of candidates that defines the candidates
 /// that are allowed to be returned,
 /// or the candidates that must never be returned.
+#[derive(Debug)]
 enum Candidates {
     Allowed(RoaringBitmap),
     Forbidden(RoaringBitmap)
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 2a726f635..93cac34b6 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -11,11 +11,11 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
 
 use crate::search::criteria::{Criterion, CriterionResult};
-use crate::search::criteria::{typo::Typo, words::Words};
-use crate::{Index, FieldId, DocumentId};
+use crate::search::criteria::{typo::Typo, words::Words, asc_desc::AscDesc};
+use crate::{Index, DocumentId};
 
 pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
-pub use self::facet::{FacetIter};
+pub use self::facet::FacetIter;
 use self::query_tree::QueryTreeBuilder;
 
 // Building these factories is not free.
@@ -90,7 +90,14 @@ impl<'a> Search<'a> {
         // We aretesting the typo criteria but there will be more of them soon.
         let criteria_ctx = criteria::HeedContext::new(self.rtxn, self.index)?;
         let typo_criterion = Typo::initial(&criteria_ctx, query_tree, facet_candidates)?;
-        let mut criteria = Words::new(&criteria_ctx, Box::new(typo_criterion))?;
+        let words_criterion = Words::new(&criteria_ctx, Box::new(typo_criterion))?;
+
+        // We sort in descending order on a specific field *by hand*, don't do that at home.
+        let attr_name = "released-timestamp";
+        let fid = self.index.fields_ids_map(self.rtxn)?.id(attr_name).unwrap();
+        let ftype = *self.index.faceted_fields(self.rtxn)?.get(attr_name).unwrap();
+        let desc_criterion = AscDesc::desc(self.index, self.rtxn, Box::new(words_criterion), fid, ftype)?;
+        let mut criteria = desc_criterion;
 
         let mut offset = self.offset;
         let mut limit = self.limit;