From a8a1f5bd556aa28c7e1e0ea304a2919a1deb6161 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 1 Sep 2021 15:14:23 +0200 Subject: [PATCH] move the geosearch criteria out of asc_desc.rs --- milli/src/search/criteria/asc_desc.rs | 75 +++++------------ milli/src/search/criteria/geo.rs | 115 ++++++++++++++++++++++++++ milli/src/search/criteria/mod.rs | 40 +++++---- 3 files changed, 160 insertions(+), 70 deletions(-) create mode 100644 milli/src/search/criteria/geo.rs diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index b0951f655..6d50c1bb5 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -4,14 +4,12 @@ use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; -use rstar::RTree; use super::{Criterion, CriterionParameters, CriterionResult}; -use crate::criterion::Member; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::{FacetNumberIter, FacetStringIter}; use crate::search::query_tree::Operation; -use crate::{FieldId, GeoPoint, Index, Result}; +use crate::{FieldId, Index, Result}; /// Threshold on the number of candidates that will make /// the system to choose between one algorithm or another. @@ -20,11 +18,10 @@ const CANDIDATES_THRESHOLD: u64 = 1000; pub struct AscDesc<'t> { index: &'t Index, rtxn: &'t heed::RoTxn<'t>, - member: Member, + field_name: String, field_id: Option, is_ascending: bool, query_tree: Option, - rtree: Option>, candidates: Box> + 't>, allowed_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap, @@ -37,29 +34,29 @@ impl<'t> AscDesc<'t> { index: &'t Index, rtxn: &'t heed::RoTxn, parent: Box, - member: Member, + field_name: String, ) -> Result { - Self::new(index, rtxn, parent, member, true) + Self::new(index, rtxn, parent, field_name, true) } pub fn desc( index: &'t Index, rtxn: &'t heed::RoTxn, parent: Box, - member: Member, + field_name: String, ) -> Result { - Self::new(index, rtxn, parent, member, false) + Self::new(index, rtxn, parent, field_name, false) } fn new( index: &'t Index, rtxn: &'t heed::RoTxn, parent: Box, - member: Member, + field_name: String, is_ascending: bool, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let field_id = member.field().and_then(|field| fields_ids_map.id(&field)); + let field_id = fields_ids_map.id(&field_name); let faceted_candidates = match field_id { Some(field_id) => { let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?; @@ -68,16 +65,14 @@ impl<'t> AscDesc<'t> { } None => RoaringBitmap::default(), }; - let rtree = index.geo_rtree(rtxn)?; Ok(AscDesc { index, rtxn, - member, + field_name, field_id, is_ascending, query_tree: None, - rtree, candidates: Box::new(std::iter::empty()), allowed_candidates: RoaringBitmap::new(), faceted_candidates, @@ -97,7 +92,7 @@ impl<'t> Criterion for AscDesc<'t> { debug!( "Facet {}({}) iteration", if self.is_ascending { "Asc" } else { "Desc" }, - self.member + self.field_name ); match self.candidates.next().transpose()? { @@ -140,31 +135,15 @@ impl<'t> Criterion for AscDesc<'t> { } self.allowed_candidates = &candidates - params.excluded_candidates; - - match &self.member { - Member::Field(field_name) => { - self.candidates = match self.field_id { - Some(field_id) => facet_ordered( - self.index, - self.rtxn, - field_id, - self.is_ascending, - candidates & &self.faceted_candidates, - )?, - None => Box::new(std::iter::empty()), - } - } - Member::Geo(point) => { - self.candidates = match &self.rtree { - Some(rtree) => { - // TODO: TAMO how to remove that? - let rtree = Box::new(rtree.clone()); - let rtree = Box::leak(rtree); - geo_point(rtree, candidates, point.clone())? - } - None => Box::new(std::iter::empty()), - } - } + self.candidates = match self.field_id { + Some(field_id) => facet_ordered( + self.index, + self.rtxn, + field_id, + self.is_ascending, + candidates & &self.faceted_candidates, + )?, + None => Box::new(std::iter::empty()), }; } None => return Ok(None), @@ -184,22 +163,6 @@ impl<'t> Criterion for AscDesc<'t> { } } -fn geo_point<'t>( - rtree: &'t RTree, - candidates: RoaringBitmap, - point: [f64; 2], -) -> Result> + 't>> { - Ok(Box::new( - rtree - .nearest_neighbor_iter_with_distance_2(&point) - .filter_map(move |(point, _distance)| { - candidates.contains(point.data).then(|| point.data) - }) - .map(|id| std::iter::once(id).collect::()) - .map(Ok), - )) -} - /// Returns an iterator over groups of the given candidates in ascending or descending order. /// /// It will either use an iterative or a recursive method on the whole facet database depending diff --git a/milli/src/search/criteria/geo.rs b/milli/src/search/criteria/geo.rs new file mode 100644 index 000000000..740bcb3a8 --- /dev/null +++ b/milli/src/search/criteria/geo.rs @@ -0,0 +1,115 @@ +use roaring::RoaringBitmap; +use rstar::RTree; + +use super::{Criterion, CriterionParameters, CriterionResult}; +use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; +use crate::{GeoPoint, Index, Result}; + +pub struct Geo<'t> { + index: &'t Index, + rtxn: &'t heed::RoTxn<'t>, + parent: Box, + candidates: Box>, + allowed_candidates: RoaringBitmap, + bucket_candidates: RoaringBitmap, + rtree: Option>, + point: [f64; 2], +} + +impl<'t> Geo<'t> { + pub fn new( + index: &'t Index, + rtxn: &'t heed::RoTxn<'t>, + parent: Box, + point: [f64; 2], + ) -> Result { + let candidates = Box::new(std::iter::empty()); + let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?; + let bucket_candidates = RoaringBitmap::new(); + let rtree = index.geo_rtree(rtxn)?; + + Ok(Self { index, rtxn, parent, candidates, allowed_candidates, bucket_candidates, rtree, point }) + } +} + +impl<'t> Criterion for Geo<'t> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { + // if there is no rtree we have nothing to returns + let rtree = match self.rtree.as_ref() { + Some(rtree) => rtree, + None => return Ok(None), + }; + + loop { + match self.candidates.next() { + Some(mut candidates) => { + candidates -= params.excluded_candidates; + self.allowed_candidates -= &candidates; + return Ok(Some(CriterionResult { + query_tree: None, + candidates: Some(candidates), + filtered_candidates: None, + bucket_candidates: Some(self.bucket_candidates.clone()), + })); + } + None => { + match self.parent.next(params)? { + Some(CriterionResult { + query_tree, + candidates, + filtered_candidates, + bucket_candidates, + }) => { + let mut candidates = match (&query_tree, candidates) { + (_, Some(candidates)) => candidates, + (Some(qt), None) => { + let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; + resolve_query_tree(&context, qt, params.wdcache)? + } + // TODO: TAMO: why are we doing this? + (None, None) => self.index.documents_ids(self.rtxn)?, + }; + + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + + match bucket_candidates { + // why not are we keeping elements from the previous bucket? + Some(bucket_candidates) => { + self.bucket_candidates |= bucket_candidates + } + None => self.bucket_candidates |= &candidates, + } + + if candidates.is_empty() { + continue; + } + let rtree = Box::new(rtree.clone()); + let rtree = Box::leak(rtree); + + self.allowed_candidates = &candidates - params.excluded_candidates; + self.candidates = geo_point(rtree, self.allowed_candidates.clone(), self.point)?; + } + None => return Ok(None), + } + } + } + } + } +} + +fn geo_point<'t>( + rtree: &'t RTree, + candidates: RoaringBitmap, + point: [f64; 2], +) -> Result + 't>> { + Ok(Box::new( + rtree + .nearest_neighbor_iter_with_distance_2(&point) + .filter_map(move |(point, _distance)| { + candidates.contains(point.data).then(|| point.data) + }) + .map(|id| std::iter::once(id).collect::()) + )) +} diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 92c0d284a..185761632 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -13,10 +13,12 @@ use self::typo::Typo; use self::words::Words; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use crate::criterion::{AscDesc as AscDescName, Member}; +use crate::search::criteria::geo::Geo; use crate::search::{word_derivations, WordDerivationsCache}; use crate::{DocumentId, FieldId, Index, Result, TreeLevel}; mod asc_desc; +mod geo; mod attribute; mod exactness; pub mod r#final; @@ -290,18 +292,28 @@ impl<'t> CriteriaBuilder<'t> { Some(ref sort_criteria) => { for asc_desc in sort_criteria { criterion = match asc_desc { - AscDescName::Asc(field) => Box::new(AscDesc::asc( - &self.index, - &self.rtxn, - criterion, - field.clone(), - )?), - AscDescName::Desc(field) => Box::new(AscDesc::desc( - &self.index, - &self.rtxn, - criterion, - field.clone(), - )?), + AscDescName::Asc(Member::Field(field)) => { + Box::new(AscDesc::asc( + &self.index, + &self.rtxn, + criterion, + field.to_string(), + )?) + } + AscDescName::Desc(Member::Field(field)) => { + Box::new(AscDesc::desc( + &self.index, + &self.rtxn, + criterion, + field.to_string(), + )?) + } + AscDescName::Asc(Member::Geo(point)) => { + Box::new(Geo::new(&self.index, &self.rtxn, criterion, point.clone())?) + } + AscDescName::Desc(Member::Geo(_point)) => { + panic!("You can't desc geosort"); // TODO: TAMO: remove this + } }; } criterion @@ -312,10 +324,10 @@ impl<'t> CriteriaBuilder<'t> { Name::Attribute => Box::new(Attribute::new(self, criterion)), Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?), Name::Asc(field) => { - Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, Member::Field(field))?) + Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?) } Name::Desc(field) => { - Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, Member::Field(field))?) + Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?) } }; }