move the geosearch criteria out of asc_desc.rs

This commit is contained in:
Tamo 2021-09-01 15:14:23 +02:00
parent dc84ecc40b
commit a8a1f5bd55
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
3 changed files with 160 additions and 70 deletions

View File

@ -4,14 +4,12 @@ use itertools::Itertools;
use log::debug; use log::debug;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::criterion::Member;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::{FacetNumberIter, FacetStringIter}; use crate::search::facet::{FacetNumberIter, FacetStringIter};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::{FieldId, GeoPoint, Index, Result}; use crate::{FieldId, Index, Result};
/// Threshold on the number of candidates that will make /// Threshold on the number of candidates that will make
/// the system to choose between one algorithm or another. /// the system to choose between one algorithm or another.
@ -20,11 +18,10 @@ const CANDIDATES_THRESHOLD: u64 = 1000;
pub struct AscDesc<'t> { pub struct AscDesc<'t> {
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
member: Member, field_name: String,
field_id: Option<FieldId>, field_id: Option<FieldId>,
is_ascending: bool, is_ascending: bool,
query_tree: Option<Operation>, query_tree: Option<Operation>,
rtree: Option<RTree<GeoPoint>>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>, candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
allowed_candidates: RoaringBitmap, allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
@ -37,29 +34,29 @@ impl<'t> AscDesc<'t> {
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
member: Member, field_name: String,
) -> Result<Self> { ) -> Result<Self> {
Self::new(index, rtxn, parent, member, true) Self::new(index, rtxn, parent, field_name, true)
} }
pub fn desc( pub fn desc(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
member: Member, field_name: String,
) -> Result<Self> { ) -> Result<Self> {
Self::new(index, rtxn, parent, member, false) Self::new(index, rtxn, parent, field_name, false)
} }
fn new( fn new(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
member: Member, field_name: String,
is_ascending: bool, is_ascending: bool,
) -> Result<Self> { ) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = member.field().and_then(|field| fields_ids_map.id(&field)); let field_id = fields_ids_map.id(&field_name);
let faceted_candidates = match field_id { let faceted_candidates = match field_id {
Some(field_id) => { Some(field_id) => {
let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?; let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?;
@ -68,16 +65,14 @@ impl<'t> AscDesc<'t> {
} }
None => RoaringBitmap::default(), None => RoaringBitmap::default(),
}; };
let rtree = index.geo_rtree(rtxn)?;
Ok(AscDesc { Ok(AscDesc {
index, index,
rtxn, rtxn,
member, field_name,
field_id, field_id,
is_ascending, is_ascending,
query_tree: None, query_tree: None,
rtree,
candidates: Box::new(std::iter::empty()), candidates: Box::new(std::iter::empty()),
allowed_candidates: RoaringBitmap::new(), allowed_candidates: RoaringBitmap::new(),
faceted_candidates, faceted_candidates,
@ -97,7 +92,7 @@ impl<'t> Criterion for AscDesc<'t> {
debug!( debug!(
"Facet {}({}) iteration", "Facet {}({}) iteration",
if self.is_ascending { "Asc" } else { "Desc" }, if self.is_ascending { "Asc" } else { "Desc" },
self.member self.field_name
); );
match self.candidates.next().transpose()? { match self.candidates.next().transpose()? {
@ -140,31 +135,15 @@ impl<'t> Criterion for AscDesc<'t> {
} }
self.allowed_candidates = &candidates - params.excluded_candidates; self.allowed_candidates = &candidates - params.excluded_candidates;
self.candidates = match self.field_id {
match &self.member { Some(field_id) => facet_ordered(
Member::Field(field_name) => { self.index,
self.candidates = match self.field_id { self.rtxn,
Some(field_id) => facet_ordered( field_id,
self.index, self.is_ascending,
self.rtxn, candidates & &self.faceted_candidates,
field_id, )?,
self.is_ascending, None => Box::new(std::iter::empty()),
candidates & &self.faceted_candidates,
)?,
None => Box::new(std::iter::empty()),
}
}
Member::Geo(point) => {
self.candidates = match &self.rtree {
Some(rtree) => {
// TODO: TAMO how to remove that?
let rtree = Box::new(rtree.clone());
let rtree = Box::leak(rtree);
geo_point(rtree, candidates, point.clone())?
}
None => Box::new(std::iter::empty()),
}
}
}; };
} }
None => return Ok(None), None => return Ok(None),
@ -184,22 +163,6 @@ impl<'t> Criterion for AscDesc<'t> {
} }
} }
fn geo_point<'t>(
rtree: &'t RTree<GeoPoint>,
candidates: RoaringBitmap,
point: [f64; 2],
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
Ok(Box::new(
rtree
.nearest_neighbor_iter_with_distance_2(&point)
.filter_map(move |(point, _distance)| {
candidates.contains(point.data).then(|| point.data)
})
.map(|id| std::iter::once(id).collect::<RoaringBitmap>())
.map(Ok),
))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order. /// Returns an iterator over groups of the given candidates in ascending or descending order.
/// ///
/// It will either use an iterative or a recursive method on the whole facet database depending /// It will either use an iterative or a recursive method on the whole facet database depending

View File

@ -0,0 +1,115 @@
use roaring::RoaringBitmap;
use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::{GeoPoint, Index, Result};
/// Criterion that returns documents one by one, in the order produced by a
/// nearest-neighbor search of the geo rtree around `point`.
pub struct Geo<'t> {
// Index the geo data (faceted ids, rtree, document ids) is read from.
index: &'t Index,
// Read transaction used for every access to `index`.
rtxn: &'t heed::RoTxn<'t>,
// Criterion queried for a new set of candidates once `candidates` is exhausted.
parent: Box<dyn Criterion + 't>,
// Buckets still to be returned for the current set of parent candidates.
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
// Documents that may still be returned; every returned bucket is removed from it.
allowed_candidates: RoaringBitmap,
// Union of the bucket candidates accumulated from the parent's results.
bucket_candidates: RoaringBitmap,
// Geo rtree loaded from the index; when it is `None` the criterion returns nothing.
rtree: Option<RTree<GeoPoint>>,
// Reference point handed to the nearest-neighbor search.
point: [f64; 2],
}
impl<'t> Geo<'t> {
    /// Builds a geo criterion sorting the results of `parent` around `point`.
    ///
    /// The geo-faceted document ids and the geo rtree are read from `index`
    /// through `rtxn`; any error raised by these reads is returned to the caller.
    /// The criterion starts with no pending bucket and no bucket candidates.
    pub fn new(
        index: &'t Index,
        rtxn: &'t heed::RoTxn<'t>,
        parent: Box<dyn Criterion + 't>,
        point: [f64; 2],
    ) -> Result<Self> {
        // Field expressions are evaluated in the order they are written, so the
        // faceted ids are still fetched before the rtree.
        Ok(Self {
            index,
            rtxn,
            parent,
            candidates: Box::new(std::iter::empty()),
            allowed_candidates: index.geo_faceted_documents_ids(rtxn)?,
            bucket_candidates: RoaringBitmap::new(),
            rtree: index.geo_rtree(rtxn)?,
            point,
        })
    }
}
impl<'t> Criterion for Geo<'t> {
    /// Returns the next bucket of this criterion, or `None` once the parent
    /// criterion is exhausted.
    ///
    /// Every bucket holds a single document. Buckets follow the order produced by
    /// the rtree nearest-neighbor search around `self.point`, restricted to the
    /// candidates handed over by the parent criterion.
    ///
    /// # Errors
    ///
    /// Propagates any error coming from the parent criterion, from the query tree
    /// resolution or from the index reads.
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // If there is no rtree we have nothing to return.
        let rtree = match self.rtree.as_ref() {
            Some(rtree) => rtree,
            None => return Ok(None),
        };

        loop {
            match self.candidates.next() {
                Some(mut candidates) => {
                    candidates -= params.excluded_candidates;
                    self.allowed_candidates -= &candidates;
                    return Ok(Some(CriterionResult {
                        query_tree: None,
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        bucket_candidates: Some(self.bucket_candidates.clone()),
                    }));
                }
                // The current buckets are exhausted: ask the parent for more candidates.
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree,
                        candidates,
                        filtered_candidates,
                        bucket_candidates,
                    }) => {
                        let mut candidates = match (&query_tree, candidates) {
                            (_, Some(candidates)) => candidates,
                            (Some(qt), None) => {
                                let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
                                resolve_query_tree(&context, qt, params.wdcache)?
                            }
                            // The parent gave neither candidates nor a query tree:
                            // fall back on every document of the index.
                            (None, None) => self.index.documents_ids(self.rtxn)?,
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        // NOTE(review): the bucket candidates of the previous parent
                        // buckets are kept and accumulated here; confirm this is intended.
                        match bucket_candidates {
                            Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
                            None => self.bucket_candidates |= &candidates,
                        }

                        if candidates.is_empty() {
                            continue;
                        }

                        self.allowed_candidates = &candidates - params.excluded_candidates;
                        // `geo_point` returns an iterator owning its data, so the rtree
                        // no longer has to be cloned and leaked for every parent bucket.
                        self.candidates =
                            geo_point(rtree, self.allowed_candidates.clone(), self.point)?;
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

/// Returns the `candidates` present in `rtree`, each one alone in its own bitmap,
/// in the order produced by the rtree nearest-neighbor search around `point`.
///
/// The buckets are gathered eagerly: the returned iterator owns them and does not
/// borrow `rtree`, which lets the caller store it without extending the lifetime
/// of the rtree. The search stops as soon as every candidate has been found.
fn geo_point(
    rtree: &RTree<GeoPoint>,
    mut candidates: RoaringBitmap,
    point: [f64; 2],
) -> Result<Box<dyn Iterator<Item = RoaringBitmap>>> {
    let mut buckets = Vec::new();

    for (neighbor, _distance) in rtree.nearest_neighbor_iter_with_distance_2(&point) {
        // Nothing left to look for: no need to walk the rest of the rtree.
        if candidates.is_empty() {
            break;
        }
        // `remove` tells whether the document was a candidate and prevents
        // returning the same document twice.
        if candidates.remove(neighbor.data) {
            buckets.push(std::iter::once(neighbor.data).collect::<RoaringBitmap>());
        }
    }

    Ok(Box::new(buckets.into_iter()))
}

View File

@ -13,10 +13,12 @@ use self::typo::Typo;
use self::words::Words; use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::criterion::{AscDesc as AscDescName, Member}; use crate::criterion::{AscDesc as AscDescName, Member};
use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, WordDerivationsCache};
use crate::{DocumentId, FieldId, Index, Result, TreeLevel}; use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
mod asc_desc; mod asc_desc;
mod geo;
mod attribute; mod attribute;
mod exactness; mod exactness;
pub mod r#final; pub mod r#final;
@ -290,18 +292,28 @@ impl<'t> CriteriaBuilder<'t> {
Some(ref sort_criteria) => { Some(ref sort_criteria) => {
for asc_desc in sort_criteria { for asc_desc in sort_criteria {
criterion = match asc_desc { criterion = match asc_desc {
AscDescName::Asc(field) => Box::new(AscDesc::asc( AscDescName::Asc(Member::Field(field)) => {
&self.index, Box::new(AscDesc::asc(
&self.rtxn, &self.index,
criterion, &self.rtxn,
field.clone(), criterion,
)?), field.to_string(),
AscDescName::Desc(field) => Box::new(AscDesc::desc( )?)
&self.index, }
&self.rtxn, AscDescName::Desc(Member::Field(field)) => {
criterion, Box::new(AscDesc::desc(
field.clone(), &self.index,
)?), &self.rtxn,
criterion,
field.to_string(),
)?)
}
AscDescName::Asc(Member::Geo(point)) => {
Box::new(Geo::new(&self.index, &self.rtxn, criterion, point.clone())?)
}
AscDescName::Desc(Member::Geo(_point)) => {
panic!("You can't desc geosort"); // TODO: TAMO: remove this
}
}; };
} }
criterion criterion
@ -312,10 +324,10 @@ impl<'t> CriteriaBuilder<'t> {
Name::Attribute => Box::new(Attribute::new(self, criterion)), Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?), Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
Name::Asc(field) => { Name::Asc(field) => {
Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, Member::Field(field))?) Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?)
} }
Name::Desc(field) => { Name::Desc(field) => {
Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, Member::Field(field))?) Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?)
} }
}; };
} }