Introduce the Initial Criterion

This commit is contained in:
Clément Renault 2021-03-23 15:25:46 +01:00 committed by many
parent 89ee2cf576
commit 658f316511
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA
11 changed files with 286 additions and 533 deletions

View File

@ -57,9 +57,6 @@ criterion = "0.3.4"
maplit = "1.0.2" maplit = "1.0.2"
rand = "0.8.3" rand = "0.8.3"
[build-dependencies]
fst = "0.4.5"
[features] [features]
default = [] default = []

View File

@ -31,32 +31,10 @@ pub struct AscDesc<'t> {
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>, candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
faceted_candidates: RoaringBitmap, faceted_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>, parent: Box<dyn Criterion + 't>,
} }
impl<'t> AscDesc<'t> { impl<'t> AscDesc<'t> {
pub fn initial_asc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
) -> anyhow::Result<Self>
{
Self::initial(index, rtxn, query_tree, candidates, field_name, true)
}
pub fn initial_desc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
) -> anyhow::Result<Self>
{
Self::initial(index, rtxn, query_tree, candidates, field_name, false)
}
pub fn asc( pub fn asc(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
@ -77,47 +55,6 @@ impl<'t> AscDesc<'t> {
Self::new(index, rtxn, parent, field_name, false) Self::new(index, rtxn, parent, field_name, false)
} }
fn initial(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
ascending: bool,
) -> anyhow::Result<Self>
{
let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields(rtxn)?;
let (field_id, facet_type) = field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
let faceted_candidates = index.faceted_documents_ids(rtxn, field_id)?;
let candidates = match &query_tree {
Some(qt) => {
let context = CriteriaBuilder::new(rtxn, index)?;
let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), &mut WordDerivationsCache::new())?;
if let Some(candidates) = candidates {
qt_candidates.intersect_with(&candidates);
}
qt_candidates
},
None => candidates.unwrap_or(faceted_candidates.clone()),
};
Ok(AscDesc {
index,
rtxn,
field_name,
field_id,
facet_type,
ascending,
query_tree,
candidates: facet_ordered(index, rtxn, field_id, facet_type, ascending, candidates)?,
faceted_candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
})
}
fn new( fn new(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
@ -141,7 +78,7 @@ impl<'t> AscDesc<'t> {
candidates: Box::new(std::iter::empty()), candidates: Box::new(std::iter::empty()),
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?, faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent: Some(parent), parent,
}) })
} }
} }
@ -156,64 +93,56 @@ impl<'t> Criterion for AscDesc<'t> {
match self.candidates.next().transpose()? { match self.candidates.next().transpose()? {
None => { None => {
let query_tree = self.query_tree.take(); match self.parent.next(wdcache)? {
let bucket_candidates = take(&mut self.bucket_candidates); Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
match self.parent.as_mut() { let candidates_is_some = candidates.is_some();
Some(parent) => { self.query_tree = query_tree;
match parent.next(wdcache)? { let candidates = match (&self.query_tree, candidates) {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { (_, Some(mut candidates)) => {
self.query_tree = query_tree; candidates.intersect_with(&self.faceted_candidates);
let candidates = match (&self.query_tree, candidates) { candidates
(_, Some(mut candidates)) => {
candidates.intersect_with(&self.faceted_candidates);
candidates
},
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?;
candidates.intersect_with(&self.faceted_candidates);
candidates
},
(None, None) => take(&mut self.faceted_candidates),
};
if bucket_candidates.is_empty() {
self.bucket_candidates.union_with(&candidates);
} else {
self.bucket_candidates.union_with(&bucket_candidates);
}
self.candidates = facet_ordered(
self.index,
self.rtxn,
self.field_id,
self.facet_type,
self.ascending,
candidates,
)?;
}, },
None => return Ok(None), (Some(qt), None) => {
} let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
}, let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?;
None => if query_tree.is_none() && bucket_candidates.is_empty() { candidates.intersect_with(&self.faceted_candidates);
return Ok(None) candidates
}, },
} (None, None) => take(&mut self.faceted_candidates),
};
return Ok(Some(CriterionResult { // If our parent returns candidates it means that the bucket
query_tree, // candidates were already computed before and we can use them.
candidates: Some(RoaringBitmap::new()), //
bucket_candidates, // If not, we must use the just computed candidates as our bucket
})); // candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
}
if candidates.is_empty() {
continue;
}
self.candidates = facet_ordered(
self.index,
self.rtxn,
self.field_id,
self.facet_type,
self.ascending,
candidates,
)?;
},
None => return Ok(None),
}
}, },
Some(candidates) => { Some(candidates) => {
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => candidates.clone(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
}, },
} }

View File

@ -14,36 +14,19 @@ pub struct Attribute<'t> {
query_tree: Option<Operation>, query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>, parent: Box<dyn Criterion + 't>,
flattened_query_tree: Option<Vec<Vec<Query>>>, flattened_query_tree: Option<Vec<Vec<Query>>>,
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>, current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
} }
impl<'t> Attribute<'t> { impl<'t> Attribute<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Attribute {
ctx,
query_tree,
candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
flattened_query_tree: None,
current_buckets: None,
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Attribute { Attribute {
ctx, ctx,
query_tree: None, query_tree: None,
candidates: None, candidates: None,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent: Some(parent), parent,
flattened_query_tree: None, flattened_query_tree: None,
current_buckets: None, current_buckets: None,
} }
@ -63,34 +46,35 @@ impl<'t> Criterion for Attribute<'t> {
})); }));
}, },
(Some(qt), Some(candidates)) => { (Some(qt), Some(candidates)) => {
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| flatten_query_tree(&qt)); let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
let current_buckets = if let Some(current_buckets) = self.current_buckets.as_mut() { flatten_query_tree(&qt)
current_buckets });
} else {
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?; let current_buckets = match self.current_buckets.as_mut() {
self.current_buckets.get_or_insert(new_buckets.into_iter()) Some(current_buckets) => current_buckets,
None => {
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
self.current_buckets.get_or_insert(new_buckets.into_iter())
},
}; };
let found_candidates = if let Some((_score, candidates)) = current_buckets.next() { let found_candidates = match current_buckets.next() {
candidates Some((_score, candidates)) => candidates,
} else { None => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(), query_tree: self.query_tree.take(),
candidates: self.candidates.take(), candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates), bucket_candidates: take(&mut self.bucket_candidates),
})); }));
},
}; };
candidates.difference_with(&found_candidates); candidates.difference_with(&found_candidates);
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => found_candidates.clone(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(found_candidates), candidates: Some(found_candidates),
bucket_candidates: bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
}, },
(Some(qt), None) => { (Some(qt), None) => {
@ -106,18 +90,20 @@ impl<'t> Criterion for Attribute<'t> {
})); }));
}, },
(None, None) => { (None, None) => {
match self.parent.as_mut() { match self.parent.next(wdcache)? {
Some(parent) => { Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
match parent.next(wdcache)? { return Ok(Some(CriterionResult {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { query_tree: None,
self.query_tree = query_tree; candidates: None,
self.candidates = candidates; bucket_candidates,
self.bucket_candidates.union_with(&bucket_candidates); }));
self.flattened_query_tree = None; },
self.current_buckets = None; Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
}, self.query_tree = query_tree;
None => return Ok(None), self.candidates = candidates;
} self.bucket_candidates.union_with(&bucket_candidates);
self.flattened_query_tree = None;
self.current_buckets = None;
}, },
None => return Ok(None), None => return Ok(None),
} }

View File

@ -1,135 +0,0 @@
use std::collections::HashMap;
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
pub struct FetcherResult {
/// The query tree corresponding to the current bucket of the last criterion.
pub query_tree: Option<Operation>,
/// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap,
/// Candidates that comes from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap,
}
pub struct Fetcher<'t> {
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Candidates,
parent: Option<Box<dyn Criterion + 't>>,
should_get_documents_ids: bool,
wdcache: WordDerivationsCache,
}
impl<'t> Fetcher<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Fetcher {
ctx,
query_tree,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
parent: None,
should_get_documents_ids: true,
wdcache: WordDerivationsCache::new(),
}
}
pub fn new(
ctx: &'t dyn Context,
parent: Box<dyn Criterion + 't>,
) -> Self
{
Fetcher {
ctx,
query_tree: None,
candidates: Candidates::default(),
parent: Some(parent),
should_get_documents_ids: true,
wdcache: WordDerivationsCache::new(),
}
}
#[logging_timer::time("Fetcher::{}")]
pub fn next(&mut self) -> anyhow::Result<Option<FetcherResult>> {
use Candidates::{Allowed, Forbidden};
loop {
debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
self.should_get_documents_ids, self.candidates,
);
let should_get_documents_ids = take(&mut self.should_get_documents_ids);
match &mut self.candidates {
Allowed(_) => {
let candidates = take(&mut self.candidates).into_inner();
let candidates = match &self.query_tree {
Some(qt) if should_get_documents_ids => {
let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?;
docids.intersect_with(&candidates);
docids
},
_ => candidates,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.take(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
Forbidden(_) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(&mut self.wdcache)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
(None, None) => RoaringBitmap::new(),
};
return Ok(Some(FetcherResult { query_tree, candidates, bucket_candidates }))
},
None => if should_get_documents_ids {
let candidates = match &self.query_tree {
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
None => self.ctx.documents_ids()?,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.clone(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
}
},
None => if should_get_documents_ids {
let candidates = match &self.query_tree {
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
None => self.ctx.documents_ids()?,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.clone(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
}
return Ok(None);
},
}
}
}
}

View File

@ -0,0 +1,57 @@
use std::collections::HashMap;
use log::debug;
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Criterion, CriterionResult, Context};
/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
pub struct FinalResult {
/// The query tree corresponding to the current bucket of the last criterion.
pub query_tree: Option<Operation>,
/// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap,
/// Candidates that comes from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap,
}
pub struct Final<'t> {
ctx: &'t dyn Context,
parent: Box<dyn Criterion + 't>,
wdcache: WordDerivationsCache,
}
impl<'t> Final<'t> {
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Final<'t> {
Final { ctx, parent, wdcache: WordDerivationsCache::new() }
}
#[logging_timer::time("Final::{}")]
pub fn next(&mut self) -> anyhow::Result<Option<FinalResult>> {
loop {
debug!("Final iteration");
match self.parent.next(&mut self.wdcache)? {
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
(None, None) => self.ctx.documents_ids()?,
};
bucket_candidates.union_with(&candidates);
return Ok(Some(FinalResult {
query_tree,
candidates,
bucket_candidates,
}));
},
None => return Ok(None),
}
}
}
}

View File

@ -0,0 +1,28 @@
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{Criterion, CriterionResult};
pub struct Initial {
answer: Option<CriterionResult>
}
impl Initial {
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
let answer = CriterionResult {
query_tree,
candidates: candidates.clone(),
bucket_candidates: candidates.take().unwrap_or_default(),
};
Initial { answer: Some(answer) }
}
}
impl Criterion for Initial {
#[logging_timer::time("Initial::{}")]
fn next(&mut self, _: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
Ok(self.answer.take())
}
}

View File

@ -8,19 +8,21 @@ use crate::search::{word_derivations, WordDerivationsCache};
use crate::{Index, DocumentId}; use crate::{Index, DocumentId};
use super::query_tree::{Operation, Query, QueryKind}; use super::query_tree::{Operation, Query, QueryKind};
use self::asc_desc::AscDesc;
use self::attribute::Attribute;
use self::r#final::Final;
use self::initial::Initial;
use self::proximity::Proximity;
use self::typo::Typo; use self::typo::Typo;
use self::words::Words; use self::words::Words;
use self::asc_desc::AscDesc;
use self::proximity::Proximity;
use self::attribute::Attribute;
use self::fetcher::Fetcher;
mod asc_desc;
mod attribute;
mod initial;
mod proximity;
mod typo; mod typo;
mod words; mod words;
mod asc_desc; pub mod r#final;
mod proximity;
mod attribute;
pub mod fetcher;
pub trait Criterion { pub trait Criterion {
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>; fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>;
@ -61,6 +63,7 @@ impl Default for Candidates {
Self::Forbidden(RoaringBitmap::new()) Self::Forbidden(RoaringBitmap::new())
} }
} }
pub trait Context { pub trait Context {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>; fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
@ -128,44 +131,26 @@ impl<'t> CriteriaBuilder<'t> {
pub fn build( pub fn build(
&'t self, &'t self,
mut query_tree: Option<Operation>, query_tree: Option<Operation>,
mut facet_candidates: Option<RoaringBitmap>, facet_candidates: Option<RoaringBitmap>,
) -> anyhow::Result<Fetcher<'t>> ) -> anyhow::Result<Final<'t>>
{ {
use crate::criterion::Criterion as Name; use crate::criterion::Criterion as Name;
let mut criterion = None as Option<Box<dyn Criterion>>; let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? { for name in self.index.criteria(&self.rtxn)? {
criterion = Some(match criterion.take() { criterion = match name {
Some(father) => match name { Name::Typo => Box::new(Typo::new(self, criterion)),
Name::Typo => Box::new(Typo::new(self, father)), Name::Words => Box::new(Words::new(self, criterion)),
Name::Words => Box::new(Words::new(self, father)), Name::Proximity => Box::new(Proximity::new(self, criterion)),
Name::Proximity => Box::new(Proximity::new(self, father)), Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Attribute => Box::new(Attribute::new(self, father)), Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, father, field)?), Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, father, field)?), _otherwise => criterion,
_otherwise => father, };
},
None => match name {
Name::Typo => Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take())),
Name::Words => Box::new(Words::initial(self, query_tree.take(), facet_candidates.take())),
Name::Proximity => Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take())),
Name::Attribute => Box::new(Attribute::initial(self, query_tree.take(), facet_candidates.take())),
Name::Asc(field) => {
Box::new(AscDesc::initial_asc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
},
Name::Desc(field) => {
Box::new(AscDesc::initial_desc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
},
_otherwise => continue,
},
});
} }
match criterion { Ok(Final::new(self, criterion))
Some(criterion) => Ok(Fetcher::new(self, criterion)),
None => Ok(Fetcher::initial(self, query_tree, facet_candidates)),
}
} }
} }

View File

@ -8,48 +8,29 @@ use log::debug;
use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; use crate::{DocumentId, Position, search::{query_tree::QueryKind}};
use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::query_tree::{maximum_proximity, Operation, Query};
use crate::search::{build_dfa, WordDerivationsCache}; use crate::search::{build_dfa, WordDerivationsCache};
use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree}; use super::{Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree};
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>; type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
pub struct Proximity<'t> { pub struct Proximity<'t> {
ctx: &'t dyn Context, ctx: &'t dyn Context,
query_tree: Option<(usize, Operation)>, /// ((max_proximity, query_tree), allowed_candidates)
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
proximity: u8, proximity: u8,
candidates: Candidates,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>, parent: Box<dyn Criterion + 't>,
candidates_cache: Cache, candidates_cache: Cache,
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>, plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
} }
impl<'t> Proximity<'t> { impl<'t> Proximity<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Proximity {
ctx,
query_tree: query_tree.map(|op| (maximum_proximity(&op), op)),
proximity: 0,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: Cache::new(),
plane_sweep_cache: None,
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Proximity { Proximity {
ctx, ctx,
query_tree: None, state: None,
proximity: 0, proximity: 0,
candidates: Candidates::default(),
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent: Some(parent), parent: parent,
candidates_cache: Cache::new(), candidates_cache: Cache::new(),
plane_sweep_cache: None, plane_sweep_cache: None,
} }
@ -59,27 +40,20 @@ impl<'t> Proximity<'t> {
impl<'t> Criterion for Proximity<'t> { impl<'t> Criterion for Proximity<'t> {
#[logging_timer::time("Proximity::{}")] #[logging_timer::time("Proximity::{}")]
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
use Candidates::{Allowed, Forbidden};
loop { loop {
debug!("Proximity at iteration {} (max {:?}) ({:?})", debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
self.proximity, self.proximity,
self.query_tree.as_ref().map(|(mp, _)| mp), self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
self.candidates, self.state.as_ref().map(|(_, cd)| cd),
); );
match (&mut self.query_tree, &mut self.candidates) { match &mut self.state {
(_, Allowed(candidates)) if candidates.is_empty() => { Some((_, candidates)) if candidates.is_empty() => {
return Ok(Some(CriterionResult { self.state = None; // reset state
query_tree: self.query_tree.take().map(|(_, qt)| qt),
candidates: Some(take(&mut self.candidates).into_inner()),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}, },
(Some((max_prox, query_tree)), Allowed(candidates)) => { Some((Some((max_prox, query_tree)), candidates)) => {
if self.proximity as usize > *max_prox { if self.proximity as usize > *max_prox {
// reset state to (None, Forbidden(_)) self.state = None; // reset state
self.query_tree = None;
self.candidates = Candidates::default();
} else { } else {
let mut new_candidates = if candidates.len() <= 1000 { let mut new_candidates = if candidates.len() <= 1000 {
if let Some(cache) = self.plane_sweep_cache.as_mut() { if let Some(cache) = self.plane_sweep_cache.as_mut() {
@ -89,9 +63,7 @@ impl<'t> Criterion for Proximity<'t> {
candidates candidates
}, },
None => { None => {
// reset state to (None, Forbidden(_)) self.state = None; // reset state
self.query_tree = None;
self.candidates = Candidates::default();
continue continue
}, },
} }
@ -120,79 +92,54 @@ impl<'t> Criterion for Proximity<'t> {
candidates.difference_with(&new_candidates); candidates.difference_with(&new_candidates);
self.proximity += 1; self.proximity += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()), query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates), candidates: Some(new_candidates),
bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
} }
}, },
(Some((max_prox, query_tree)), Forbidden(candidates)) => { Some((None, candidates)) => {
if self.proximity as usize > *max_prox { let candidates = take(candidates);
self.query_tree = None; self.state = None; // reset state
self.candidates = Candidates::default();
} else {
let mut new_candidates = resolve_candidates(
self.ctx,
&query_tree,
self.proximity,
&mut self.candidates_cache,
wdcache,
)?;
new_candidates.difference_with(&candidates);
candidates.union_with(&new_candidates);
self.proximity += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates,
}));
}
},
(None, Allowed(_)) => {
let candidates = take(&mut self.candidates).into_inner();
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates: Some(candidates.clone()), candidates: Some(candidates.clone()),
bucket_candidates: candidates, bucket_candidates: candidates,
})); }));
}, },
(None, Forbidden(_)) => { None => {
match self.parent.as_mut() { match self.parent.next(wdcache)? {
Some(parent) => { Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
match parent.next(wdcache)? { return Ok(Some(CriterionResult {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { query_tree: None,
let candidates = match (&query_tree, candidates) { candidates: None,
(_, Some(candidates)) => candidates, bucket_candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?, }));
(None, None) => RoaringBitmap::new(), },
}; Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?,
(None, None) => RoaringBitmap::new(),
};
if bucket_candidates.is_empty() { // If our parent returns candidates it means that the bucket
self.bucket_candidates.union_with(&candidates); // candidates were already computed before and we can use them.
} else { //
self.bucket_candidates.union_with(&bucket_candidates); // If not, we must use the just computed candidates as our bucket
} // candidates.
if candidates_is_some {
self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op)); self.bucket_candidates.union_with(&bucket_candidates);
self.proximity = 0; } else {
self.candidates = Candidates::Allowed(candidates); self.bucket_candidates.union_with(&candidates);
self.plane_sweep_cache = None;
},
None => return Ok(None),
} }
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
self.state = Some((query_tree, candidates));
self.proximity = 0;
self.plane_sweep_cache = None;
}, },
None => return Ok(None), None => return Ok(None),
} }

View File

@ -14,28 +14,11 @@ pub struct Typo<'t> {
number_typos: u8, number_typos: u8,
candidates: Candidates, candidates: Candidates,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>, parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
} }
impl<'t> Typo<'t> { impl<'t> Typo<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Typo {
ctx,
query_tree: query_tree.map(|op| (maximum_typo(&op), op)),
number_typos: 0,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: HashMap::new(),
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Typo { Typo {
ctx, ctx,
@ -43,7 +26,7 @@ impl<'t> Typo<'t> {
number_typos: 0, number_typos: 0,
candidates: Candidates::default(), candidates: Candidates::default(),
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent: Some(parent), parent,
candidates_cache: HashMap::new(), candidates_cache: HashMap::new(),
} }
} }
@ -90,15 +73,10 @@ impl<'t> Criterion for Typo<'t> {
candidates.difference_with(&new_candidates); candidates.difference_with(&new_candidates);
self.number_typos += 1; self.number_typos += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree), query_tree: Some(new_query_tree),
candidates: Some(new_candidates), candidates: Some(new_candidates),
bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
} }
}, },
@ -145,17 +123,19 @@ impl<'t> Criterion for Typo<'t> {
})); }));
}, },
(None, Forbidden(_)) => { (None, Forbidden(_)) => {
match self.parent.as_mut() { match self.parent.next(wdcache)? {
Some(parent) => { Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
match parent.next(wdcache)? { return Ok(Some(CriterionResult {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { query_tree: None,
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); candidates: None,
self.number_typos = 0; bucket_candidates,
self.candidates = candidates.map_or_else(Candidates::default, Candidates::Allowed); }));
self.bucket_candidates.union_with(&bucket_candidates); },
}, Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
None => return Ok(None), self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
} self.number_typos = 0;
self.candidates = candidates.map_or_else(Candidates::default, Candidates::Allowed);
self.bucket_candidates.union_with(&bucket_candidates);
}, },
None => return Ok(None), None => return Ok(None),
} }
@ -334,8 +314,8 @@ fn resolve_candidates<'t>(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
use super::super::initial::Initial;
use super::super::test::TestContext; use super::super::test::TestContext;
#[test] #[test]
@ -345,7 +325,8 @@ mod test {
let facet_candidates = None; let facet_candidates = None;
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, facet_candidates); let parent = Initial::new(query_tree, facet_candidates);
let mut criteria = Typo::new(&context, Box::new(parent));
assert!(criteria.next(&mut wdcache).unwrap().is_none()); assert!(criteria.next(&mut wdcache).unwrap().is_none());
} }
@ -364,7 +345,8 @@ mod test {
let facet_candidates = None; let facet_candidates = None;
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates); let parent = Initial::new(Some(query_tree), facet_candidates);
let mut criteria = Typo::new(&context, Box::new(parent));
let candidates_1 = context.word_docids("split").unwrap().unwrap() let candidates_1 = context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap() & context.word_docids("this").unwrap().unwrap()
@ -413,7 +395,8 @@ mod test {
let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone())); let parent = Initial::new(query_tree, Some(facet_candidates.clone()));
let mut criteria = Typo::new(&context, Box::new(parent));
let expected = CriterionResult { let expected = CriterionResult {
query_tree: None, query_tree: None,
@ -442,7 +425,8 @@ mod test {
let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone())); let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone()));
let mut criteria = Typo::new(&context, Box::new(parent));
let candidates_1 = context.word_docids("split").unwrap().unwrap() let candidates_1 = context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap() & context.word_docids("this").unwrap().unwrap()

View File

@ -12,34 +12,18 @@ pub struct Words<'t> {
query_trees: Vec<Operation>, query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>, parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
} }
impl<'t> Words<'t> { impl<'t> Words<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Words {
ctx,
query_trees: query_tree.map(explode_query_tree).unwrap_or_default(),
candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: HashMap::default(),
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Words { Words {
ctx, ctx,
query_trees: Vec::default(), query_trees: Vec::default(),
candidates: None, candidates: None,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent: Some(parent), parent,
candidates_cache: HashMap::default(), candidates_cache: HashMap::default(),
} }
} }
@ -65,27 +49,17 @@ impl<'t> Criterion for Words<'t> {
found_candidates.intersect_with(&candidates); found_candidates.intersect_with(&candidates);
candidates.difference_with(&found_candidates); candidates.difference_with(&found_candidates);
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => found_candidates.clone(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(qt), query_tree: Some(qt),
candidates: Some(found_candidates), candidates: Some(found_candidates),
bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
}, },
(Some(qt), None) => { (Some(qt), None) => {
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => RoaringBitmap::new(),
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(qt), query_tree: Some(qt),
candidates: None, candidates: None,
bucket_candidates, bucket_candidates: take(&mut self.bucket_candidates),
})); }));
}, },
(None, Some(_)) => { (None, Some(_)) => {
@ -97,16 +71,18 @@ impl<'t> Criterion for Words<'t> {
})); }));
}, },
(None, None) => { (None, None) => {
match self.parent.as_mut() { match self.parent.next(wdcache)? {
Some(parent) => { Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
match parent.next(wdcache)? { return Ok(Some(CriterionResult {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { query_tree: None,
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default(); candidates: None,
self.candidates = candidates; bucket_candidates,
self.bucket_candidates.union_with(&bucket_candidates); }));
}, },
None => return Ok(None), Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
} self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
self.candidates = candidates;
self.bucket_candidates.union_with(&bucket_candidates);
}, },
None => return Ok(None), None => return Ok(None),
} }

View File

@ -13,9 +13,8 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap; use roaring::bitmap::RoaringBitmap;
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
use crate::search::criteria::r#final::{Final, FinalResult};
use crate::search::criteria::fetcher::{Fetcher, FetcherResult}; use crate::{Index, DocumentId};
use crate::{DocumentId, Index};
pub use self::facet::{ pub use self::facet::{
FacetCondition, FacetDistribution, FacetIter, FacetNumberOperator, FacetStringOperator, FacetCondition, FacetDistribution, FacetIter, FacetNumberOperator, FacetStringOperator,
@ -162,14 +161,14 @@ impl<'a> Search<'a> {
&self, &self,
mut distinct: impl for<'c> Distinct<'c>, mut distinct: impl for<'c> Distinct<'c>,
matching_words: MatchingWords, matching_words: MatchingWords,
mut criteria: Fetcher, mut criteria: Final,
) -> anyhow::Result<SearchResult> { ) -> anyhow::Result<SearchResult> {
let mut offset = self.offset; let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new(); let mut initial_candidates = RoaringBitmap::new();
let mut excluded_documents = RoaringBitmap::new(); let mut excluded_documents = RoaringBitmap::new();
let mut documents_ids = Vec::with_capacity(self.limit); let mut documents_ids = Vec::with_capacity(self.limit);
while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next()? { while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next()? {
debug!("Number of candidates found {}", candidates.len()); debug!("Number of candidates found {}", candidates.len());
let excluded = take(&mut excluded_documents); let excluded = take(&mut excluded_documents);