Introduce the Initial Criterion

This commit is contained in:
Clément Renault 2021-03-23 15:25:46 +01:00 committed by many
parent 89ee2cf576
commit 658f316511
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA
11 changed files with 286 additions and 533 deletions

View File

@ -57,9 +57,6 @@ criterion = "0.3.4"
maplit = "1.0.2"
rand = "0.8.3"
[build-dependencies]
fst = "0.4.5"
[features]
default = []

View File

@ -31,32 +31,10 @@ pub struct AscDesc<'t> {
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
bucket_candidates: RoaringBitmap,
faceted_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>,
parent: Box<dyn Criterion + 't>,
}
impl<'t> AscDesc<'t> {
pub fn initial_asc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
) -> anyhow::Result<Self>
{
Self::initial(index, rtxn, query_tree, candidates, field_name, true)
}
pub fn initial_desc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
) -> anyhow::Result<Self>
{
Self::initial(index, rtxn, query_tree, candidates, field_name, false)
}
pub fn asc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@ -77,47 +55,6 @@ impl<'t> AscDesc<'t> {
Self::new(index, rtxn, parent, field_name, false)
}
fn initial(
index: &'t Index,
rtxn: &'t heed::RoTxn,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
field_name: String,
ascending: bool,
) -> anyhow::Result<Self>
{
let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields(rtxn)?;
let (field_id, facet_type) = field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
let faceted_candidates = index.faceted_documents_ids(rtxn, field_id)?;
let candidates = match &query_tree {
Some(qt) => {
let context = CriteriaBuilder::new(rtxn, index)?;
let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), &mut WordDerivationsCache::new())?;
if let Some(candidates) = candidates {
qt_candidates.intersect_with(&candidates);
}
qt_candidates
},
None => candidates.unwrap_or(faceted_candidates.clone()),
};
Ok(AscDesc {
index,
rtxn,
field_name,
field_id,
facet_type,
ascending,
query_tree,
candidates: facet_ordered(index, rtxn, field_id, facet_type, ascending, candidates)?,
faceted_candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
})
}
fn new(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@ -141,7 +78,7 @@ impl<'t> AscDesc<'t> {
candidates: Box::new(std::iter::empty()),
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
bucket_candidates: RoaringBitmap::new(),
parent: Some(parent),
parent,
})
}
}
@ -156,12 +93,9 @@ impl<'t> Criterion for AscDesc<'t> {
match self.candidates.next().transpose()? {
None => {
let query_tree = self.query_tree.take();
let bucket_candidates = take(&mut self.bucket_candidates);
match self.parent.as_mut() {
Some(parent) => {
match parent.next(wdcache)? {
match self.parent.next(wdcache)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
self.query_tree = query_tree;
let candidates = match (&self.query_tree, candidates) {
(_, Some(mut candidates)) => {
@ -176,11 +110,22 @@ impl<'t> Criterion for AscDesc<'t> {
},
(None, None) => take(&mut self.faceted_candidates),
};
if bucket_candidates.is_empty() {
self.bucket_candidates.union_with(&candidates);
} else {
// If our parent returns candidates it means that the bucket
// candidates were already computed before and we can use them.
//
// If not, we must use the just computed candidates as our bucket
// candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
}
if candidates.is_empty() {
continue;
}
self.candidates = facet_ordered(
self.index,
self.rtxn,
@ -193,27 +138,11 @@ impl<'t> Criterion for AscDesc<'t> {
None => return Ok(None),
}
},
None => if query_tree.is_none() && bucket_candidates.is_empty() {
return Ok(None)
},
}
return Ok(Some(CriterionResult {
query_tree,
candidates: Some(RoaringBitmap::new()),
bucket_candidates,
}));
},
Some(candidates) => {
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
candidates: Some(candidates),
bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
}

View File

@ -14,36 +14,19 @@ pub struct Attribute<'t> {
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>,
parent: Box<dyn Criterion + 't>,
flattened_query_tree: Option<Vec<Vec<Query>>>,
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
}
impl<'t> Attribute<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Attribute {
ctx,
query_tree,
candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
flattened_query_tree: None,
current_buckets: None,
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Attribute {
ctx,
query_tree: None,
candidates: None,
bucket_candidates: RoaringBitmap::new(),
parent: Some(parent),
parent,
flattened_query_tree: None,
current_buckets: None,
}
@ -63,34 +46,35 @@ impl<'t> Criterion for Attribute<'t> {
}));
},
(Some(qt), Some(candidates)) => {
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| flatten_query_tree(&qt));
let current_buckets = if let Some(current_buckets) = self.current_buckets.as_mut() {
current_buckets
} else {
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
flatten_query_tree(&qt)
});
let current_buckets = match self.current_buckets.as_mut() {
Some(current_buckets) => current_buckets,
None => {
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
self.current_buckets.get_or_insert(new_buckets.into_iter())
},
};
let found_candidates = if let Some((_score, candidates)) = current_buckets.next() {
candidates
} else {
let found_candidates = match current_buckets.next() {
Some((_score, candidates)) => candidates,
None => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
};
candidates.difference_with(&found_candidates);
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => found_candidates.clone(),
};
candidates.difference_with(&found_candidates);
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
candidates: Some(found_candidates),
bucket_candidates: bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(Some(qt), None) => {
@ -106,9 +90,14 @@ impl<'t> Criterion for Attribute<'t> {
}));
},
(None, None) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(wdcache)? {
match self.parent.next(wdcache)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree;
self.candidates = candidates;
@ -119,9 +108,6 @@ impl<'t> Criterion for Attribute<'t> {
None => return Ok(None),
}
},
None => return Ok(None),
}
},
}
}
}

View File

@ -1,135 +0,0 @@
use std::collections::HashMap;
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
pub struct FetcherResult {
/// The query tree corresponding to the current bucket of the last criterion.
pub query_tree: Option<Operation>,
/// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap,
/// Candidates that comes from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap,
}
pub struct Fetcher<'t> {
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Candidates,
parent: Option<Box<dyn Criterion + 't>>,
should_get_documents_ids: bool,
wdcache: WordDerivationsCache,
}
impl<'t> Fetcher<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Fetcher {
ctx,
query_tree,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
parent: None,
should_get_documents_ids: true,
wdcache: WordDerivationsCache::new(),
}
}
pub fn new(
ctx: &'t dyn Context,
parent: Box<dyn Criterion + 't>,
) -> Self
{
Fetcher {
ctx,
query_tree: None,
candidates: Candidates::default(),
parent: Some(parent),
should_get_documents_ids: true,
wdcache: WordDerivationsCache::new(),
}
}
#[logging_timer::time("Fetcher::{}")]
pub fn next(&mut self) -> anyhow::Result<Option<FetcherResult>> {
use Candidates::{Allowed, Forbidden};
loop {
debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
self.should_get_documents_ids, self.candidates,
);
let should_get_documents_ids = take(&mut self.should_get_documents_ids);
match &mut self.candidates {
Allowed(_) => {
let candidates = take(&mut self.candidates).into_inner();
let candidates = match &self.query_tree {
Some(qt) if should_get_documents_ids => {
let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?;
docids.intersect_with(&candidates);
docids
},
_ => candidates,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.take(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
Forbidden(_) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(&mut self.wdcache)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
(None, None) => RoaringBitmap::new(),
};
return Ok(Some(FetcherResult { query_tree, candidates, bucket_candidates }))
},
None => if should_get_documents_ids {
let candidates = match &self.query_tree {
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
None => self.ctx.documents_ids()?,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.clone(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
}
},
None => if should_get_documents_ids {
let candidates = match &self.query_tree {
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
None => self.ctx.documents_ids()?,
};
return Ok(Some(FetcherResult {
query_tree: self.query_tree.clone(),
candidates: candidates.clone(),
bucket_candidates: candidates,
}));
},
}
return Ok(None);
},
}
}
}
}

View File

@ -0,0 +1,57 @@
use std::collections::HashMap;
use log::debug;
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Criterion, CriterionResult, Context};
/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
pub struct FinalResult {
/// The query tree corresponding to the current bucket of the last criterion.
pub query_tree: Option<Operation>,
/// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap,
/// Candidates that comes from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap,
}
pub struct Final<'t> {
ctx: &'t dyn Context,
parent: Box<dyn Criterion + 't>,
wdcache: WordDerivationsCache,
}
impl<'t> Final<'t> {
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Final<'t> {
Final { ctx, parent, wdcache: WordDerivationsCache::new() }
}
#[logging_timer::time("Final::{}")]
pub fn next(&mut self) -> anyhow::Result<Option<FinalResult>> {
loop {
debug!("Final iteration");
match self.parent.next(&mut self.wdcache)? {
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
(None, None) => self.ctx.documents_ids()?,
};
bucket_candidates.union_with(&candidates);
return Ok(Some(FinalResult {
query_tree,
candidates,
bucket_candidates,
}));
},
None => return Ok(None),
}
}
}
}

View File

@ -0,0 +1,28 @@
use roaring::RoaringBitmap;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{Criterion, CriterionResult};
pub struct Initial {
answer: Option<CriterionResult>
}
impl Initial {
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
let answer = CriterionResult {
query_tree,
candidates: candidates.clone(),
bucket_candidates: candidates.take().unwrap_or_default(),
};
Initial { answer: Some(answer) }
}
}
impl Criterion for Initial {
#[logging_timer::time("Initial::{}")]
fn next(&mut self, _: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
Ok(self.answer.take())
}
}

View File

@ -8,19 +8,21 @@ use crate::search::{word_derivations, WordDerivationsCache};
use crate::{Index, DocumentId};
use super::query_tree::{Operation, Query, QueryKind};
use self::asc_desc::AscDesc;
use self::attribute::Attribute;
use self::r#final::Final;
use self::initial::Initial;
use self::proximity::Proximity;
use self::typo::Typo;
use self::words::Words;
use self::asc_desc::AscDesc;
use self::proximity::Proximity;
use self::attribute::Attribute;
use self::fetcher::Fetcher;
mod asc_desc;
mod attribute;
mod initial;
mod proximity;
mod typo;
mod words;
mod asc_desc;
mod proximity;
mod attribute;
pub mod fetcher;
pub mod r#final;
pub trait Criterion {
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>;
@ -61,6 +63,7 @@ impl Default for Candidates {
Self::Forbidden(RoaringBitmap::new())
}
}
pub trait Context {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
@ -128,44 +131,26 @@ impl<'t> CriteriaBuilder<'t> {
pub fn build(
&'t self,
mut query_tree: Option<Operation>,
mut facet_candidates: Option<RoaringBitmap>,
) -> anyhow::Result<Fetcher<'t>>
query_tree: Option<Operation>,
facet_candidates: Option<RoaringBitmap>,
) -> anyhow::Result<Final<'t>>
{
use crate::criterion::Criterion as Name;
let mut criterion = None as Option<Box<dyn Criterion>>;
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? {
criterion = Some(match criterion.take() {
Some(father) => match name {
Name::Typo => Box::new(Typo::new(self, father)),
Name::Words => Box::new(Words::new(self, father)),
Name::Proximity => Box::new(Proximity::new(self, father)),
Name::Attribute => Box::new(Attribute::new(self, father)),
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, father, field)?),
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, father, field)?),
_otherwise => father,
},
None => match name {
Name::Typo => Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take())),
Name::Words => Box::new(Words::initial(self, query_tree.take(), facet_candidates.take())),
Name::Proximity => Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take())),
Name::Attribute => Box::new(Attribute::initial(self, query_tree.take(), facet_candidates.take())),
Name::Asc(field) => {
Box::new(AscDesc::initial_asc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
},
Name::Desc(field) => {
Box::new(AscDesc::initial_desc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
},
_otherwise => continue,
},
});
criterion = match name {
Name::Typo => Box::new(Typo::new(self, criterion)),
Name::Words => Box::new(Words::new(self, criterion)),
Name::Proximity => Box::new(Proximity::new(self, criterion)),
Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
_otherwise => criterion,
};
}
match criterion {
Some(criterion) => Ok(Fetcher::new(self, criterion)),
None => Ok(Fetcher::initial(self, query_tree, facet_candidates)),
}
Ok(Final::new(self, criterion))
}
}

View File

@ -8,48 +8,29 @@ use log::debug;
use crate::{DocumentId, Position, search::{query_tree::QueryKind}};
use crate::search::query_tree::{maximum_proximity, Operation, Query};
use crate::search::{build_dfa, WordDerivationsCache};
use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree};
use super::{Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree};
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
pub struct Proximity<'t> {
ctx: &'t dyn Context,
query_tree: Option<(usize, Operation)>,
/// ((max_proximity, query_tree), allowed_candidates)
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
proximity: u8,
candidates: Candidates,
bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>,
parent: Box<dyn Criterion + 't>,
candidates_cache: Cache,
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
}
impl<'t> Proximity<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Proximity {
ctx,
query_tree: query_tree.map(|op| (maximum_proximity(&op), op)),
proximity: 0,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: Cache::new(),
plane_sweep_cache: None,
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Proximity {
ctx,
query_tree: None,
state: None,
proximity: 0,
candidates: Candidates::default(),
bucket_candidates: RoaringBitmap::new(),
parent: Some(parent),
parent: parent,
candidates_cache: Cache::new(),
plane_sweep_cache: None,
}
@ -59,27 +40,20 @@ impl<'t> Proximity<'t> {
impl<'t> Criterion for Proximity<'t> {
#[logging_timer::time("Proximity::{}")]
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
use Candidates::{Allowed, Forbidden};
loop {
debug!("Proximity at iteration {} (max {:?}) ({:?})",
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
self.proximity,
self.query_tree.as_ref().map(|(mp, _)| mp),
self.candidates,
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
self.state.as_ref().map(|(_, cd)| cd),
);
match (&mut self.query_tree, &mut self.candidates) {
(_, Allowed(candidates)) if candidates.is_empty() => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take().map(|(_, qt)| qt),
candidates: Some(take(&mut self.candidates).into_inner()),
bucket_candidates: take(&mut self.bucket_candidates),
}));
match &mut self.state {
Some((_, candidates)) if candidates.is_empty() => {
self.state = None; // reset state
},
(Some((max_prox, query_tree)), Allowed(candidates)) => {
Some((Some((max_prox, query_tree)), candidates)) => {
if self.proximity as usize > *max_prox {
// reset state to (None, Forbidden(_))
self.query_tree = None;
self.candidates = Candidates::default();
self.state = None; // reset state
} else {
let mut new_candidates = if candidates.len() <= 1000 {
if let Some(cache) = self.plane_sweep_cache.as_mut() {
@ -89,9 +63,7 @@ impl<'t> Criterion for Proximity<'t> {
candidates
},
None => {
// reset state to (None, Forbidden(_))
self.query_tree = None;
self.candidates = Candidates::default();
self.state = None; // reset state
continue
},
}
@ -120,83 +92,58 @@ impl<'t> Criterion for Proximity<'t> {
candidates.difference_with(&new_candidates);
self.proximity += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
(Some((max_prox, query_tree)), Forbidden(candidates)) => {
if self.proximity as usize > *max_prox {
self.query_tree = None;
self.candidates = Candidates::default();
} else {
let mut new_candidates = resolve_candidates(
self.ctx,
&query_tree,
self.proximity,
&mut self.candidates_cache,
wdcache,
)?;
new_candidates.difference_with(&candidates);
candidates.union_with(&new_candidates);
self.proximity += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates,
}));
}
},
(None, Allowed(_)) => {
let candidates = take(&mut self.candidates).into_inner();
Some((None, candidates)) => {
let candidates = take(candidates);
self.state = None; // reset state
return Ok(Some(CriterionResult {
query_tree: None,
candidates: Some(candidates.clone()),
bucket_candidates: candidates,
}));
},
(None, Forbidden(_)) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(wdcache)? {
None => {
match self.parent.next(wdcache)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?,
(None, None) => RoaringBitmap::new(),
};
if bucket_candidates.is_empty() {
self.bucket_candidates.union_with(&candidates);
} else {
// If our parent returns candidates it means that the bucket
// candidates were already computed before and we can use them.
//
// If not, we must use the just computed candidates as our bucket
// candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
}
self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
self.state = Some((query_tree, candidates));
self.proximity = 0;
self.candidates = Candidates::Allowed(candidates);
self.plane_sweep_cache = None;
},
None => return Ok(None),
}
},
None => return Ok(None),
}
},
}
}
}

View File

@ -14,28 +14,11 @@ pub struct Typo<'t> {
number_typos: u8,
candidates: Candidates,
bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>,
parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
}
impl<'t> Typo<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Typo {
ctx,
query_tree: query_tree.map(|op| (maximum_typo(&op), op)),
number_typos: 0,
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: HashMap::new(),
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Typo {
ctx,
@ -43,7 +26,7 @@ impl<'t> Typo<'t> {
number_typos: 0,
candidates: Candidates::default(),
bucket_candidates: RoaringBitmap::new(),
parent: Some(parent),
parent,
candidates_cache: HashMap::new(),
}
}
@ -90,15 +73,10 @@ impl<'t> Criterion for Typo<'t> {
candidates.difference_with(&new_candidates);
self.number_typos += 1;
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => new_candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(new_candidates),
bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
@ -145,9 +123,14 @@ impl<'t> Criterion for Typo<'t> {
}));
},
(None, Forbidden(_)) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(wdcache)? {
match self.parent.next(wdcache)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
self.number_typos = 0;
@ -157,9 +140,6 @@ impl<'t> Criterion for Typo<'t> {
None => return Ok(None),
}
},
None => return Ok(None),
}
},
}
}
}
@ -334,8 +314,8 @@ fn resolve_candidates<'t>(
#[cfg(test)]
mod test {
use super::*;
use super::super::initial::Initial;
use super::super::test::TestContext;
#[test]
@ -345,7 +325,8 @@ mod test {
let facet_candidates = None;
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, facet_candidates);
let parent = Initial::new(query_tree, facet_candidates);
let mut criteria = Typo::new(&context, Box::new(parent));
assert!(criteria.next(&mut wdcache).unwrap().is_none());
}
@ -364,7 +345,8 @@ mod test {
let facet_candidates = None;
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates);
let parent = Initial::new(Some(query_tree), facet_candidates);
let mut criteria = Typo::new(&context, Box::new(parent));
let candidates_1 = context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()
@ -413,7 +395,8 @@ mod test {
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone()));
let parent = Initial::new(query_tree, Some(facet_candidates.clone()));
let mut criteria = Typo::new(&context, Box::new(parent));
let expected = CriterionResult {
query_tree: None,
@ -442,7 +425,8 @@ mod test {
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone()));
let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone()));
let mut criteria = Typo::new(&context, Box::new(parent));
let candidates_1 = context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()

View File

@ -12,34 +12,18 @@ pub struct Words<'t> {
query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap,
parent: Option<Box<dyn Criterion + 't>>,
parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
}
impl<'t> Words<'t> {
pub fn initial(
ctx: &'t dyn Context,
query_tree: Option<Operation>,
candidates: Option<RoaringBitmap>,
) -> Self
{
Words {
ctx,
query_trees: query_tree.map(explode_query_tree).unwrap_or_default(),
candidates,
bucket_candidates: RoaringBitmap::new(),
parent: None,
candidates_cache: HashMap::default(),
}
}
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
Words {
ctx,
query_trees: Vec::default(),
candidates: None,
bucket_candidates: RoaringBitmap::new(),
parent: Some(parent),
parent,
candidates_cache: HashMap::default(),
}
}
@ -65,27 +49,17 @@ impl<'t> Criterion for Words<'t> {
found_candidates.intersect_with(&candidates);
candidates.difference_with(&found_candidates);
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => found_candidates.clone(),
};
return Ok(Some(CriterionResult {
query_tree: Some(qt),
candidates: Some(found_candidates),
bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(Some(qt), None) => {
let bucket_candidates = match self.parent {
Some(_) => take(&mut self.bucket_candidates),
None => RoaringBitmap::new(),
};
return Ok(Some(CriterionResult {
query_tree: Some(qt),
candidates: None,
bucket_candidates,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(None, Some(_)) => {
@ -97,9 +71,14 @@ impl<'t> Criterion for Words<'t> {
}));
},
(None, None) => {
match self.parent.as_mut() {
Some(parent) => {
match parent.next(wdcache)? {
match self.parent.next(wdcache)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates: None,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
self.candidates = candidates;
@ -108,9 +87,6 @@ impl<'t> Criterion for Words<'t> {
None => return Ok(None),
}
},
None => return Ok(None),
}
},
}
}
}

View File

@ -13,9 +13,8 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
use crate::search::criteria::fetcher::{Fetcher, FetcherResult};
use crate::{DocumentId, Index};
use crate::search::criteria::r#final::{Final, FinalResult};
use crate::{Index, DocumentId};
pub use self::facet::{
FacetCondition, FacetDistribution, FacetIter, FacetNumberOperator, FacetStringOperator,
@ -162,14 +161,14 @@ impl<'a> Search<'a> {
&self,
mut distinct: impl for<'c> Distinct<'c>,
matching_words: MatchingWords,
mut criteria: Fetcher,
mut criteria: Final,
) -> anyhow::Result<SearchResult> {
let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new();
let mut excluded_documents = RoaringBitmap::new();
let mut documents_ids = Vec::with_capacity(self.limit);
while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next()? {
while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next()? {
debug!("Number of candidates found {}", candidates.len());
let excluded = take(&mut excluded_documents);