mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Merge #586
586: Add settings to force milli to exhaustively compute the total number of hits r=Kerollmops a=ManyTheFish Add a new setting `exhaustive_number_hits` to `Search` forcing the `Initial` criterion to exhaustively compute the bucket_candidates allowing the end users to implement finite pagination. related to https://github.com/meilisearch/meilisearch/pull/2601 Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
commit
19b2326f3d
@ -1,31 +1,75 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
|
use crate::search::criteria::{resolve_query_tree, Context};
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
|
use crate::search::Distinct;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
/// Initial is a mandatory criterion, it is always the first
|
||||||
pub struct Initial {
|
/// and is meant to initialize the CriterionResult used by the other criteria.
|
||||||
|
/// It behaves like a [Once Iterator](https://doc.rust-lang.org/std/iter/struct.Once.html) and will return Some(CriterionResult) only one time.
|
||||||
|
pub struct Initial<'t, D> {
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
answer: Option<CriterionResult>,
|
answer: Option<CriterionResult>,
|
||||||
|
exhaustive_number_hits: bool,
|
||||||
|
distinct: Option<D>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Initial {
|
impl<'t, D> Initial<'t, D> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
filtered_candidates: Option<RoaringBitmap>,
|
filtered_candidates: Option<RoaringBitmap>,
|
||||||
) -> Initial {
|
exhaustive_number_hits: bool,
|
||||||
|
distinct: Option<D>,
|
||||||
|
) -> Initial<D> {
|
||||||
let answer = CriterionResult {
|
let answer = CriterionResult {
|
||||||
query_tree,
|
query_tree,
|
||||||
candidates: None,
|
candidates: None,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates: None,
|
bucket_candidates: None,
|
||||||
};
|
};
|
||||||
Initial { answer: Some(answer) }
|
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Criterion for Initial {
|
impl<D: Distinct> Criterion for Initial<'_, D> {
|
||||||
#[logging_timer::time("Initial::{}")]
|
#[logging_timer::time("Initial::{}")]
|
||||||
fn next(&mut self, _: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
Ok(self.answer.take())
|
self.answer
|
||||||
|
.take()
|
||||||
|
.map(|mut answer| {
|
||||||
|
if self.exhaustive_number_hits && answer.query_tree.is_some() {
|
||||||
|
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
|
||||||
|
let mut candidates = resolve_query_tree(
|
||||||
|
self.ctx,
|
||||||
|
answer.query_tree.as_ref().unwrap(),
|
||||||
|
&mut params.wdcache,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Apply the filters on the documents retrieved with the query tree.
|
||||||
|
if let Some(ref filtered_candidates) = answer.filtered_candidates {
|
||||||
|
candidates &= filtered_candidates;
|
||||||
|
}
|
||||||
|
|
||||||
|
// because the bucket_candidates should be an exhaustive count of the matching documents,
|
||||||
|
// we precompute the distinct attributes.
|
||||||
|
let bucket_candidates = match &mut self.distinct {
|
||||||
|
Some(distinct) => {
|
||||||
|
let mut bucket_candidates = RoaringBitmap::new();
|
||||||
|
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
|
||||||
|
bucket_candidates.insert(c?);
|
||||||
|
}
|
||||||
|
bucket_candidates
|
||||||
|
}
|
||||||
|
None => candidates.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
answer.candidates = Some(candidates);
|
||||||
|
answer.bucket_candidates = Some(bucket_candidates);
|
||||||
|
}
|
||||||
|
Ok(answer)
|
||||||
|
})
|
||||||
|
.transpose()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,7 @@ use self::typo::Typo;
|
|||||||
use self::words::Words;
|
use self::words::Words;
|
||||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||||
use crate::search::criteria::geo::Geo;
|
use crate::search::criteria::geo::Geo;
|
||||||
use crate::search::{word_derivations, WordDerivationsCache};
|
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
|
||||||
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
@ -226,19 +226,26 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build(
|
pub fn build<D: 't + Distinct>(
|
||||||
&'t self,
|
&'t self,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||||
filtered_candidates: Option<RoaringBitmap>,
|
filtered_candidates: Option<RoaringBitmap>,
|
||||||
sort_criteria: Option<Vec<AscDescName>>,
|
sort_criteria: Option<Vec<AscDescName>>,
|
||||||
|
exhaustive_number_hits: bool,
|
||||||
|
distinct: Option<D>,
|
||||||
) -> Result<Final<'t>> {
|
) -> Result<Final<'t>> {
|
||||||
use crate::criterion::Criterion as Name;
|
use crate::criterion::Criterion as Name;
|
||||||
|
|
||||||
let primitive_query = primitive_query.unwrap_or_default();
|
let primitive_query = primitive_query.unwrap_or_default();
|
||||||
|
|
||||||
let mut criterion =
|
let mut criterion = Box::new(Initial::new(
|
||||||
Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
|
self,
|
||||||
|
query_tree,
|
||||||
|
filtered_candidates,
|
||||||
|
exhaustive_number_hits,
|
||||||
|
distinct,
|
||||||
|
)) as Box<dyn Criterion>;
|
||||||
for name in self.index.criteria(&self.rtxn)? {
|
for name in self.index.criteria(&self.rtxn)? {
|
||||||
criterion = match name {
|
criterion = match name {
|
||||||
Name::Words => Box::new(Words::new(self, criterion)),
|
Name::Words => Box::new(Words::new(self, criterion)),
|
||||||
|
@ -348,6 +348,7 @@ mod test {
|
|||||||
use super::super::initial::Initial;
|
use super::super::initial::Initial;
|
||||||
use super::super::test::TestContext;
|
use super::super::test::TestContext;
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::search::NoopDistinct;
|
||||||
|
|
||||||
fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
|
fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
@ -368,7 +369,8 @@ mod test {
|
|||||||
excluded_candidates: &RoaringBitmap::new(),
|
excluded_candidates: &RoaringBitmap::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let parent = Initial::new(query_tree, facet_candidates);
|
let parent =
|
||||||
|
Initial::<NoopDistinct>::new(&context, query_tree, facet_candidates, false, None);
|
||||||
let criteria = Typo::new(&context, Box::new(parent));
|
let criteria = Typo::new(&context, Box::new(parent));
|
||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
@ -405,7 +407,8 @@ mod test {
|
|||||||
wdcache: &mut WordDerivationsCache::new(),
|
wdcache: &mut WordDerivationsCache::new(),
|
||||||
excluded_candidates: &RoaringBitmap::new(),
|
excluded_candidates: &RoaringBitmap::new(),
|
||||||
};
|
};
|
||||||
let parent = Initial::new(Some(query_tree), facet_candidates);
|
let parent =
|
||||||
|
Initial::<NoopDistinct>::new(&context, Some(query_tree), facet_candidates, false, None);
|
||||||
let criteria = Typo::new(&context, Box::new(parent));
|
let criteria = Typo::new(&context, Box::new(parent));
|
||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
@ -439,7 +442,13 @@ mod test {
|
|||||||
wdcache: &mut WordDerivationsCache::new(),
|
wdcache: &mut WordDerivationsCache::new(),
|
||||||
excluded_candidates: &RoaringBitmap::new(),
|
excluded_candidates: &RoaringBitmap::new(),
|
||||||
};
|
};
|
||||||
let parent = Initial::new(query_tree, Some(facet_candidates.clone()));
|
let parent = Initial::<NoopDistinct>::new(
|
||||||
|
&context,
|
||||||
|
query_tree,
|
||||||
|
Some(facet_candidates.clone()),
|
||||||
|
false,
|
||||||
|
None,
|
||||||
|
);
|
||||||
let criteria = Typo::new(&context, Box::new(parent));
|
let criteria = Typo::new(&context, Box::new(parent));
|
||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
@ -476,7 +485,13 @@ mod test {
|
|||||||
wdcache: &mut WordDerivationsCache::new(),
|
wdcache: &mut WordDerivationsCache::new(),
|
||||||
excluded_candidates: &RoaringBitmap::new(),
|
excluded_candidates: &RoaringBitmap::new(),
|
||||||
};
|
};
|
||||||
let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone()));
|
let parent = Initial::<NoopDistinct>::new(
|
||||||
|
&context,
|
||||||
|
Some(query_tree),
|
||||||
|
Some(facet_candidates.clone()),
|
||||||
|
false,
|
||||||
|
None,
|
||||||
|
);
|
||||||
let criteria = Typo::new(&context, Box::new(parent));
|
let criteria = Typo::new(&context, Box::new(parent));
|
||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
|
@ -21,6 +21,7 @@ const DOCID_SIZE: usize = size_of::<DocumentId>();
|
|||||||
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
||||||
/// iterations will never contain any occurrence of a document with the same distinct value as a
|
/// iterations will never contain any occurrence of a document with the same distinct value as a
|
||||||
/// document from previous iterations.
|
/// document from previous iterations.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct FacetDistinct<'a> {
|
pub struct FacetDistinct<'a> {
|
||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
|
@ -47,6 +47,7 @@ pub struct Search<'a> {
|
|||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: usize,
|
words_limit: usize,
|
||||||
|
exhaustive_number_hits: bool,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
}
|
}
|
||||||
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
|
|||||||
sort_criteria: None,
|
sort_criteria: None,
|
||||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
authorize_typos: true,
|
authorize_typos: true,
|
||||||
|
exhaustive_number_hits: false,
|
||||||
words_limit: 10,
|
words_limit: 10,
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
@ -107,6 +109,13 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Force the search to exhaustively compute the number of candidates,
|
||||||
|
/// this will increase the search time but allows finite pagination.
|
||||||
|
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
|
||||||
|
self.exhaustive_number_hits = exhaustive_number_hits;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn is_typo_authorized(&self) -> Result<bool> {
|
fn is_typo_authorized(&self) -> Result<bool> {
|
||||||
let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?;
|
let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?;
|
||||||
// only authorize typos if both the index and the query allow it.
|
// only authorize typos if both the index and the query allow it.
|
||||||
@ -184,20 +193,33 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||||
let criteria = criteria_builder.build(
|
|
||||||
query_tree,
|
|
||||||
primitive_query,
|
|
||||||
filtered_candidates,
|
|
||||||
self.sort_criteria.clone(),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
match self.index.distinct_field(self.rtxn)? {
|
match self.index.distinct_field(self.rtxn)? {
|
||||||
None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria),
|
None => {
|
||||||
|
let criteria = criteria_builder.build::<NoopDistinct>(
|
||||||
|
query_tree,
|
||||||
|
primitive_query,
|
||||||
|
filtered_candidates,
|
||||||
|
self.sort_criteria.clone(),
|
||||||
|
self.exhaustive_number_hits,
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
|
||||||
|
}
|
||||||
Some(name) => {
|
Some(name) => {
|
||||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
match field_ids_map.id(name) {
|
match field_ids_map.id(name) {
|
||||||
Some(fid) => {
|
Some(fid) => {
|
||||||
let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
|
let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
|
||||||
|
|
||||||
|
let criteria = criteria_builder.build(
|
||||||
|
query_tree,
|
||||||
|
primitive_query,
|
||||||
|
filtered_candidates,
|
||||||
|
self.sort_criteria.clone(),
|
||||||
|
self.exhaustive_number_hits,
|
||||||
|
Some(distinct.clone()),
|
||||||
|
)?;
|
||||||
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
|
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
|
||||||
}
|
}
|
||||||
None => Ok(SearchResult::default()),
|
None => Ok(SearchResult::default()),
|
||||||
@ -262,6 +284,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
authorize_typos,
|
authorize_typos,
|
||||||
words_limit,
|
words_limit,
|
||||||
|
exhaustive_number_hits,
|
||||||
rtxn: _,
|
rtxn: _,
|
||||||
index: _,
|
index: _,
|
||||||
} = self;
|
} = self;
|
||||||
@ -273,6 +296,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("sort_criteria", sort_criteria)
|
.field("sort_criteria", sort_criteria)
|
||||||
.field("terms_matching_strategy", terms_matching_strategy)
|
.field("terms_matching_strategy", terms_matching_strategy)
|
||||||
.field("authorize_typos", authorize_typos)
|
.field("authorize_typos", authorize_typos)
|
||||||
|
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||||
.field("words_limit", words_limit)
|
.field("words_limit", words_limit)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ macro_rules! test_criterion {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]);
|
test_criterion!(none_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]);
|
||||||
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]);
|
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]);
|
||||||
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]);
|
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]);
|
||||||
test_criterion!(
|
test_criterion!(
|
||||||
|
Loading…
Reference in New Issue
Block a user