mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Introduce the final Fetcher criterion
This commit is contained in:
parent
7ac09d7b7c
commit
daf126a638
107
milli/src/search/criteria/fetcher.rs
Normal file
107
milli/src/search/criteria/fetcher.rs
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::mem::take;
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::search::query_tree::Operation;
|
||||||
|
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
|
||||||
|
|
||||||
|
pub struct Fetcher<'t> {
|
||||||
|
ctx: &'t dyn Context,
|
||||||
|
query_tree: Option<Operation>,
|
||||||
|
candidates: Candidates,
|
||||||
|
parent: Option<Box<dyn Criterion + 't>>,
|
||||||
|
should_get_documents_ids: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Fetcher<'t> {
|
||||||
|
pub fn initial(
|
||||||
|
ctx: &'t dyn Context,
|
||||||
|
query_tree: Option<Operation>,
|
||||||
|
candidates: Option<RoaringBitmap>,
|
||||||
|
) -> Self
|
||||||
|
{
|
||||||
|
Fetcher {
|
||||||
|
ctx,
|
||||||
|
query_tree,
|
||||||
|
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
|
||||||
|
parent: None,
|
||||||
|
should_get_documents_ids: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new(
|
||||||
|
ctx: &'t dyn Context,
|
||||||
|
parent: Box<dyn Criterion + 't>,
|
||||||
|
) -> Self
|
||||||
|
{
|
||||||
|
Fetcher {
|
||||||
|
ctx,
|
||||||
|
query_tree: None,
|
||||||
|
candidates: Candidates::default(),
|
||||||
|
parent: Some(parent),
|
||||||
|
should_get_documents_ids: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Criterion for Fetcher<'t> {
|
||||||
|
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
|
use Candidates::{Allowed, Forbidden};
|
||||||
|
loop {
|
||||||
|
debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
|
||||||
|
self.should_get_documents_ids, self.candidates,
|
||||||
|
);
|
||||||
|
|
||||||
|
match &mut self.candidates {
|
||||||
|
Allowed(candidates) => if candidates.is_empty() {
|
||||||
|
self.candidates = Candidates::default();
|
||||||
|
} else {
|
||||||
|
self.should_get_documents_ids = false;
|
||||||
|
let candidates = take(&mut self.candidates).into_inner();
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: self.query_tree.clone(),
|
||||||
|
candidates: candidates.clone(),
|
||||||
|
bucket_candidates: Some(candidates),
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
Forbidden(_) => {
|
||||||
|
let should_get_documents_ids = take(&mut self.should_get_documents_ids);
|
||||||
|
match self.parent.as_mut() {
|
||||||
|
Some(parent) => {
|
||||||
|
match parent.next()? {
|
||||||
|
Some(result) => return Ok(Some(result)),
|
||||||
|
None => if should_get_documents_ids {
|
||||||
|
let candidates = match &self.query_tree {
|
||||||
|
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?,
|
||||||
|
None => self.ctx.documents_ids()?,
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: self.query_tree.clone(),
|
||||||
|
candidates: candidates.clone(),
|
||||||
|
bucket_candidates: Some(candidates),
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => if should_get_documents_ids {
|
||||||
|
let candidates = match &self.query_tree {
|
||||||
|
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?,
|
||||||
|
None => self.ctx.documents_ids()?,
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: self.query_tree.clone(),
|
||||||
|
candidates: candidates.clone(),
|
||||||
|
bucket_candidates: Some(candidates),
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return Ok(None);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,16 +1,19 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use anyhow::bail;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
use crate::search::word_derivations;
|
use crate::search::word_derivations;
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::query_tree::{Operation, Query, QueryKind};
|
use super::query_tree::{Operation, Query, QueryKind};
|
||||||
|
|
||||||
pub mod typo;
|
pub mod typo;
|
||||||
pub mod words;
|
pub mod words;
|
||||||
pub mod asc_desc;
|
pub mod asc_desc;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
|
pub mod fetcher;
|
||||||
|
|
||||||
pub trait Criterion {
|
pub trait Criterion {
|
||||||
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>;
|
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>;
|
||||||
@ -51,6 +54,7 @@ impl Default for Candidates {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub trait Context {
|
pub trait Context {
|
||||||
|
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
@ -66,6 +70,10 @@ pub struct HeedContext<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Context for HeedContext<'a> {
|
impl<'a> Context for HeedContext<'a> {
|
||||||
|
/// Asks the index for its documents ids, reading through the transaction
/// held by this context.
fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
    let rtxn = self.rtxn;
    self.index.documents_ids(rtxn)
}
|
||||||
|
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.word_docids.get(self.rtxn, &word)
|
self.index.word_docids.get(self.rtxn, &word)
|
||||||
}
|
}
|
||||||
@ -107,6 +115,80 @@ impl<'t> HeedContext<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn resolve_query_tree<'t>(
|
||||||
|
ctx: &'t dyn Context,
|
||||||
|
query_tree: &Operation,
|
||||||
|
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
|
{
|
||||||
|
fn resolve_operation<'t>(
|
||||||
|
ctx: &'t dyn Context,
|
||||||
|
query_tree: &Operation,
|
||||||
|
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
|
{
|
||||||
|
use Operation::{And, Consecutive, Or, Query};
|
||||||
|
|
||||||
|
match query_tree {
|
||||||
|
And(ops) => {
|
||||||
|
let mut ops = ops.iter().map(|op| {
|
||||||
|
resolve_operation(ctx, op, cache)
|
||||||
|
}).collect::<anyhow::Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
ops.sort_unstable_by_key(|cds| cds.len());
|
||||||
|
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
let mut first_loop = true;
|
||||||
|
for docids in ops {
|
||||||
|
if first_loop {
|
||||||
|
candidates = docids;
|
||||||
|
first_loop = false;
|
||||||
|
} else {
|
||||||
|
candidates.intersect_with(&docids);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(candidates)
|
||||||
|
},
|
||||||
|
Consecutive(ops) => {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
let mut first_loop = true;
|
||||||
|
for slice in ops.windows(2) {
|
||||||
|
match (&slice[0], &slice[1]) {
|
||||||
|
(Operation::Query(left), Operation::Query(right)) => {
|
||||||
|
match query_pair_proximity_docids(ctx, left, right, 1)? {
|
||||||
|
pair_docids if pair_docids.is_empty() => {
|
||||||
|
return Ok(RoaringBitmap::new())
|
||||||
|
},
|
||||||
|
pair_docids if first_loop => {
|
||||||
|
candidates = pair_docids;
|
||||||
|
first_loop = false;
|
||||||
|
},
|
||||||
|
pair_docids => {
|
||||||
|
candidates.intersect_with(&pair_docids);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => bail!("invalid consecutive query type"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(candidates)
|
||||||
|
},
|
||||||
|
Or(_, ops) => {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
for op in ops {
|
||||||
|
let docids = resolve_operation(ctx, op, cache)?;
|
||||||
|
candidates.union_with(&docids);
|
||||||
|
}
|
||||||
|
Ok(candidates)
|
||||||
|
},
|
||||||
|
Query(q) => Ok(query_docids(ctx, q)?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve_operation(ctx, query_tree, cache)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
left_words: &[(T, u8)],
|
left_words: &[(T, u8)],
|
||||||
@ -218,6 +300,10 @@ pub mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Context for TestContext<'a> {
|
impl<'a> Context for TestContext<'a> {
|
||||||
|
fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
|
||||||
|
Ok(self.word_docids.iter().fold(RoaringBitmap::new(), |acc, (_, docids)| acc | docids))
|
||||||
|
}
|
||||||
|
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
Ok(self.word_docids.get(&word.to_string()).cloned())
|
Ok(self.word_docids.get(&word.to_string()).cloned())
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use anyhow::bail;
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids};
|
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
|
||||||
|
|
||||||
pub struct Words<'t> {
|
pub struct Words<'t> {
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
@ -62,7 +61,7 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
self.candidates = Candidates::default();
|
self.candidates = Candidates::default();
|
||||||
},
|
},
|
||||||
(Some(qt), Allowed(candidates)) => {
|
(Some(qt), Allowed(candidates)) => {
|
||||||
let mut found_candidates = resolve_candidates(self.ctx, &qt, &mut self.candidates_cache)?;
|
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||||
found_candidates.intersect_with(&candidates);
|
found_candidates.intersect_with(&candidates);
|
||||||
candidates.difference_with(&found_candidates);
|
candidates.difference_with(&found_candidates);
|
||||||
|
|
||||||
@ -78,7 +77,7 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
(Some(qt), Forbidden(candidates)) => {
|
(Some(qt), Forbidden(candidates)) => {
|
||||||
let mut found_candidates = resolve_candidates(self.ctx, &qt, &mut self.candidates_cache)?;
|
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||||
found_candidates.difference_with(&candidates);
|
found_candidates.difference_with(&candidates);
|
||||||
candidates.union_with(&found_candidates);
|
candidates.union_with(&found_candidates);
|
||||||
|
|
||||||
@ -127,76 +126,3 @@ fn explode_query_tree(query_tree: Operation) -> Vec<Operation> {
|
|||||||
otherwise => vec![otherwise],
|
otherwise => vec![otherwise],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_candidates<'t>(
|
|
||||||
ctx: &'t dyn Context,
|
|
||||||
query_tree: &Operation,
|
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
|
||||||
{
|
|
||||||
fn resolve_operation<'t>(
|
|
||||||
ctx: &'t dyn Context,
|
|
||||||
query_tree: &Operation,
|
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
|
||||||
{
|
|
||||||
use Operation::{And, Consecutive, Or, Query};
|
|
||||||
|
|
||||||
match query_tree {
|
|
||||||
And(ops) => {
|
|
||||||
let mut ops = ops.iter().map(|op| {
|
|
||||||
resolve_operation(ctx, op, cache)
|
|
||||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
|
||||||
|
|
||||||
ops.sort_unstable_by_key(|cds| cds.len());
|
|
||||||
|
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
let mut first_loop = true;
|
|
||||||
for docids in ops {
|
|
||||||
if first_loop {
|
|
||||||
candidates = docids;
|
|
||||||
first_loop = false;
|
|
||||||
} else {
|
|
||||||
candidates.intersect_with(&docids);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
},
|
|
||||||
Consecutive(ops) => {
|
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
let mut first_loop = true;
|
|
||||||
for slice in ops.windows(2) {
|
|
||||||
match (&slice[0], &slice[1]) {
|
|
||||||
(Operation::Query(left), Operation::Query(right)) => {
|
|
||||||
match query_pair_proximity_docids(ctx, left, right, 1)? {
|
|
||||||
pair_docids if pair_docids.is_empty() => {
|
|
||||||
return Ok(RoaringBitmap::new())
|
|
||||||
},
|
|
||||||
pair_docids if first_loop => {
|
|
||||||
candidates = pair_docids;
|
|
||||||
first_loop = false;
|
|
||||||
},
|
|
||||||
pair_docids => {
|
|
||||||
candidates.intersect_with(&pair_docids);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
_ => bail!("invalid consecutive query type"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
},
|
|
||||||
Or(_, ops) => {
|
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
for op in ops {
|
|
||||||
let docids = resolve_operation(ctx, op, cache)?;
|
|
||||||
candidates.union_with(&docids);
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
},
|
|
||||||
Query(q) => Ok(query_docids(ctx, q)?),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resolve_operation(ctx, query_tree, cache)
|
|
||||||
}
|
|
||||||
|
@ -10,7 +10,7 @@ use once_cell::sync::Lazy;
|
|||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
use crate::search::criteria::{Criterion, CriterionResult};
|
use crate::search::criteria::{Criterion, CriterionResult};
|
||||||
use crate::search::criteria::{typo::Typo, words::Words, proximity::Proximity};
|
use crate::search::criteria::{typo::Typo, words::Words, proximity::Proximity, fetcher::Fetcher};
|
||||||
use crate::{Index, DocumentId};
|
use crate::{Index, DocumentId};
|
||||||
|
|
||||||
pub use self::facet::FacetIter;
|
pub use self::facet::FacetIter;
|
||||||
@ -92,13 +92,12 @@ impl<'a> Search<'a> {
|
|||||||
None => MatchingWords::default(),
|
None => MatchingWords::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// We are testing the typo criteria but there will be more of them soon.
|
|
||||||
let criteria_ctx = criteria::HeedContext::new(self.rtxn, self.index)?;
|
let criteria_ctx = criteria::HeedContext::new(self.rtxn, self.index)?;
|
||||||
let typo_criterion = Typo::initial(&criteria_ctx, query_tree, facet_candidates)?;
|
let typo_criterion = Typo::initial(&criteria_ctx, query_tree, facet_candidates)?;
|
||||||
let words_criterion = Words::new(&criteria_ctx, Box::new(typo_criterion))?;
|
let words_criterion = Words::new(&criteria_ctx, Box::new(typo_criterion))?;
|
||||||
let proximity_criterion = Proximity::new(&criteria_ctx, Box::new(words_criterion))?;
|
let proximity_criterion = Proximity::new(&criteria_ctx, Box::new(words_criterion))?;
|
||||||
// let proximity_criterion = Proximity::initial(&criteria_ctx, query_tree, facet_candidates)?;
|
let fetcher_criterion = Fetcher::new(&criteria_ctx, Box::new(proximity_criterion));
|
||||||
let mut criteria = proximity_criterion;
|
let mut criteria = fetcher_criterion;
|
||||||
|
|
||||||
// // We sort in descending order on a specific field *by hand*, don't do that at home.
|
// // We sort in descending order on a specific field *by hand*, don't do that at home.
|
||||||
// let attr_name = "released-timestamp";
|
// let attr_name = "released-timestamp";
|
||||||
|
Loading…
Reference in New Issue
Block a user