mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Improve the mDFS performance and return the proximity
This commit is contained in:
parent
bb15f16d8c
commit
e9e03259c1
49
src/mdfs.rs
49
src/mdfs.rs
@ -32,18 +32,18 @@ impl<'a> Mdfs<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for Mdfs<'a> {
|
impl<'a> Iterator for Mdfs<'a> {
|
||||||
type Item = anyhow::Result<RoaringBitmap>;
|
type Item = anyhow::Result<(u32, RoaringBitmap)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
// If there is less or only one word therefore the only
|
// If there is less or only one word therefore the only
|
||||||
// possible documents that we can return are the candidates.
|
// possible documents that we can return are the candidates.
|
||||||
if self.words.len() <= 1 {
|
if self.words.len() <= 1 {
|
||||||
if self.candidates.is_empty() { return None }
|
if self.candidates.is_empty() { return None }
|
||||||
return Some(Ok(mem::take(&mut self.candidates)));
|
return Some(Ok((0, mem::take(&mut self.candidates))));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut answer = RoaringBitmap::new();
|
|
||||||
while self.mana <= self.max_mana {
|
while self.mana <= self.max_mana {
|
||||||
|
let mut answer = RoaringBitmap::new();
|
||||||
let result = mdfs_step(
|
let result = mdfs_step(
|
||||||
&self.index,
|
&self.index,
|
||||||
&self.rtxn,
|
&self.rtxn,
|
||||||
@ -52,24 +52,25 @@ impl<'a> Iterator for Mdfs<'a> {
|
|||||||
&self.candidates,
|
&self.candidates,
|
||||||
&self.candidates,
|
&self.candidates,
|
||||||
&mut self.union_cache,
|
&mut self.union_cache,
|
||||||
|
&mut answer,
|
||||||
);
|
);
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(Some(a)) => {
|
Ok(()) => {
|
||||||
// We remove the answered documents from the list of
|
// We always increase the mana for the next loop.
|
||||||
// candidates to be sure we don't search for them again.
|
let proximity = self.mana;
|
||||||
self.candidates.difference_with(&a);
|
|
||||||
answer.union_with(&a);
|
|
||||||
},
|
|
||||||
Ok(None) => {
|
|
||||||
// We found the last iteration for this amount of mana that gives nothing,
|
|
||||||
// we can now store that the next mana to use for the loop is incremented.
|
|
||||||
self.mana = self.mana + 1;
|
self.mana = self.mana + 1;
|
||||||
// If the answer is empty it means that we found nothing for this amount
|
|
||||||
// of mana therefore we continue with a bigger mana.
|
// If no documents were found we must not return but instead continue
|
||||||
|
// the search with more mana.
|
||||||
if !answer.is_empty() {
|
if !answer.is_empty() {
|
||||||
// Otherwise we return the answer.
|
|
||||||
return Some(Ok(answer));
|
// We remove the answered documents from the list of
|
||||||
|
// candidates to be sure we don't search for them again.
|
||||||
|
self.candidates.difference_with(&answer);
|
||||||
|
|
||||||
|
// We return the answer.
|
||||||
|
return Some(Ok((proximity, answer)));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Err(e) => return Some(Err(e)),
|
Err(e) => return Some(Err(e)),
|
||||||
@ -88,7 +89,8 @@ fn mdfs_step(
|
|||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
parent_docids: &RoaringBitmap,
|
parent_docids: &RoaringBitmap,
|
||||||
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
||||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
answer: &mut RoaringBitmap,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
use std::cmp::{min, max};
|
use std::cmp::{min, max};
|
||||||
|
|
||||||
@ -126,19 +128,22 @@ fn mdfs_step(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// We must be sure that we only return docids that are present in the candidates.
|
||||||
docids.intersect_with(parent_docids);
|
docids.intersect_with(parent_docids);
|
||||||
|
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
let mana = mana.checked_sub(proximity as u32).unwrap();
|
let mana = mana.checked_sub(proximity as u32).unwrap();
|
||||||
// We are the last pair, we return without recursing as we don't have any child.
|
if tail.len() < 2 {
|
||||||
if tail.len() < 2 { return Ok(Some(docids)) }
|
// We are the last pair, we return without recursing as we don't have any child.
|
||||||
if let Some(di) = mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache)? {
|
answer.union_with(&docids);
|
||||||
return Ok(Some(di))
|
return Ok(());
|
||||||
|
} else {
|
||||||
|
return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(None)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn words_pair_combinations<'h>(
|
fn words_pair_combinations<'h>(
|
||||||
|
@ -164,8 +164,8 @@ impl<'a> Search<'a> {
|
|||||||
// We execute the Mdfs iterator until we find enough documents.
|
// We execute the Mdfs iterator until we find enough documents.
|
||||||
while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 {
|
while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 {
|
||||||
match mdfs.next().transpose()? {
|
match mdfs.next().transpose()? {
|
||||||
Some(answer) => {
|
Some((proximity, answer)) => {
|
||||||
debug!("answer: {:?}", answer);
|
debug!("answer with a proximity of {}: {:?}", proximity, answer);
|
||||||
documents.push(answer);
|
documents.push(answer);
|
||||||
},
|
},
|
||||||
None => break,
|
None => break,
|
||||||
|
@ -90,7 +90,7 @@ fn is_chinese(c: char) -> bool {
|
|||||||
/// length of the found key. Otherwise `None` is returned.
|
/// length of the found key. Otherwise `None` is returned.
|
||||||
///
|
///
|
||||||
/// This can be used to e.g. build tokenizing functions.
|
/// This can be used to e.g. build tokenizing functions.
|
||||||
//
|
// Copyright @llogiq
|
||||||
// https://github.com/BurntSushi/fst/pull/104
|
// https://github.com/BurntSushi/fst/pull/104
|
||||||
#[inline]
|
#[inline]
|
||||||
fn find_longest_prefix(fst: &Fst<&[u8]>, value: &[u8]) -> Option<(u64, usize)> {
|
fn find_longest_prefix(fst: &Fst<&[u8]>, value: &[u8]) -> Option<(u64, usize)> {
|
||||||
|
Loading…
Reference in New Issue
Block a user