mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
rewrite word pair distance gathering
This commit is contained in:
parent
d852dc0d2b
commit
13de251047
@ -1,7 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use itertools::Itertools;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use self::asc_desc::AscDesc;
|
use self::asc_desc::AscDesc;
|
||||||
@ -323,22 +322,18 @@ pub fn resolve_query_tree<'t>(
|
|||||||
let winsize = words.len().min(7);
|
let winsize = words.len().min(7);
|
||||||
|
|
||||||
for win in words.windows(winsize) {
|
for win in words.windows(winsize) {
|
||||||
// Get all the word pairs and their compute their relative distance
|
// Get all the documents with the matching distance for each word pair.
|
||||||
let dists = win
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.cartesian_product(win.iter().enumerate())
|
|
||||||
.filter(|(x, y)| y > x)
|
|
||||||
.map(|((pos1, s1), (pos2, s2))| (s1, s2, pos2 - pos1));
|
|
||||||
|
|
||||||
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
||||||
|
for (offset, s1) in win.iter().enumerate() {
|
||||||
for (s1, s2, d) in dists {
|
for (dist, s2) in win.iter().skip(offset).enumerate() {
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, d as u8)? {
|
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
||||||
Some(m) => bitmaps.push(m),
|
Some(m) => bitmaps.push(m),
|
||||||
|
// If there are no documents for this distance, there will be no
|
||||||
|
// results for the phrase query.
|
||||||
None => return Ok(RoaringBitmap::new()),
|
None => return Ok(RoaringBitmap::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
||||||
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
||||||
|
Loading…
Reference in New Issue
Block a user