fix phrase search

This commit is contained in:
ad hoc 2022-02-01 20:10:16 +01:00
parent 38d23546a5
commit d852dc0d2b
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 46 additions and 15 deletions

View File

@ -448,8 +448,10 @@ impl Search {
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
struct SettingsUpdate { struct SettingsUpdate {
#[structopt(short, long)] #[structopt(long)]
filterable_attributes: Option<Vec<String>>, filterable_attributes: Option<Vec<String>>,
#[structopt(long)]
criteria: Option<Vec<String>>,
} }
impl Performer for SettingsUpdate { impl Performer for SettingsUpdate {
@ -468,6 +470,14 @@ impl Performer for SettingsUpdate {
} }
} }
if let Some(criteria) = self.criteria {
if !criteria.is_empty() {
update.set_criteria(criteria);
} else {
update.reset_criteria();
}
}
let mut bars = Vec::new(); let mut bars = Vec::new();
let progesses = MultiProgress::new(); let progesses = MultiProgress::new();
for _ in 0..4 { for _ in 0..4 {

View File

@ -1,6 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use itertools::Itertools;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use self::asc_desc::AscDesc; use self::asc_desc::AscDesc;
@ -318,21 +319,41 @@ pub fn resolve_query_tree<'t>(
} }
Phrase(words) => { Phrase(words) => {
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
let mut first_loop = true; let mut first_iter = true;
for slice in words.windows(2) { let winsize = words.len().min(7);
let (left, right) = (&slice[0], &slice[1]);
match ctx.word_pair_proximity_docids(left, right, 1)? { for win in words.windows(winsize) {
Some(pair_docids) => { // Get all the word pairs and compute their relative distance
if pair_docids.is_empty() { let dists = win
return Ok(RoaringBitmap::new()); .iter()
} else if first_loop { .enumerate()
candidates = pair_docids; .cartesian_product(win.iter().enumerate())
first_loop = false; .filter(|(x, y)| y > x)
} else { .map(|((pos1, s1), (pos2, s2))| (s1, s2, pos2 - pos1));
candidates &= pair_docids;
} let mut bitmaps = Vec::with_capacity(winsize.pow(2));
for (s1, s2, d) in dists {
match ctx.word_pair_proximity_docids(s1, s2, d as u8)? {
Some(m) => bitmaps.push(m),
None => return Ok(RoaringBitmap::new()),
}
}
// We sort the bitmaps so that we perform the small intersections first, which is faster.
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
for bitmap in bitmaps {
if first_iter {
candidates = bitmap;
first_iter = false;
} else {
candidates &= bitmap;
}
// There will be no match, return early
if candidates.is_empty() {
break;
} }
None => return Ok(RoaringBitmap::new()),
} }
} }
Ok(candidates) Ok(candidates)