Check that an edges combination contains results

This commit is contained in:
Kerollmops 2020-06-13 11:16:02 +02:00
parent b9cc6c10af
commit 69285b22d3
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 29 additions and 6 deletions

View File

@ -102,7 +102,8 @@ pub struct BestProximity<F> {
impl<F> BestProximity<F> {
pub fn new(positions: Vec<Vec<u32>>, contains_documents: F) -> BestProximity<F> {
BestProximity { positions, best_proximity: 0, contains_documents }
let best_proximity = positions.len() as u32 - 1;
BestProximity { positions, best_proximity, contains_documents }
}
}

View File

@ -119,13 +119,35 @@ impl Index {
let mut documents = Vec::new();
for (proximity, mut positions) in BestProximity::new(positions, |_, _| true) {
// TODO we must ignore positions paths that gives nothing
if (proximity as usize) < words.len() - 1 {
eprintln!("Skipping too short proximities of {}.", proximity);
continue
let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos)| {
use std::iter::once;
let left = (&words[lword], lpos);
let right = (&words[rword], rpos);
let mut intersect_docids: Option<RoaringBitmap> = None;
for (derived_words, pos) in once(left).chain(once(right)) {
let mut union_docids = RoaringBitmap::default();
// TODO re-enable the prefixes system
for word in derived_words.iter() {
let mut key = word.clone();
key.extend_from_slice(&pos.to_be_bytes());
if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() {
let right = RoaringBitmap::deserialize_from(attrs).unwrap();
union_docids.union_with(&right);
}
}
match &mut intersect_docids {
Some(left) => left.intersect_with(&union_docids),
None => intersect_docids = Some(union_docids),
}
}
intersect_docids.map_or(false, |i| !i.is_empty())
};
for (proximity, mut positions) in BestProximity::new(positions, contains_documents) {
positions.sort_unstable();
let same_prox_before = Instant::now();