From 69285b22d338e830b0f9b523982e1beead80021d Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Sat, 13 Jun 2020 11:16:02 +0200 Subject: [PATCH] Check that an edges combination contains results --- src/best_proximity.rs | 3 ++- src/lib.rs | 32 +++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/best_proximity.rs b/src/best_proximity.rs index 192c387f1..49f90eb8e 100644 --- a/src/best_proximity.rs +++ b/src/best_proximity.rs @@ -102,7 +102,8 @@ pub struct BestProximity { impl BestProximity { pub fn new(positions: Vec>, contains_documents: F) -> BestProximity { - BestProximity { positions, best_proximity: 0, contains_documents } + let best_proximity = positions.len() as u32 - 1; + BestProximity { positions, best_proximity, contains_documents } } } diff --git a/src/lib.rs b/src/lib.rs index 6918eccda..fee6831ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -119,13 +119,35 @@ impl Index { let mut documents = Vec::new(); - for (proximity, mut positions) in BestProximity::new(positions, |_, _| true) { - // TODO we must ignore positions paths that gives nothing - if (proximity as usize) < words.len() - 1 { - eprintln!("Skipping too short proximities of {}.", proximity); - continue + let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos)| { + use std::iter::once; + + let left = (&words[lword], lpos); + let right = (&words[rword], rpos); + + let mut intersect_docids: Option = None; + for (derived_words, pos) in once(left).chain(once(right)) { + let mut union_docids = RoaringBitmap::default(); + // TODO re-enable the prefixes system + for word in derived_words.iter() { + let mut key = word.clone(); + key.extend_from_slice(&pos.to_be_bytes()); + if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() { + let right = RoaringBitmap::deserialize_from(attrs).unwrap(); + union_docids.union_with(&right); + } + } + + match &mut intersect_docids { + Some(left) => left.intersect_with(&union_docids), + None => intersect_docids = Some(union_docids), + } } + intersect_docids.map_or(false, |i| !i.is_empty()) + }; + + for (proximity, mut positions) in BestProximity::new(positions, contains_documents) { positions.sort_unstable(); let same_prox_before = Instant::now();