From 8db16ff30619bde34a3a9da3b053392287b65496 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Sun, 14 Jun 2020 13:10:30 +0200 Subject: [PATCH] Add a cache to the contains_documents success function --- src/best_proximity.rs | 12 +++++++----- src/lib.rs | 43 +++++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/best_proximity.rs b/src/best_proximity.rs index b17f176c7..0f4b7c233 100644 --- a/src/best_proximity.rs +++ b/src/best_proximity.rs @@ -102,7 +102,7 @@ impl Node { } } - fn is_reachable(&self, mut contains_documents: F) -> bool + fn is_reachable(&self, contains_documents: &mut F) -> bool where F: FnMut((usize, u32), (usize, u32)) -> bool, { match self { @@ -133,7 +133,7 @@ impl BestProximity { } impl Iterator for BestProximity -where F: FnMut((usize, u32), (usize, u32)) -> bool + Copy, +where F: FnMut((usize, u32), (usize, u32)) -> bool, { type Item = (u32, Vec>); @@ -144,13 +144,15 @@ where F: FnMut((usize, u32), (usize, u32)) -> bool + Copy, return None; } + let BestProximity { positions, best_proximity, contains_documents } = self; + let result = astar_bag( &Node::Uninit, // start - |n| n.successors(&self.positions, self.best_proximity), + |n| n.successors(&positions, *best_proximity), |_| 0, // heuristic |n| { // success - let c = n.is_complete(&self.positions) && n.proximity() >= self.best_proximity; - if n.is_reachable(self.contains_documents) { Some(c) } else { None } + let c = n.is_complete(&positions) && n.proximity() >= *best_proximity; + if n.is_reachable(contains_documents) { Some(c) } else { None } }, ); diff --git a/src/lib.rs b/src/lib.rs index d49621483..151ddc6d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -120,32 +120,35 @@ impl Index { let mut documents = Vec::new(); + let mut intersect_cache = HashMap::new(); let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos)| { - use std::iter::once; + *intersect_cache.entry(((lword, lpos), (rword, rpos))).or_insert_with(|| { + use std::iter::once; - let left = (&words[lword], lpos); - let right = (&words[rword], rpos); + let left = (&words[lword], lpos); + let right = (&words[rword], rpos); - let mut intersect_docids: Option = None; - for (derived_words, pos) in once(left).chain(once(right)) { - let mut union_docids = RoaringBitmap::default(); - // TODO re-enable the prefixes system - for word in derived_words.iter() { - let mut key = word.clone(); - key.extend_from_slice(&pos.to_be_bytes()); - if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() { - let right = RoaringBitmap::deserialize_from(attrs).unwrap(); - union_docids.union_with(&right); + let mut intersect_docids: Option = None; + for (derived_words, pos) in once(left).chain(once(right)) { + let mut union_docids = RoaringBitmap::default(); + // TODO re-enable the prefixes system + for word in derived_words.iter() { + let mut key = word.clone(); + key.extend_from_slice(&pos.to_be_bytes()); + if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() { + let right = RoaringBitmap::deserialize_from(attrs).unwrap(); + union_docids.union_with(&right); + } + } + + match &mut intersect_docids { + Some(left) => left.intersect_with(&union_docids), + None => intersect_docids = Some(union_docids), } } - match &mut intersect_docids { - Some(left) => left.intersect_with(&union_docids), - None => intersect_docids = Some(union_docids), - } - } - - intersect_docids.map_or(false, |i| !i.is_empty()) + intersect_docids.map_or(false, |i| !i.is_empty()) + }) }; for (proximity, mut positions) in BestProximity::new(positions, contains_documents) {