Add a cache to the contains_documents success function

This commit is contained in:
Kerollmops 2020-06-14 13:10:30 +02:00
parent a8cda248b4
commit 8db16ff306
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 30 additions and 25 deletions

View File

@@ -102,7 +102,7 @@ impl Node {
} }
} }
fn is_reachable<F>(&self, mut contains_documents: F) -> bool fn is_reachable<F>(&self, contains_documents: &mut F) -> bool
where F: FnMut((usize, u32), (usize, u32)) -> bool, where F: FnMut((usize, u32), (usize, u32)) -> bool,
{ {
match self { match self {
@@ -133,7 +133,7 @@ impl<F> BestProximity<F> {
} }
impl<F> Iterator for BestProximity<F> impl<F> Iterator for BestProximity<F>
where F: FnMut((usize, u32), (usize, u32)) -> bool + Copy, where F: FnMut((usize, u32), (usize, u32)) -> bool,
{ {
type Item = (u32, Vec<Vec<u32>>); type Item = (u32, Vec<Vec<u32>>);
@@ -144,13 +144,15 @@ where F: FnMut((usize, u32), (usize, u32)) -> bool + Copy,
return None; return None;
} }
let BestProximity { positions, best_proximity, contains_documents } = self;
let result = astar_bag( let result = astar_bag(
&Node::Uninit, // start &Node::Uninit, // start
|n| n.successors(&self.positions, self.best_proximity), |n| n.successors(&positions, *best_proximity),
|_| 0, // heuristic |_| 0, // heuristic
|n| { // success |n| { // success
let c = n.is_complete(&self.positions) && n.proximity() >= self.best_proximity; let c = n.is_complete(&positions) && n.proximity() >= *best_proximity;
if n.is_reachable(self.contains_documents) { Some(c) } else { None } if n.is_reachable(contains_documents) { Some(c) } else { None }
}, },
); );

View File

@@ -120,32 +120,35 @@ impl Index {
let mut documents = Vec::new(); let mut documents = Vec::new();
let mut intersect_cache = HashMap::new();
let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos)| { let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos)| {
use std::iter::once; *intersect_cache.entry(((lword, lpos), (rword, rpos))).or_insert_with(|| {
use std::iter::once;
let left = (&words[lword], lpos); let left = (&words[lword], lpos);
let right = (&words[rword], rpos); let right = (&words[rword], rpos);
let mut intersect_docids: Option<RoaringBitmap> = None; let mut intersect_docids: Option<RoaringBitmap> = None;
for (derived_words, pos) in once(left).chain(once(right)) { for (derived_words, pos) in once(left).chain(once(right)) {
let mut union_docids = RoaringBitmap::default(); let mut union_docids = RoaringBitmap::default();
// TODO re-enable the prefixes system // TODO re-enable the prefixes system
for word in derived_words.iter() { for word in derived_words.iter() {
let mut key = word.clone(); let mut key = word.clone();
key.extend_from_slice(&pos.to_be_bytes()); key.extend_from_slice(&pos.to_be_bytes());
if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() { if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() {
let right = RoaringBitmap::deserialize_from(attrs).unwrap(); let right = RoaringBitmap::deserialize_from(attrs).unwrap();
union_docids.union_with(&right); union_docids.union_with(&right);
}
}
match &mut intersect_docids {
Some(left) => left.intersect_with(&union_docids),
None => intersect_docids = Some(union_docids),
} }
} }
match &mut intersect_docids { intersect_docids.map_or(false, |i| !i.is_empty())
Some(left) => left.intersect_with(&union_docids), })
None => intersect_docids = Some(union_docids),
}
}
intersect_docids.map_or(false, |i| !i.is_empty())
}; };
for (proximity, mut positions) in BestProximity::new(positions, contains_documents) { for (proximity, mut positions) in BestProximity::new(positions, contains_documents) {