Optimize Attribute criterion on big requests

This commit is contained in:
many 2021-04-15 12:22:44 +02:00
parent 716c8e22b0
commit e77291a6f3
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA

View File

@ -101,7 +101,7 @@ impl<'t> Criterion for Attribute<'t> {
}, },
(Some(qt), None) => { (Some(qt), None) => {
let query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?; let query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?;
self.bucket_candidates.union_with(&query_tree_candidates); self.bucket_candidates |= &query_tree_candidates;
self.candidates = Some(query_tree_candidates); self.candidates = Some(query_tree_candidates);
}, },
(None, Some(_)) => { (None, Some(_)) => {
@ -123,7 +123,7 @@ impl<'t> Criterion for Attribute<'t> {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree; self.query_tree = query_tree;
self.candidates = candidates; self.candidates = candidates;
self.bucket_candidates.union_with(&bucket_candidates); self.bucket_candidates |= bucket_candidates;
self.flattened_query_tree = None; self.flattened_query_tree = None;
self.current_buckets = None; self.current_buckets = None;
}, },
@ -160,14 +160,12 @@ impl<'t, 'q> WordLevelIterator<'t, 'q> {
} }
} }
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel) -> heed::Result<Self> { fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> {
let level = level.min(&self.level).clone(); let level = level.min(&self.level).clone();
let interval_size = 4u32.pow(Into::<u8>::into(level.clone()) as u32); let interval_size = 4u32.pow(Into::<u8>::into(level.clone()) as u32);
let word = self.word.clone(); let word = self.word.clone();
let in_prefix_cache = self.in_prefix_cache; let in_prefix_cache = self.in_prefix_cache;
// TODO try to dig starting from the current interval let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
// let left = self.current_interval.map(|(left, _)| left);
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None}) Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None})
} }
@ -209,6 +207,7 @@ struct QueryLevelIterator<'t, 'q> {
level: TreeLevel, level: TreeLevel,
accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>, accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
parent_accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>, parent_accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
interval_to_skip: usize,
} }
impl<'t, 'q> QueryLevelIterator<'t, 'q> { impl<'t, 'q> QueryLevelIterator<'t, 'q> {
@ -250,6 +249,7 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
level, level,
accumulator: vec![], accumulator: vec![],
parent_accumulator: vec![], parent_accumulator: vec![],
interval_to_skip: 0,
})), })),
None => Ok(None), None => Ok(None),
} }
@ -270,16 +270,15 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
None => (self.level.saturating_sub(1), None), None => (self.level.saturating_sub(1), None),
}; };
let left_interval = self.accumulator.get(self.interval_to_skip).map(|opt| opt.as_ref().map(|(left, _, _)| *left)).flatten();
let mut inner = Vec::with_capacity(self.inner.len()); let mut inner = Vec::with_capacity(self.inner.len());
for word_level_iterator in self.inner.iter() { for word_level_iterator in self.inner.iter() {
inner.push(word_level_iterator.dig(ctx, &level)?); inner.push(word_level_iterator.dig(ctx, &level, left_interval)?);
} }
Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![]}) Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![], interval_to_skip: 0})
} }
fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> { fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
let mut accumulated: Option<(u32, u32, RoaringBitmap)> = None; let mut accumulated: Option<(u32, u32, RoaringBitmap)> = None;
let u8_level = Into::<u8>::into(level); let u8_level = Into::<u8>::into(level);
@ -289,12 +288,13 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
let accumulated_count = 4u32.pow((u8_level - wli_u8_level) as u32); let accumulated_count = 4u32.pow((u8_level - wli_u8_level) as u32);
for _ in 0..accumulated_count { for _ in 0..accumulated_count {
if let Some((next_left, _, next_docids)) = wli.next()? { if let Some((next_left, _, next_docids)) = wli.next()? {
accumulated = accumulated.take().map( accumulated = match accumulated.take(){
|(acc_left, acc_right, mut acc_docids)| { Some((acc_left, acc_right, mut acc_docids)) => {
acc_docids.union_with(&next_docids); acc_docids |= next_docids;
(acc_left, acc_right, acc_docids) Some((acc_left, acc_right, acc_docids))
} },
).or_else(|| Some((next_left, next_left + interval_size, next_docids))); None => Some((next_left, next_left + interval_size, next_docids)),
};
} }
} }
} }
@ -304,35 +304,59 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
/// return the next meta-interval created from inner WordLevelIterators, /// return the next meta-interval created from inner WordLevelIterators,
/// and from eventual chainned QueryLevelIterator. /// and from eventual chainned QueryLevelIterator.
fn next(&mut self) -> heed::Result<(TreeLevel, Option<(u32, u32, RoaringBitmap)>)> { fn next(&mut self, allowed_candidates: &RoaringBitmap, tree_level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
let parent_result = match self.parent.as_mut() { let parent_result = match self.parent.as_mut() {
Some(parent) => { Some(parent) => {
Some(parent.next()?) Some(parent.next(allowed_candidates, tree_level)?)
}, },
None => None, None => None,
}; };
match parent_result { match parent_result {
Some((parent_level, parent_next)) => { Some(parent_next) => {
let inner_next = self.inner_next(parent_level)?; let inner_next = self.inner_next(tree_level)?;
self.interval_to_skip += self.accumulator.iter().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip).take_while(|current| {
match current {
(Some((_, _, inner)), Some((_, _, parent))) => {
inner.is_disjoint(allowed_candidates) && parent.is_empty()
},
(Some((_, _, inner)), None) => {
inner.is_disjoint(allowed_candidates)
},
(None, Some((_, _, parent))) => {
parent.is_empty()
},
(None, None) => true,
}
}).count();
self.accumulator.push(inner_next); self.accumulator.push(inner_next);
self.parent_accumulator.push(parent_next); self.parent_accumulator.push(parent_next);
// TODO @many clean firsts intervals of both accumulators when both RoaringBitmap are empty, let mut merged_interval: Option<(u32, u32, RoaringBitmap)> = None;
// WARNING the cleaned intervals count needs to be kept to skip at the end
let mut merged_interval = None; for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip) {
for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()) {
if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current { if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current {
let (_, _, merged_docids) = merged_interval.get_or_insert_with(|| (left_a + left_b, right_a + right_b, RoaringBitmap::new())); match merged_interval.as_mut() {
merged_docids.union_with(&(a & b)); Some((_, _, merged_docids)) => *merged_docids |= a & b,
None => merged_interval = Some((left_a + left_b, right_a + right_b, a & b)),
}
} }
} }
Ok((parent_level, merged_interval)) Ok(merged_interval)
}, },
None => { None => {
let level = self.level.clone(); let level = self.level;
let next_interval = self.inner_next(level.clone())?; match self.inner_next(level)? {
self.accumulator = vec![next_interval.clone()]; Some((left, right, mut candidates)) => {
Ok((level, next_interval)) self.accumulator = vec![Some((left, right, RoaringBitmap::new()))];
candidates &= allowed_candidates;
Ok(Some((left, right, candidates)))
},
None => {
self.accumulator = vec![None];
Ok(None)
},
}
} }
} }
} }
@ -346,17 +370,31 @@ struct Branch<'t, 'q> {
} }
impl<'t, 'q> Branch<'t, 'q> { impl<'t, 'q> Branch<'t, 'q> {
fn next(&mut self) -> heed::Result<bool> { fn next(&mut self, allowed_candidates: &RoaringBitmap) -> heed::Result<bool> {
match self.query_level_iterator.next()? { let tree_level = self.query_level_iterator.level;
(tree_level, Some(last_result)) => { match self.query_level_iterator.next(allowed_candidates, tree_level)? {
Some(last_result) => {
self.last_result = last_result; self.last_result = last_result;
self.tree_level = tree_level; self.tree_level = tree_level;
Ok(true) Ok(true)
}, },
(_, None) => Ok(false), None => Ok(false),
} }
} }
fn dig(&mut self, ctx: &'t dyn Context<'t>) -> heed::Result<()> {
self.query_level_iterator = self.query_level_iterator.dig(ctx)?;
Ok(())
}
fn lazy_next(&mut self) {
let u8_level = Into::<u8>::into(self.tree_level.clone());
let interval_size = 4u32.pow(u8_level as u32);
let (left, right, _) = self.last_result;
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
}
fn compute_rank(&self) -> u32 { fn compute_rank(&self) -> u32 {
// we compute a rank from the left interval. // we compute a rank from the left interval.
let (left, _, _) = self.last_result; let (left, _, _) = self.last_result;
@ -367,11 +405,11 @@ impl<'t, 'q> Branch<'t, 'q> {
let self_rank = self.compute_rank(); let self_rank = self.compute_rank();
let other_rank = other.compute_rank(); let other_rank = other.compute_rank();
let left_cmp = self_rank.cmp(&other_rank).reverse(); let left_cmp = self_rank.cmp(&other_rank).reverse();
// on level: higher is better, // on level: lower is better,
// we want to reduce highest levels first. // we want to dig faster into levels on interesting branches.
let level_cmp = self.tree_level.cmp(&other.tree_level); let level_cmp = self.tree_level.cmp(&other.tree_level).reverse();
left_cmp.then(level_cmp) left_cmp.then(level_cmp).then(self.last_result.2.len().cmp(&other.last_result.2.len()))
} }
} }
@ -398,6 +436,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
fn initialize_query_level_iterators<'t, 'q>( fn initialize_query_level_iterators<'t, 'q>(
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
branches: &'q Vec<Vec<Vec<Query>>>, branches: &'q Vec<Vec<Vec<Query>>>,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> { ) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
@ -418,7 +457,6 @@ fn initialize_query_level_iterators<'t, 'q>(
branch_positions.sort_unstable_by_key(|qli| qli.level); branch_positions.sort_unstable_by_key(|qli| qli.level);
let folded_query_level_iterators = branch_positions let folded_query_level_iterators = branch_positions
.into_iter() .into_iter()
.rev()
.fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold { .fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold {
Some(fold) => { Some(fold) => {
qli.parent(fold); qli.parent(fold);
@ -428,7 +466,8 @@ fn initialize_query_level_iterators<'t, 'q>(
}); });
if let Some(mut folded_query_level_iterators) = folded_query_level_iterators { if let Some(mut folded_query_level_iterators) = folded_query_level_iterators {
let (tree_level, last_result) = folded_query_level_iterators.next()?; let tree_level = folded_query_level_iterators.level;
let last_result = folded_query_level_iterators.next(allowed_candidates, tree_level)?;
if let Some(last_result) = last_result { if let Some(last_result) = last_result {
let branch = Branch { let branch = Branch {
last_result, last_result,
@ -451,48 +490,43 @@ fn set_compute_candidates<'t>(
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Option<RoaringBitmap>> ) -> anyhow::Result<Option<RoaringBitmap>>
{ {
let mut branches_heap = initialize_query_level_iterators(ctx, branches, wdcache)?; let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
let lowest_level = TreeLevel::min_value(); let lowest_level = TreeLevel::min_value();
let mut final_candidates: Option<(u32, RoaringBitmap)> = None; let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
let mut allowed_candidates = allowed_candidates.clone();
while let Some(mut branch) = branches_heap.peek_mut() { while let Some(mut branch) = branches_heap.peek_mut() {
let is_lowest_level = branch.tree_level == lowest_level; let is_lowest_level = branch.tree_level == lowest_level;
let branch_rank = branch.compute_rank(); let branch_rank = branch.compute_rank();
let (_, _, candidates) = &mut branch.last_result; // if current is worst than best we break to return
candidates.intersect_with(&allowed_candidates); // candidates that correspond to the best rank
if let Some((best_rank, _)) = final_candidates { if branch_rank > best_rank { break; } }
let _left = branch.last_result.0;
let candidates = take(&mut branch.last_result.2);
if candidates.is_empty() { if candidates.is_empty() {
// we don't have candidates, get next interval. // we don't have candidates, get next interval.
if !branch.next()? { PeekMut::pop(branch); } if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
} }
else if is_lowest_level { else if is_lowest_level {
// we have candidates, but we can't dig deeper, return candidates. // we have candidates, but we can't dig deeper.
allowed_candidates -= &candidates;
final_candidates = match final_candidates.take() { final_candidates = match final_candidates.take() {
// we add current candidates to best candidates
Some((best_rank, mut best_candidates)) => { Some((best_rank, mut best_candidates)) => {
// if current is worst than best we break to return best_candidates |= candidates;
// candidates that correspond to the best rank branch.lazy_next();
if branch_rank > best_rank { Some((best_rank, best_candidates))
final_candidates = Some((best_rank, best_candidates));
break;
// else we add current candidates to best candidates
// and we fetch the next page
} else {
best_candidates.union_with(candidates);
if !branch.next()? { PeekMut::pop(branch); }
Some((best_rank, best_candidates))
}
}, },
// we take current candidates as best candidates // we take current candidates as best candidates
// and we fetch the next page
None => { None => {
let candidates = take(candidates); branch.lazy_next();
if !branch.next()? { PeekMut::pop(branch); }
Some((branch_rank, candidates)) Some((branch_rank, candidates))
}, },
}; };
} else { } else {
// we have candidates, lets dig deeper in levels. // we have candidates, lets dig deeper in levels.
branch.query_level_iterator = branch.query_level_iterator.dig(ctx)?; branch.dig(ctx)?;
if !branch.next()? { PeekMut::pop(branch); } if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
} }
} }