Introduce a cache on the docid_word_positions database method

This commit is contained in:
Kerollmops 2021-03-08 16:12:03 +01:00
parent 5fcaedb880
commit 82a0f678fb
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 55 additions and 23 deletions

View File

@ -1,5 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap, btree_map}; use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::{HashMap, Entry};
use std::mem::take; use std::mem::take;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -331,19 +332,21 @@ fn resolve_candidates<'t>(
Ok(candidates) Ok(candidates)
} }
fn resolve_plane_sweep_candidates<'t>( fn resolve_plane_sweep_candidates(
ctx: &'t dyn Context, ctx: &dyn Context,
query_tree: &Operation, query_tree: &Operation,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> ) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
{ {
/// FIXME may be buggy with query like "new new york" /// FIXME may be buggy with query like "new new york"
fn plane_sweep<'t>( fn plane_sweep<'a>(
ctx: &'t dyn Context, ctx: &dyn Context,
operations: &[Operation], operations: &'a [Operation],
docid: DocumentId, docid: DocumentId,
consecutive: bool, consecutive: bool,
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Position, u8, Position)>> ) -> anyhow::Result<Vec<(Position, u8, Position)>>
{ {
@ -385,7 +388,7 @@ fn resolve_plane_sweep_candidates<'t>(
let mut groups_positions = Vec::with_capacity(groups_len); let mut groups_positions = Vec::with_capacity(groups_len);
for operation in operations { for operation in operations {
let positions = resolve_operation(ctx, operation, docid, wdcache)?; let positions = resolve_operation(ctx, operation, docid, rocache, dwpcache, wdcache)?;
groups_positions.push(positions.into_iter()); groups_positions.push(positions.into_iter());
} }
@ -456,25 +459,32 @@ fn resolve_plane_sweep_candidates<'t>(
Ok(output) Ok(output)
} }
fn resolve_operation<'t>( fn resolve_operation<'a>(
ctx: &'t dyn Context, ctx: &dyn Context,
query_tree: &Operation, query_tree: &'a Operation,
docid: DocumentId, docid: DocumentId,
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Position, u8, Position)>> { ) -> anyhow::Result<Vec<(Position, u8, Position)>>
{
use Operation::{And, Consecutive, Or}; use Operation::{And, Consecutive, Or};
match query_tree { if let Some(result) = rocache.get(query_tree) {
And(ops) => plane_sweep(ctx, ops, docid, false, wdcache), return Ok(result.clone());
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, wdcache), }
let result = match query_tree {
And(ops) => plane_sweep(ctx, ops, docid, false, rocache, dwpcache, wdcache)?,
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, dwpcache, wdcache)?,
Or(_, ops) => { Or(_, ops) => {
let mut result = Vec::new(); let mut result = Vec::new();
for op in ops { for op in ops {
result.extend(resolve_operation(ctx, op, docid, wdcache)?) result.extend(resolve_operation(ctx, op, docid, rocache, dwpcache, wdcache)?)
} }
result.sort_unstable(); result.sort_unstable();
Ok(result) result
}, },
Operation::Query(Query {prefix, kind}) => { Operation::Query(Query {prefix, kind}) => {
let fst = ctx.words_fst(); let fst = ctx.words_fst();
@ -493,21 +503,43 @@ fn resolve_plane_sweep_candidates<'t>(
let mut result = Vec::new(); let mut result = Vec::new();
for (word, _) in words.as_ref() { for (word, _) in words.as_ref() {
if let Some(positions) = ctx.docid_word_positions(docid, word)? { let positions = match dwpcache.entry(word.to_string()) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
let positions = ctx.docid_word_positions(docid, word)?;
entry.insert(positions)
}
};
if let Some(positions) = positions {
let iter = positions.iter().map(|p| (p, 0, p)); let iter = positions.iter().map(|p| (p, 0, p));
result.extend(iter); result.extend(iter);
} }
} }
result.sort_unstable(); result.sort_unstable();
result
}
};
rocache.insert(query_tree, result.clone());
Ok(result) Ok(result)
} }
}
}
let mut word_positions_cache = HashMap::new();
let mut resolve_operation_cache = HashMap::new();
let mut candidates = BTreeMap::new(); let mut candidates = BTreeMap::new();
for docid in allowed_candidates { for docid in allowed_candidates {
let positions = resolve_operation(ctx, query_tree, docid, wdcache)?; word_positions_cache.clear();
resolve_operation_cache.clear();
let positions = resolve_operation(
ctx,
query_tree,
docid,
&mut resolve_operation_cache,
&mut word_positions_cache,
wdcache,
)?;
let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity); let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity);
let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7); let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7);
candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid); candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid);

View File

@ -379,7 +379,7 @@ mod test {
let facet_candidates = None; let facet_candidates = None;
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates); let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates);
let candidates_1 = context.word_docids("split").unwrap().unwrap() let candidates_1 = context.word_docids("split").unwrap().unwrap()
@ -428,7 +428,7 @@ let mut wdcache = WordDerivationsCache::new();
let query_tree = None; let query_tree = None;
let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone())); let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone()));
let expected = CriterionResult { let expected = CriterionResult {
@ -457,7 +457,7 @@ let mut wdcache = WordDerivationsCache::new();
let facet_candidates = context.word_docids("earth").unwrap().unwrap(); let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut wdcache = WordDerivationsCache::new(); let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone())); let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone()));
let candidates_1 = context.word_docids("split").unwrap().unwrap() let candidates_1 = context.word_docids("split").unwrap().unwrap()