Use the prefix caches

This commit is contained in:
Clément Renault 2020-01-08 13:14:07 +01:00
parent a262c67ec3
commit 07937ed6d7
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 42 additions and 23 deletions

View File

@ -55,7 +55,14 @@ where
None => return Ok(Vec::new()), None => return Ok(Vec::new()),
}; };
let QueryResult { docids, queries } = traverse_query_tree(reader, &words, postings_lists_store, &operation).unwrap(); let QueryResult { docids, queries } =
traverse_query_tree(
reader,
&words,
postings_lists_store,
prefix_postings_lists_cache_store,
&operation,
).unwrap();
println!("found {} documents", docids.len()); println!("found {} documents", docids.len());
println!("number of postings {:?}", queries.len()); println!("number of postings {:?}", queries.len());

View File

@ -234,6 +234,7 @@ pub fn traverse_query_tree<'o, 'txn>(
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
words_set: &fst::Set, words_set: &fst::Set,
postings_lists: store::PostingsLists, postings_lists: store::PostingsLists,
prefix_postings_lists: store::PrefixPostingsListsCache,
tree: &'o Operation, tree: &'o Operation,
) -> MResult<QueryResult<'o, 'txn>> ) -> MResult<QueryResult<'o, 'txn>>
{ {
@ -241,6 +242,7 @@ pub fn traverse_query_tree<'o, 'txn>(
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
words_set: &fst::Set, words_set: &fst::Set,
pls: store::PostingsLists, pls: store::PostingsLists,
ppls: store::PrefixPostingsListsCache,
cache: &mut Cache<'o, 'txn>, cache: &mut Cache<'o, 'txn>,
postings: &mut Postings<'o, 'txn>, postings: &mut Postings<'o, 'txn>,
depth: usize, depth: usize,
@ -255,9 +257,9 @@ pub fn traverse_query_tree<'o, 'txn>(
for op in operations { for op in operations {
if cache.get(op).is_none() { if cache.get(op).is_none() {
let docids = match op { let docids = match op {
Operation::And(ops) => execute_and(reader, words_set, pls, cache, postings, depth + 1, &ops)?, Operation::And(ops) => execute_and(reader, words_set, pls, ppls, cache, postings, depth + 1, &ops)?,
Operation::Or(ops) => execute_or(reader, words_set, pls, cache, postings, depth + 1, &ops)?, Operation::Or(ops) => execute_or(reader, words_set, pls, ppls, cache, postings, depth + 1, &ops)?,
Operation::Query(query) => execute_query(reader, words_set, pls, postings, depth + 1, &query)?, Operation::Query(query) => execute_query(reader, words_set, pls, ppls, postings, depth + 1, &query)?,
}; };
cache.insert(op, docids); cache.insert(op, docids);
} }
@ -281,6 +283,7 @@ pub fn traverse_query_tree<'o, 'txn>(
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
words_set: &fst::Set, words_set: &fst::Set,
pls: store::PostingsLists, pls: store::PostingsLists,
ppls: store::PrefixPostingsListsCache,
cache: &mut Cache<'o, 'txn>, cache: &mut Cache<'o, 'txn>,
postings: &mut Postings<'o, 'txn>, postings: &mut Postings<'o, 'txn>,
depth: usize, depth: usize,
@ -297,9 +300,9 @@ pub fn traverse_query_tree<'o, 'txn>(
Some(docids) => docids, Some(docids) => docids,
None => { None => {
let docids = match op { let docids = match op {
Operation::And(ops) => execute_and(reader, words_set, pls, cache, postings, depth + 1, &ops)?, Operation::And(ops) => execute_and(reader, words_set, pls, ppls, cache, postings, depth + 1, &ops)?,
Operation::Or(ops) => execute_or(reader, words_set, pls, cache, postings, depth + 1, &ops)?, Operation::Or(ops) => execute_or(reader, words_set, pls, ppls, cache, postings, depth + 1, &ops)?,
Operation::Query(query) => execute_query(reader, words_set, pls, postings, depth + 1, &query)?, Operation::Query(query) => execute_query(reader, words_set, pls, ppls, postings, depth + 1, &query)?,
}; };
cache.entry(op).or_insert(docids) cache.entry(op).or_insert(docids)
} }
@ -319,6 +322,7 @@ pub fn traverse_query_tree<'o, 'txn>(
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
words_set: &fst::Set, words_set: &fst::Set,
pls: store::PostingsLists, pls: store::PostingsLists,
ppls: store::PrefixPostingsListsCache,
postings: &mut Postings<'o, 'txn>, postings: &mut Postings<'o, 'txn>,
depth: usize, depth: usize,
query: &'o Query, query: &'o Query,
@ -329,6 +333,13 @@ pub fn traverse_query_tree<'o, 'txn>(
let Query { id, prefix, kind } = query; let Query { id, prefix, kind } = query;
let docids = match kind { let docids = match kind {
QueryKind::Tolerant(word) => { QueryKind::Tolerant(word) => {
if *prefix && word.len() == 1 {
let prefix = [word.as_bytes()[0], 0, 0, 0];
let matches = ppls.prefix_postings_list(reader, prefix)?.unwrap_or_default();
let mut docids: Vec<_> = matches.into_iter().map(|m| m.document_id).collect();
docids.dedup();
SetBuf::new(docids).unwrap()
} else {
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) }; let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
let byte = word.as_bytes()[0]; let byte = word.as_bytes()[0];
@ -346,6 +357,7 @@ pub fn traverse_query_tree<'o, 'txn>(
} }
SetBuf::from_dirty(docids) SetBuf::from_dirty(docids)
}
}, },
QueryKind::Exact(word) => { QueryKind::Exact(word) => {
// TODO support prefix and non-prefix exact DFA // TODO support prefix and non-prefix exact DFA
@ -407,9 +419,9 @@ pub fn traverse_query_tree<'o, 'txn>(
let mut postings = Postings::new(); let mut postings = Postings::new();
let docids = match tree { let docids = match tree {
Operation::And(ops) => execute_and(reader, words_set, postings_lists, &mut cache, &mut postings, 0, &ops)?, Operation::And(ops) => execute_and(reader, words_set, postings_lists, prefix_postings_lists, &mut cache, &mut postings, 0, &ops)?,
Operation::Or(ops) => execute_or(reader, words_set, postings_lists, &mut cache, &mut postings, 0, &ops)?, Operation::Or(ops) => execute_or(reader, words_set, postings_lists, prefix_postings_lists, &mut cache, &mut postings, 0, &ops)?,
Operation::Query(query) => execute_query(reader, words_set, postings_lists, &mut postings, 0, &query)?, Operation::Query(query) => execute_query(reader, words_set, postings_lists, prefix_postings_lists, &mut postings, 0, &query)?,
}; };
Ok(QueryResult { docids, queries: postings }) Ok(QueryResult { docids, queries: postings })