mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-30 23:13:09 +08:00
Introduce a cache on the docid_word_positions database method
This commit is contained in:
parent
5fcaedb880
commit
82a0f678fb
@ -1,5 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{BTreeMap, HashMap, btree_map};
|
use std::collections::btree_map::{self, BTreeMap};
|
||||||
|
use std::collections::hash_map::{HashMap, Entry};
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -331,19 +332,21 @@ fn resolve_candidates<'t>(
|
|||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_plane_sweep_candidates<'t>(
|
fn resolve_plane_sweep_candidates(
|
||||||
ctx: &'t dyn Context,
|
ctx: &dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
|
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
|
||||||
{
|
{
|
||||||
/// FIXME may be buggy with query like "new new york"
|
/// FIXME may be buggy with query like "new new york"
|
||||||
fn plane_sweep<'t>(
|
fn plane_sweep<'a>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &dyn Context,
|
||||||
operations: &[Operation],
|
operations: &'a [Operation],
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
consecutive: bool,
|
consecutive: bool,
|
||||||
|
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
||||||
|
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
||||||
{
|
{
|
||||||
@ -385,7 +388,7 @@ fn resolve_plane_sweep_candidates<'t>(
|
|||||||
let mut groups_positions = Vec::with_capacity(groups_len);
|
let mut groups_positions = Vec::with_capacity(groups_len);
|
||||||
|
|
||||||
for operation in operations {
|
for operation in operations {
|
||||||
let positions = resolve_operation(ctx, operation, docid, wdcache)?;
|
let positions = resolve_operation(ctx, operation, docid, rocache, dwpcache, wdcache)?;
|
||||||
groups_positions.push(positions.into_iter());
|
groups_positions.push(positions.into_iter());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -456,25 +459,32 @@ fn resolve_plane_sweep_candidates<'t>(
|
|||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation<'a>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &'a Operation,
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
|
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
||||||
|
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>> {
|
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
||||||
|
{
|
||||||
use Operation::{And, Consecutive, Or};
|
use Operation::{And, Consecutive, Or};
|
||||||
|
|
||||||
match query_tree {
|
if let Some(result) = rocache.get(query_tree) {
|
||||||
And(ops) => plane_sweep(ctx, ops, docid, false, wdcache),
|
return Ok(result.clone());
|
||||||
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, wdcache),
|
}
|
||||||
|
|
||||||
|
let result = match query_tree {
|
||||||
|
And(ops) => plane_sweep(ctx, ops, docid, false, rocache, dwpcache, wdcache)?,
|
||||||
|
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, dwpcache, wdcache)?,
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
result.extend(resolve_operation(ctx, op, docid, wdcache)?)
|
result.extend(resolve_operation(ctx, op, docid, rocache, dwpcache, wdcache)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
result.sort_unstable();
|
result.sort_unstable();
|
||||||
Ok(result)
|
result
|
||||||
},
|
},
|
||||||
Operation::Query(Query {prefix, kind}) => {
|
Operation::Query(Query {prefix, kind}) => {
|
||||||
let fst = ctx.words_fst();
|
let fst = ctx.words_fst();
|
||||||
@ -493,21 +503,43 @@ fn resolve_plane_sweep_candidates<'t>(
|
|||||||
|
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (word, _) in words.as_ref() {
|
for (word, _) in words.as_ref() {
|
||||||
if let Some(positions) = ctx.docid_word_positions(docid, word)? {
|
let positions = match dwpcache.entry(word.to_string()) {
|
||||||
|
Entry::Occupied(entry) => entry.into_mut(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let positions = ctx.docid_word_positions(docid, word)?;
|
||||||
|
entry.insert(positions)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(positions) = positions {
|
||||||
let iter = positions.iter().map(|p| (p, 0, p));
|
let iter = positions.iter().map(|p| (p, 0, p));
|
||||||
result.extend(iter);
|
result.extend(iter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result.sort_unstable();
|
result.sort_unstable();
|
||||||
|
result
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
rocache.insert(query_tree, result.clone());
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
let mut word_positions_cache = HashMap::new();
|
||||||
|
let mut resolve_operation_cache = HashMap::new();
|
||||||
let mut candidates = BTreeMap::new();
|
let mut candidates = BTreeMap::new();
|
||||||
for docid in allowed_candidates {
|
for docid in allowed_candidates {
|
||||||
let positions = resolve_operation(ctx, query_tree, docid, wdcache)?;
|
word_positions_cache.clear();
|
||||||
|
resolve_operation_cache.clear();
|
||||||
|
let positions = resolve_operation(
|
||||||
|
ctx,
|
||||||
|
query_tree,
|
||||||
|
docid,
|
||||||
|
&mut resolve_operation_cache,
|
||||||
|
&mut word_positions_cache,
|
||||||
|
wdcache,
|
||||||
|
)?;
|
||||||
let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity);
|
let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity);
|
||||||
let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7);
|
let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7);
|
||||||
candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid);
|
candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user