meilisearch/milli/src/search/new/db_cache.rs

141 lines
4.7 KiB
Rust
Raw Normal View History

2023-03-08 16:55:53 +08:00
use std::collections::hash_map::Entry;
2023-03-08 20:26:29 +08:00
use std::hash::Hash;
2023-03-08 16:55:53 +08:00
use fxhash::FxHashMap;
2023-02-21 20:57:34 +08:00
use heed::types::ByteSlice;
2023-03-08 20:26:29 +08:00
use heed::{BytesEncode, Database, RoTxn};
2023-03-08 16:55:53 +08:00
2023-03-14 23:37:47 +08:00
use super::interner::{DedupInterner, Interned};
2023-03-09 18:12:31 +08:00
use crate::{Index, Result};
2023-03-08 20:26:29 +08:00
/// A cache storing pointers to values in the LMDB databases.
///
/// Used for performance reasons only. By using this cache, we avoid performing a
/// database lookup and instead get a direct reference to the value using a fast
/// local HashMap lookup.
#[derive(Default)]
2023-03-13 21:03:48 +08:00
pub struct DatabaseCache<'ctx> {
pub word_pair_proximity_docids:
2023-03-13 21:03:48 +08:00
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub word_prefix_pair_proximity_docids:
2023-03-13 21:03:48 +08:00
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub prefix_word_pair_proximity_docids:
2023-03-13 21:03:48 +08:00
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
}
2023-03-13 21:03:48 +08:00
impl<'ctx> DatabaseCache<'ctx> {
2023-03-08 20:26:29 +08:00
fn get_value<'v, K1, KC>(
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-08 20:26:29 +08:00
cache_key: K1,
db_key: &'v KC::EItem,
2023-03-13 21:03:48 +08:00
cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>,
2023-03-08 20:26:29 +08:00
db: Database<KC, ByteSlice>,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>>
2023-03-08 20:26:29 +08:00
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
{
let bitmap_ptr = match cache.entry(cache_key) {
Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
Entry::Vacant(entry) => {
2023-03-08 20:26:29 +08:00
let bitmap_ptr = db.get(txn, db_key)?;
entry.insert(bitmap_ptr);
bitmap_ptr
}
};
Ok(bitmap_ptr)
}
2023-03-08 20:26:29 +08:00
/// Retrieve or insert the given value in the `word_docids` database.
2023-03-09 18:12:31 +08:00
pub fn get_word_docids(
&mut self,
index: &Index,
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-14 23:37:47 +08:00
word_interner: &DedupInterner<String>,
2023-03-09 18:12:31 +08:00
word: Interned<String>,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>> {
2023-03-08 20:26:29 +08:00
Self::get_value(
2023-03-09 18:12:31 +08:00
txn,
2023-03-08 20:26:29 +08:00
word,
2023-03-09 18:12:31 +08:00
word_interner.get(word).as_str(),
&mut self.word_docids,
index.word_docids.remap_data_type::<ByteSlice>(),
2023-03-08 20:26:29 +08:00
)
}
/// Retrieve or insert the given value in the `word_prefix_docids` database.
pub fn get_word_prefix_docids(
&mut self,
2023-03-09 18:12:31 +08:00
index: &Index,
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-14 23:37:47 +08:00
word_interner: &DedupInterner<String>,
2023-03-08 20:26:29 +08:00
prefix: Interned<String>,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>> {
2023-03-08 20:26:29 +08:00
Self::get_value(
2023-03-09 18:12:31 +08:00
txn,
2023-03-08 20:26:29 +08:00
prefix,
2023-03-09 18:12:31 +08:00
word_interner.get(prefix).as_str(),
&mut self.word_prefix_docids,
index.word_prefix_docids.remap_data_type::<ByteSlice>(),
2023-03-08 20:26:29 +08:00
)
}
pub fn get_word_pair_proximity_docids(
&mut self,
2023-03-09 18:12:31 +08:00
index: &Index,
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-14 23:37:47 +08:00
word_interner: &DedupInterner<String>,
word1: Interned<String>,
word2: Interned<String>,
proximity: u8,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>> {
2023-03-08 20:26:29 +08:00
Self::get_value(
2023-03-09 18:12:31 +08:00
txn,
2023-03-08 20:26:29 +08:00
(proximity, word1, word2),
2023-03-09 18:12:31 +08:00
&(proximity, word_interner.get(word1).as_str(), word_interner.get(word2).as_str()),
&mut self.word_pair_proximity_docids,
index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
2023-03-08 20:26:29 +08:00
)
}
pub fn get_word_prefix_pair_proximity_docids(
&mut self,
2023-03-09 18:12:31 +08:00
index: &Index,
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-14 23:37:47 +08:00
word_interner: &DedupInterner<String>,
word1: Interned<String>,
prefix2: Interned<String>,
proximity: u8,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>> {
2023-03-08 20:26:29 +08:00
Self::get_value(
2023-03-09 18:12:31 +08:00
txn,
2023-03-08 20:26:29 +08:00
(proximity, word1, prefix2),
2023-03-09 18:12:31 +08:00
&(proximity, word_interner.get(word1).as_str(), word_interner.get(prefix2).as_str()),
&mut self.word_prefix_pair_proximity_docids,
index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
2023-03-08 20:26:29 +08:00
)
}
pub fn get_prefix_word_pair_proximity_docids(
&mut self,
2023-03-09 18:12:31 +08:00
index: &Index,
2023-03-13 21:03:48 +08:00
txn: &'ctx RoTxn,
2023-03-14 23:37:47 +08:00
word_interner: &DedupInterner<String>,
left_prefix: Interned<String>,
right: Interned<String>,
proximity: u8,
2023-03-13 21:03:48 +08:00
) -> Result<Option<&'ctx [u8]>> {
2023-03-08 20:26:29 +08:00
Self::get_value(
2023-03-09 18:12:31 +08:00
txn,
2023-03-08 20:26:29 +08:00
(proximity, left_prefix, right),
&(
proximity,
2023-03-09 18:12:31 +08:00
word_interner.get(left_prefix).as_str(),
word_interner.get(right).as_str(),
2023-03-08 20:26:29 +08:00
),
2023-03-09 18:12:31 +08:00
&mut self.prefix_word_pair_proximity_docids,
index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
2023-03-08 20:26:29 +08:00
)
}
}