use std::collections::hash_map::Entry; use fxhash::FxHashMap; use heed::{types::ByteSlice, RoTxn}; use crate::{Index, Result}; #[derive(Default)] pub struct DatabaseCache<'transaction> { pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, pub word_prefix_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, pub word_docids: FxHashMap>, pub exact_word_docids: FxHashMap>, pub word_prefix_docids: FxHashMap>, } impl<'transaction> DatabaseCache<'transaction> { pub fn get_word_docids( &mut self, index: &Index, txn: &'transaction RoTxn, word: &str, ) -> Result> { let bitmap_ptr = match self.word_docids.entry(word.to_owned()) { Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), Entry::Vacant(entry) => { let bitmap_ptr = index.word_docids.remap_data_type::().get(txn, word)?; entry.insert(bitmap_ptr); bitmap_ptr } }; Ok(bitmap_ptr) } pub fn get_prefix_docids( &mut self, index: &Index, txn: &'transaction RoTxn, prefix: &str, ) -> Result> { // In the future, this will be a frozen roaring bitmap let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) { Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), Entry::Vacant(entry) => { let bitmap_ptr = index.word_prefix_docids.remap_data_type::().get(txn, prefix)?; entry.insert(bitmap_ptr); bitmap_ptr } }; Ok(bitmap_ptr) } pub fn get_word_pair_proximity_docids( &mut self, index: &Index, txn: &'transaction RoTxn, word1: &str, word2: &str, proximity: u8, ) -> Result> { let key = (proximity, word1.to_owned(), word2.to_owned()); match self.word_pair_proximity_docids.entry(key.clone()) { Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), Entry::Vacant(entry) => { // Note that now, we really want to do a prefix iter over (w1, w2) to get all the possible proximities // but oh well // // Actually, we shouldn'transaction greedily access this DB at all // a DB (w1, w2) -> [proximities] would be much better // We could even have a DB that is (w1) -> set of words such that (w1, w2) are in proximity // And if we worked with words encoded as integers, the set of words could be a roaring bitmap // Then, to find all the proximities between two list of words, we'd do: // inputs: // - words1 (roaring bitmap) // - words2 (roaring bitmap) // output: // - [(word1, word2, [proximities])] // algo: // let mut ouput = vec![]; // for word1 in words1 { // let all_words_in_proximity_of_w1 = pair_words_db.get(word1); // let words_in_proximity_of_w1 = all_words_in_proximity_of_w1 & words2; // for word2 in words_in_proximity_of_w1 { // let proximties = prox_db.get(word1, word2); // output.push(word1, word2, proximities); // } // } let bitmap_ptr = index .word_pair_proximity_docids .remap_data_type::() .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; entry.insert(bitmap_ptr); Ok(bitmap_ptr) } } } pub fn get_word_prefix_pair_proximity_docids( &mut self, index: &Index, txn: &'transaction RoTxn, word1: &str, prefix2: &str, proximity: u8, ) -> Result> { let key = (proximity, word1.to_owned(), prefix2.to_owned()); match self.word_prefix_pair_proximity_docids.entry(key.clone()) { Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), Entry::Vacant(entry) => { let bitmap_ptr = index .word_prefix_pair_proximity_docids .remap_data_type::() .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; entry.insert(bitmap_ptr); Ok(bitmap_ptr) } } } }