diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 829cb6244..87c9a004d 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -57,7 +57,6 @@ fn main() -> Result<(), Box> { false, &None, &None, - None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2f97143cc..baa391660 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -117,6 +117,11 @@ impl<'a> Search<'a> { pub fn execute(&self) -> Result { let mut ctx = SearchContext::new(self.index, self.rtxn); + + if let Some(searchable_attributes) = self.searchable_attributes { + ctx.searchable_attributes(searchable_attributes)?; + } + let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = execute_search( &mut ctx, @@ -126,7 +131,6 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, &self.filter, &self.sort_criteria, - self.searchable_attributes, self.geo_strategy, self.offset, self.limit, diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 90c604d72..2b2cd4d79 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -4,12 +4,13 @@ use std::hash::Hash; use fxhash::FxHashMap; use heed::types::ByteSlice; -use heed::{BytesDecode, BytesEncode, Database, RoTxn}; +use heed::{BytesEncode, Database, RoTxn}; use roaring::RoaringBitmap; use super::interner::Interned; use super::Word; -use crate::heed_codec::StrBEU16Codec; +use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; +use crate::update::MergeFn; use crate::{ CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext, }; @@ -22,48 +23,106 @@ use crate::{ #[derive(Default)] pub struct DatabaseCache<'ctx> { pub word_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, pub word_prefix_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, pub prefix_word_pair_proximity_docids: - FxHashMap<(u8, Interned, Interned), Option<&'ctx [u8]>>, - pub word_docids: FxHashMap, Option<&'ctx [u8]>>, - pub exact_word_docids: FxHashMap, Option<&'ctx [u8]>>, - pub word_prefix_docids: FxHashMap, Option<&'ctx [u8]>>, - pub exact_word_prefix_docids: FxHashMap, Option<&'ctx [u8]>>, + FxHashMap<(u8, Interned, Interned), Option>>, + pub word_docids: FxHashMap, Option>>, + pub exact_word_docids: FxHashMap, Option>>, + pub word_prefix_docids: FxHashMap, Option>>, + pub exact_word_prefix_docids: FxHashMap, Option>>, pub words_fst: Option>>, - pub word_position_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, - pub word_prefix_position_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, + pub word_position_docids: FxHashMap<(Interned, u16), Option>>, + pub word_prefix_position_docids: FxHashMap<(Interned, u16), Option>>, pub word_positions: FxHashMap, Vec>, pub word_prefix_positions: FxHashMap, Vec>, - pub word_fid_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, - pub word_prefix_fid_docids: FxHashMap<(Interned, u16), Option<&'ctx [u8]>>, + pub word_fid_docids: FxHashMap<(Interned, u16), Option>>, + pub word_prefix_fid_docids: FxHashMap<(Interned, u16), Option>>, pub word_fids: FxHashMap, Vec>, pub word_prefix_fids: FxHashMap, Vec>, } impl<'ctx> DatabaseCache<'ctx> { - fn get_value<'v, K1, KC>( + fn get_value<'v, K1, KC, DC>( txn: &'ctx RoTxn, cache_key: K1, db_key: &'v KC::EItem, - cache: &mut FxHashMap>, + cache: &mut FxHashMap>>, db: Database, - ) -> Result> + ) -> Result> where K1: Copy + Eq + Hash, KC: BytesEncode<'v>, + DC: BytesDecodeOwned, { - let bitmap_ptr = match cache.entry(cache_key) { - Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), + match cache.entry(cache_key) { + Entry::Occupied(_) => {} Entry::Vacant(entry) => { - let bitmap_ptr = db.get(txn, db_key)?; + let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); + entry.insert(bitmap_ptr); + } + } + + match cache.get(&cache_key).unwrap() { + Some(Cow::Borrowed(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + Some(Cow::Owned(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + None => Ok(None), + } + } + + fn get_value_from_keys<'v, K1, KC, DC>( + txn: &'ctx RoTxn, + cache_key: K1, + db_keys: &[&'v KC::EItem], + cache: &mut FxHashMap>>, + db: Database, + merger: MergeFn, + ) -> Result> + where + K1: Copy + Eq + Hash, + KC: BytesEncode<'v>, + DC: BytesDecodeOwned, + { + match cache.entry(cache_key) { + Entry::Occupied(_) => {} + Entry::Vacant(entry) => { + let bitmap_ptr: Option> = match db_keys { + [] => None, + [key] => db.get(txn, key)?.map(Cow::Borrowed), + keys => { + let bitmaps = keys + .into_iter() + .filter_map(|key| db.get(txn, key).transpose()) + .map(|v| v.map(Cow::Borrowed)) + .collect::>, _>>()?; + + if bitmaps.is_empty() { + None + } else { + Some(merger(&[], &bitmaps[..])?) + } + } + }; + entry.insert(bitmap_ptr); - bitmap_ptr } }; - Ok(bitmap_ptr) + + match cache.get(&cache_key).unwrap() { + Some(Cow::Borrowed(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + Some(Cow::Owned(bytes)) => { + DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) + } + None => Ok(None), + } } } impl<'ctx> SearchContext<'ctx> { @@ -99,30 +158,26 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, word, self.word_interner.get(word).as_str(), &mut self.db_cache.word_docids, self.index.word_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } fn get_db_exact_word_docids( &mut self, word: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, word, self.word_interner.get(word).as_str(), &mut self.db_cache.exact_word_docids, self.index.exact_word_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -150,30 +205,26 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.word_prefix_docids, self.index.word_prefix_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } fn get_db_exact_word_prefix_docids( &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( self.txn, prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.exact_word_prefix_docids, self.index.exact_word_prefix_docids.remap_data_type::(), - )? - .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_pair_proximity_docids( @@ -182,7 +233,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, word1, word2), &( @@ -192,9 +243,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_pair_proximity_docids, self.index.word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_pair_proximity_docids_len( @@ -203,7 +252,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( self.txn, (proximity, word1, word2), &( @@ -213,11 +262,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_pair_proximity_docids, self.index.word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| { - CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()) - }) - .transpose() + ) } pub fn get_db_word_prefix_pair_proximity_docids( @@ -226,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> { prefix2: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, word1, prefix2), &( @@ -236,9 +281,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.word_prefix_pair_proximity_docids, self.index.word_prefix_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_prefix_word_pair_proximity_docids( &mut self, @@ -246,7 +289,7 @@ impl<'ctx> SearchContext<'ctx> { right: Interned, proximity: u8, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, left_prefix, right), &( @@ -256,9 +299,7 @@ impl<'ctx> SearchContext<'ctx> { ), &mut self.db_cache.prefix_word_pair_proximity_docids, self.index.prefix_word_pair_proximity_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_fid_docids( @@ -266,15 +307,13 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, fid: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word, fid), &(self.word_interner.get(word).as_str(), fid), &mut self.db_cache.word_fid_docids, self.index.word_fid_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_prefix_fid_docids( @@ -282,15 +321,13 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, fid: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word_prefix, fid), &(self.word_interner.get(word_prefix).as_str(), fid), &mut self.db_cache.word_prefix_fid_docids, self.index.word_prefix_fid_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_fids(&mut self, word: Interned) -> Result> { @@ -309,7 +346,7 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, fid), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_fid_docids.insert((word, fid), Some(value)); + self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value))); fids.push(fid); } entry.insert(fids.clone()); @@ -335,7 +372,9 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, fid), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value)); + self.db_cache + .word_prefix_fid_docids + .insert((word_prefix, fid), Some(Cow::Borrowed(value))); fids.push(fid); } entry.insert(fids.clone()); @@ -350,15 +389,13 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, position: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word, position), &(self.word_interner.get(word).as_str(), position), &mut self.db_cache.word_position_docids, self.index.word_position_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_prefix_position_docids( @@ -366,15 +403,13 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, position: u16, ) -> Result> { - DatabaseCache::get_value( + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (word_prefix, position), &(self.word_interner.get(word_prefix).as_str(), position), &mut self.db_cache.word_prefix_position_docids, self.index.word_prefix_position_docids.remap_data_type::(), - )? - .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) - .transpose() + ) } pub fn get_db_word_positions(&mut self, word: Interned) -> Result> { @@ -393,7 +428,9 @@ impl<'ctx> SearchContext<'ctx> { for result in remap_key_type { let ((_, position), value) = result?; // filling other caches to avoid searching for them again - self.db_cache.word_position_docids.insert((word, position), Some(value)); + self.db_cache + .word_position_docids + .insert((word, position), Some(Cow::Borrowed(value))); positions.push(position); } entry.insert(positions.clone()); @@ -424,7 +461,7 @@ impl<'ctx> SearchContext<'ctx> { // filling other caches to avoid searching for them again self.db_cache .word_prefix_position_docids - .insert((word_prefix, position), Some(value)); + .insert((word_prefix, position), Some(Cow::Borrowed(value))); positions.push(position); } entry.insert(positions.clone()); diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index d70bb1550..f33d595e5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -514,7 +514,6 @@ mod tests { false, &None, &None, - None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index e72093bf8..6cc155c56 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -57,6 +57,7 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, + pub restricted_fids: Option>, } impl<'ctx> SearchContext<'ctx> { @@ -69,8 +70,18 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), + restricted_fids: None, } } + + pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { + let fids_map = self.index.fields_ids_map(&self.txn)?; + let restricted_fids = + searchable_attributes.iter().filter_map(|name| fids_map.id(name)).collect(); + self.restricted_fids = Some(restricted_fids); + + Ok(()) + } } #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)] @@ -355,7 +366,6 @@ pub fn execute_search( exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, - searchable_attributes: Option<&[String]>, geo_strategy: geo_sort::Strategy, from: usize, length: usize, diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 7a3fd1fd9..011a2eb60 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -4,7 +4,8 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{ - DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, + merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, IndexDocuments, + IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, }; pub use self::indexer_config::IndexerConfig; pub use self::prefix_word_pairs::{