Filter on attributes before computing the docids when attribute restriction is on

This commit is contained in:
ManyTheFish 2023-11-28 14:55:29 +01:00
parent dc07790133
commit d6c2ee15a9
2 changed files with 66 additions and 23 deletions

View File

@ -154,7 +154,7 @@ impl<'ctx> SearchContext<'ctx> {
/// Retrieve or insert the given value in the `word_docids` database. /// Retrieve or insert the given value in the `word_docids` database.
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> { fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids { match &self.restricted_tolerant_fids {
Some(restricted_fids) => { Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str(); let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
@ -182,13 +182,28 @@ impl<'ctx> SearchContext<'ctx> {
&mut self, &mut self,
word: Interned<String>, word: Interned<String>,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( match &self.restricted_exact_fids {
self.txn, Some(restricted_fids) => {
word, let interned = self.word_interner.get(word).as_str();
self.word_interner.get(word).as_str(), let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(), DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
) self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
),
}
} }
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> { pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@ -216,7 +231,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self, &mut self,
prefix: Interned<String>, prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids { match &self.restricted_tolerant_fids {
Some(restricted_fids) => { Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str(); let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
@ -244,13 +259,28 @@ impl<'ctx> SearchContext<'ctx> {
&mut self, &mut self,
prefix: Interned<String>, prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( match &self.restricted_exact_fids {
self.txn, Some(restricted_fids) => {
prefix, let interned = self.word_interner.get(prefix).as_str();
self.word_interner.get(prefix).as_str(), let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(), DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
) self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
),
}
} }
pub fn get_db_word_pair_proximity_docids( pub fn get_db_word_pair_proximity_docids(
@ -334,7 +364,9 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16, fid: u16,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None. // if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid))
&& self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid))
{
return Ok(None); return Ok(None);
} }
@ -353,7 +385,9 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16, fid: u16,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None. // if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid))
&& self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid))
{
return Ok(None); return Ok(None);
} }

View File

@ -63,7 +63,8 @@ pub struct SearchContext<'ctx> {
pub phrase_interner: DedupInterner<Phrase>, pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>, pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache, pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<Vec<u16>>, pub restricted_tolerant_fids: Option<Vec<u16>>,
pub restricted_exact_fids: Option<Vec<u16>>,
} }
impl<'ctx> SearchContext<'ctx> { impl<'ctx> SearchContext<'ctx> {
@ -76,15 +77,18 @@ impl<'ctx> SearchContext<'ctx> {
phrase_interner: <_>::default(), phrase_interner: <_>::default(),
term_interner: <_>::default(), term_interner: <_>::default(),
phrase_docids: <_>::default(), phrase_docids: <_>::default(),
restricted_fids: None, restricted_tolerant_fids: None,
restricted_exact_fids: None,
} }
} }
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
let fids_map = self.index.fields_ids_map(self.txn)?; let fids_map = self.index.fields_ids_map(self.txn)?;
let searchable_names = self.index.searchable_fields(self.txn)?; let searchable_names = self.index.searchable_fields(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut restricted_fids = Vec::new(); let mut restricted_exact_fids = Vec::new();
let mut restricted_tolerant_fids = Vec::new();
let mut contains_wildcard = false; let mut contains_wildcard = false;
for field_name in searchable_attributes { for field_name in searchable_attributes {
if field_name == "*" { if field_name == "*" {
@ -123,10 +127,15 @@ impl<'ctx> SearchContext<'ctx> {
} }
}; };
restricted_fids.push(fid); if exact_attributes_ids.contains(&fid) {
restricted_exact_fids.push(fid);
} else {
restricted_tolerant_fids.push(fid);
};
} }
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); self.restricted_exact_fids = (!contains_wildcard).then_some(restricted_exact_fids);
self.restricted_tolerant_fids = (!contains_wildcard).then_some(restricted_tolerant_fids);
Ok(()) Ok(())
} }