mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-31 07:23:15 +08:00
Expose the universe to do efficient intersections on deserialization
This commit is contained in:
parent
4ca4a3f954
commit
a6f3a01c6a
@ -38,7 +38,7 @@ where
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
Ok(())
|
||||
} else {
|
||||
@ -81,7 +81,7 @@ where
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
// We first fill the heap with values from the highest level
|
||||
let starting_key =
|
||||
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
|
@ -4,9 +4,11 @@ use heed::BytesEncode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::Result;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
/// Find all the document ids for which the given field contains a value contained within
|
||||
/// the two bounds.
|
||||
@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||
field_id: u16,
|
||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
docids: &mut RoaringBitmap,
|
||||
) -> Result<()>
|
||||
where
|
||||
@ -46,13 +49,15 @@ where
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(starting_left_bound) =
|
||||
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
|
||||
{
|
||||
let rightmost_bound =
|
||||
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
let group_size = usize::MAX;
|
||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||
Ok(())
|
||||
@ -64,12 +69,16 @@ where
|
||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
|
||||
field_id: u16,
|
||||
left: Bound<&'b [u8]>,
|
||||
right: Bound<&'b [u8]>,
|
||||
/// The subset of documents ids that are useful for this search.
|
||||
/// Great performance optimizations can be achieved by only fetching values matching this subset.
|
||||
universe: Option<&'bitmap RoaringBitmap>,
|
||||
docids: &'bitmap mut RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||
let left_key =
|
||||
@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
}
|
||||
|
||||
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
|
||||
*self.docids |= value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
|
||||
};
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
left_condition && right_condition
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= &previous_value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
previous_key = next_key;
|
||||
previous_value = next_value;
|
||||
continue;
|
||||
@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
left_condition && right_condition
|
||||
};
|
||||
if should_take_whole_group {
|
||||
*self.docids |= &previous_value.bitmap;
|
||||
*self.docids |= match self.universe {
|
||||
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||
previous_value.bitmap_bytes,
|
||||
universe,
|
||||
)?,
|
||||
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||
};
|
||||
} else {
|
||||
let level = level - 1;
|
||||
let starting_left_bound = previous_key.left_bound;
|
||||
@ -365,6 +392,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -384,6 +412,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -418,6 +447,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -439,6 +469,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -474,6 +505,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -499,6 +531,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -537,6 +570,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -556,6 +590,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -571,6 +606,7 @@ mod tests {
|
||||
0,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -586,6 +622,7 @@ mod tests {
|
||||
1,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -621,6 +658,7 @@ mod tests {
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
@ -634,6 +672,7 @@ mod tests {
|
||||
1,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||
|
||||
|
@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(itertools::Either::Left(DescendingFacetSort {
|
||||
|
@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields, None)
|
||||
}
|
||||
|
||||
fn evaluate_operator(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
field_id: FieldId,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
operator: &Condition<'a>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let numbers_db = index.facet_id_f64_docids;
|
||||
@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
Condition::NotEqual(val) => {
|
||||
let operator = Condition::Equal(val.clone());
|
||||
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
|
||||
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
return Ok(all_ids - docids);
|
||||
}
|
||||
};
|
||||
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
left,
|
||||
right,
|
||||
universe,
|
||||
&mut output,
|
||||
)?;
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
|
||||
field_id: FieldId,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
match (left, right) {
|
||||
@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
|
||||
(_, _) => (),
|
||||
}
|
||||
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
rtxn, db, field_id, &left, &right, output,
|
||||
rtxn, db, field_id, &left, &right, universe, output,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
@ -332,15 +341,18 @@ impl<'a> Filter<'a> {
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
filterable_fields: &HashSet<String>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
match &self.condition {
|
||||
FilterCondition::Not(f) => {
|
||||
// TODO improve the documents_ids to also support intersections at deserialize time.
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
let selected = Self::inner_evaluate(
|
||||
&(f.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
Ok(all_ids - selected)
|
||||
}
|
||||
@ -353,7 +365,8 @@ impl<'a> Filter<'a> {
|
||||
|
||||
for el in els {
|
||||
let op = Condition::Equal(el.clone());
|
||||
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
|
||||
let el_bitmap =
|
||||
Self::evaluate_operator(rtxn, index, fid, universe, &op)?;
|
||||
bitmap |= el_bitmap;
|
||||
}
|
||||
Ok(bitmap)
|
||||
@ -371,7 +384,7 @@ impl<'a> Filter<'a> {
|
||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
Self::evaluate_operator(rtxn, index, fid, op)
|
||||
Self::evaluate_operator(rtxn, index, fid, universe, op)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
@ -384,7 +397,8 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
FilterCondition::Or(subfilters) => subfilters
|
||||
.iter()
|
||||
.map(|f| Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields))
|
||||
.cloned()
|
||||
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
|
||||
.union(),
|
||||
FilterCondition::And(subfilters) => {
|
||||
let mut subfilters_iter = subfilters.iter();
|
||||
@ -394,16 +408,21 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
for f in subfilters_iter {
|
||||
if bitmap.is_empty() {
|
||||
return Ok(bitmap);
|
||||
}
|
||||
// TODO We are doing the intersections two times,
|
||||
// it could be more efficient
|
||||
// Can't I just replace this `&=` by an `=`?
|
||||
bitmap &= Self::inner_evaluate(
|
||||
&(f.clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
Some(&bitmap),
|
||||
)?;
|
||||
}
|
||||
Ok(bitmap)
|
||||
@ -503,6 +522,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
let geo_lng_token = Token::new(
|
||||
@ -535,6 +555,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
let condition_right = FilterCondition::Condition {
|
||||
@ -548,6 +569,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?;
|
||||
|
||||
left | right
|
||||
@ -563,6 +585,7 @@ impl<'a> Filter<'a> {
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
universe,
|
||||
)?
|
||||
};
|
||||
|
||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::{BadGeoError, Filter};
|
||||
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result};
|
||||
|
||||
@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
|
||||
}
|
||||
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -78,9 +78,9 @@ where
|
||||
}
|
||||
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -102,9 +102,9 @@ where
|
||||
}
|
||||
|
||||
/// Get the height of the highest level in the facet database
|
||||
pub(crate) fn get_highest_level<'t>(
|
||||
pub(crate) fn get_highest_level<'t, DC>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
|
Loading…
x
Reference in New Issue
Block a user