mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
Merge #4682
4682: Speed Up Filter ANDs operations r=Kerollmops a=Kerollmops This PR fixes #4659 and improves the way we do AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading, copying bytes in memory to use and keep a subset of the containers in the bitmap. ### Some Example Results With a 45M documents dataset running on a good NVMe. This example filter was taking 77ms and with this PR only 13ms (6x speedup): ```sql artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990 ``` By reordering the filter AND clauses we can reach a constant 8ms execution time. However, note that it is a manual operation. On the other side the previous filter pipeline is still at a constant 45ms execution time with this filter. (6x speedup) ```sql artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS) ``` ### To Do - [x] Rebase on `release-v1.9.0`. - [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ slower this way. - [x] When the universe is required use the universe given in parameter if possible. Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
a838f39fce
11
Cargo.lock
generated
11
Cargo.lock
generated
@ -4377,12 +4377,6 @@ dependencies = [
|
|||||||
"winreg",
|
"winreg",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "retain_mut"
|
|
||||||
version = "0.1.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.8"
|
version = "0.17.8"
|
||||||
@ -4400,13 +4394,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "roaring"
|
name = "roaring"
|
||||||
version = "0.10.2"
|
version = "0.10.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
|
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"retain_mut",
|
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -47,6 +47,12 @@ pub struct FacetGroupValue {
|
|||||||
pub bitmap: RoaringBitmap,
|
pub bitmap: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FacetGroupLazyValue<'b> {
|
||||||
|
pub size: u8,
|
||||||
|
pub bitmap_bytes: &'b [u8],
|
||||||
|
}
|
||||||
|
|
||||||
pub struct FacetGroupKeyCodec<T> {
|
pub struct FacetGroupKeyCodec<T> {
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
@ -69,6 +75,7 @@ where
|
|||||||
Ok(Cow::Owned(v))
|
Ok(Cow::Owned(v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||||
where
|
where
|
||||||
T: BytesDecode<'a>,
|
T: BytesDecode<'a>,
|
||||||
@ -84,6 +91,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct FacetGroupValueCodec;
|
pub struct FacetGroupValueCodec;
|
||||||
|
|
||||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||||
type EItem = FacetGroupValue;
|
type EItem = FacetGroupValue;
|
||||||
|
|
||||||
@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
|||||||
Ok(Cow::Owned(v))
|
Ok(Cow::Owned(v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||||
type DItem = FacetGroupValue;
|
type DItem = FacetGroupValue;
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||||
let size = bytes[0];
|
let size = bytes[0];
|
||||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||||
Ok(FacetGroupValue { size, bitmap })
|
Ok(FacetGroupValue { size, bitmap })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct FacetGroupLazyValueCodec;
|
||||||
|
|
||||||
|
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
|
||||||
|
type DItem = FacetGroupLazyValue<'a>;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||||
|
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::io;
|
use std::io::{self, Cursor};
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||||
@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn intersection_with_serialized(
|
||||||
|
mut bytes: &[u8],
|
||||||
|
other: &RoaringBitmap,
|
||||||
|
) -> io::Result<RoaringBitmap> {
|
||||||
|
// See above `deserialize_from` method for implementation details.
|
||||||
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
|
let mut bitmap = RoaringBitmap::new();
|
||||||
|
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||||
|
if other.contains(integer) {
|
||||||
|
bitmap.insert(integer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(bitmap)
|
||||||
|
} else {
|
||||||
|
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Merge serialized CboRoaringBitmaps in a buffer.
|
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||||
///
|
///
|
||||||
/// if the merged values length is under the threshold, values are directly
|
/// if the merged values length is under the threshold, values are directly
|
||||||
|
@ -38,7 +38,7 @@ where
|
|||||||
field_id,
|
field_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
@ -81,7 +81,7 @@ where
|
|||||||
field_id,
|
field_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||||
// We first fill the heap with values from the highest level
|
// We first fill the heap with values from the highest level
|
||||||
let starting_key =
|
let starting_key =
|
||||||
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
|
@ -4,9 +4,11 @@ use heed::BytesEncode;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{
|
||||||
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
|
||||||
|
};
|
||||||
use crate::heed_codec::BytesRefCodec;
|
use crate::heed_codec::BytesRefCodec;
|
||||||
use crate::Result;
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
/// Find all the document ids for which the given field contains a value contained within
|
/// Find all the document ids for which the given field contains a value contained within
|
||||||
/// the two bounds.
|
/// the two bounds.
|
||||||
@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
|||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
docids: &mut RoaringBitmap,
|
docids: &mut RoaringBitmap,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
@ -46,13 +49,15 @@ where
|
|||||||
}
|
}
|
||||||
Bound::Unbounded => Bound::Unbounded,
|
Bound::Unbounded => Bound::Unbounded,
|
||||||
};
|
};
|
||||||
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
|
||||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
|
|
||||||
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
if let Some(starting_left_bound) =
|
||||||
|
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
|
||||||
|
{
|
||||||
let rightmost_bound =
|
let rightmost_bound =
|
||||||
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||||
let group_size = usize::MAX;
|
let group_size = usize::MAX;
|
||||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -64,12 +69,16 @@ where
|
|||||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: Bound<&'b [u8]>,
|
left: Bound<&'b [u8]>,
|
||||||
right: Bound<&'b [u8]>,
|
right: Bound<&'b [u8]>,
|
||||||
|
/// The subset of documents ids that are useful for this search.
|
||||||
|
/// Great performance optimizations can be achieved by only fetching values matching this subset.
|
||||||
|
universe: Option<&'bitmap RoaringBitmap>,
|
||||||
docids: &'bitmap mut RoaringBitmap,
|
docids: &'bitmap mut RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||||
let left_key =
|
let left_key =
|
||||||
@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
|
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
|
||||||
*self.docids |= value.bitmap;
|
*self.docids |= match self.universe {
|
||||||
|
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||||
|
value.bitmap_bytes,
|
||||||
|
universe,
|
||||||
|
)?,
|
||||||
|
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
left_condition && right_condition
|
left_condition && right_condition
|
||||||
};
|
};
|
||||||
if should_take_whole_group {
|
if should_take_whole_group {
|
||||||
*self.docids |= &previous_value.bitmap;
|
*self.docids |= match self.universe {
|
||||||
|
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||||
|
previous_value.bitmap_bytes,
|
||||||
|
universe,
|
||||||
|
)?,
|
||||||
|
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||||
|
};
|
||||||
previous_key = next_key;
|
previous_key = next_key;
|
||||||
previous_value = next_value;
|
previous_value = next_value;
|
||||||
continue;
|
continue;
|
||||||
@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
left_condition && right_condition
|
left_condition && right_condition
|
||||||
};
|
};
|
||||||
if should_take_whole_group {
|
if should_take_whole_group {
|
||||||
*self.docids |= &previous_value.bitmap;
|
*self.docids |= match self.universe {
|
||||||
|
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
|
||||||
|
previous_value.bitmap_bytes,
|
||||||
|
universe,
|
||||||
|
)?,
|
||||||
|
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
let level = level - 1;
|
let level = level - 1;
|
||||||
let starting_left_bound = previous_key.left_bound;
|
let starting_left_bound = previous_key.left_bound;
|
||||||
@ -365,6 +392,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -384,6 +412,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -418,6 +447,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -439,6 +469,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -474,6 +505,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -499,6 +531,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -537,6 +570,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -556,6 +590,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -571,6 +606,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&Bound::Unbounded,
|
&Bound::Unbounded,
|
||||||
&Bound::Unbounded,
|
&Bound::Unbounded,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -586,6 +622,7 @@ mod tests {
|
|||||||
1,
|
1,
|
||||||
&Bound::Unbounded,
|
&Bound::Unbounded,
|
||||||
&Bound::Unbounded,
|
&Bound::Unbounded,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -621,6 +658,7 @@ mod tests {
|
|||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -634,6 +672,7 @@ mod tests {
|
|||||||
1,
|
1,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
None,
|
||||||
&mut docids,
|
&mut docids,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
|
|||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||||
|
|
||||||
|
@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
|
|||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
|
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
|
||||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||||
Ok(itertools::Either::Left(DescendingFacetSort {
|
Ok(itertools::Either::Left(DescendingFacetSort {
|
||||||
|
@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};
|
|||||||
|
|
||||||
use either::Either;
|
use either::Either;
|
||||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::{MultiOps, RoaringBitmap};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::facet_range_search;
|
use super::facet_range_search;
|
||||||
@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
|
|||||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||||
|
self.inner_evaluate(rtxn, index, &filterable_fields, None)
|
||||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn evaluate_operator(
|
fn evaluate_operator(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
operator: &Condition<'a>,
|
operator: &Condition<'a>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let numbers_db = index.facet_id_f64_docids;
|
let numbers_db = index.facet_id_f64_docids;
|
||||||
@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
|
|||||||
}
|
}
|
||||||
Condition::NotEqual(val) => {
|
Condition::NotEqual(val) => {
|
||||||
let operator = Condition::Equal(val.clone());
|
let operator = Condition::Equal(val.clone());
|
||||||
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
|
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
|
||||||
let all_ids = index.documents_ids(rtxn)?;
|
let all_ids = index.documents_ids(rtxn)?;
|
||||||
return Ok(all_ids - docids);
|
return Ok(all_ids - docids);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut output = RoaringBitmap::new();
|
let mut output = RoaringBitmap::new();
|
||||||
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
|
Self::explore_facet_number_levels(
|
||||||
|
rtxn,
|
||||||
|
numbers_db,
|
||||||
|
field_id,
|
||||||
|
left,
|
||||||
|
right,
|
||||||
|
universe,
|
||||||
|
&mut output,
|
||||||
|
)?;
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
left: Bound<f64>,
|
left: Bound<f64>,
|
||||||
right: Bound<f64>,
|
right: Bound<f64>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
output: &mut RoaringBitmap,
|
output: &mut RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
match (left, right) {
|
match (left, right) {
|
||||||
@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
|
|||||||
(_, _) => (),
|
(_, _) => (),
|
||||||
}
|
}
|
||||||
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
rtxn, db, field_id, &left, &right, output,
|
rtxn, db, field_id, &left, &right, universe, output,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -332,31 +341,37 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
filterable_fields: &HashSet<String>,
|
filterable_fields: &HashSet<String>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
|
if universe.map_or(false, |u| u.is_empty()) {
|
||||||
|
return Ok(RoaringBitmap::new());
|
||||||
|
}
|
||||||
|
|
||||||
match &self.condition {
|
match &self.condition {
|
||||||
FilterCondition::Not(f) => {
|
FilterCondition::Not(f) => {
|
||||||
let all_ids = index.documents_ids(rtxn)?;
|
|
||||||
let selected = Self::inner_evaluate(
|
let selected = Self::inner_evaluate(
|
||||||
&(f.as_ref().clone()).into(),
|
&(f.as_ref().clone()).into(),
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?;
|
)?;
|
||||||
|
match universe {
|
||||||
|
Some(universe) => Ok(universe - selected),
|
||||||
|
None => {
|
||||||
|
let all_ids = index.documents_ids(rtxn)?;
|
||||||
Ok(all_ids - selected)
|
Ok(all_ids - selected)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
FilterCondition::In { fid, els } => {
|
FilterCondition::In { fid, els } => {
|
||||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
|
||||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
els.iter()
|
||||||
|
.map(|el| Condition::Equal(el.clone()))
|
||||||
for el in els {
|
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
|
||||||
let op = Condition::Equal(el.clone());
|
.union()
|
||||||
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
|
|
||||||
bitmap |= el_bitmap;
|
|
||||||
}
|
|
||||||
Ok(bitmap)
|
|
||||||
} else {
|
} else {
|
||||||
Ok(RoaringBitmap::new())
|
Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
@ -371,7 +386,7 @@ impl<'a> Filter<'a> {
|
|||||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||||
Self::evaluate_operator(rtxn, index, fid, op)
|
Self::evaluate_operator(rtxn, index, fid, universe, op)
|
||||||
} else {
|
} else {
|
||||||
Ok(RoaringBitmap::new())
|
Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
@ -382,14 +397,11 @@ impl<'a> Filter<'a> {
|
|||||||
}))?
|
}))?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FilterCondition::Or(subfilters) => {
|
FilterCondition::Or(subfilters) => subfilters
|
||||||
let mut bitmap = RoaringBitmap::new();
|
.iter()
|
||||||
for f in subfilters {
|
.cloned()
|
||||||
bitmap |=
|
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
|
||||||
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
|
.union(),
|
||||||
}
|
|
||||||
Ok(bitmap)
|
|
||||||
}
|
|
||||||
FilterCondition::And(subfilters) => {
|
FilterCondition::And(subfilters) => {
|
||||||
let mut subfilters_iter = subfilters.iter();
|
let mut subfilters_iter = subfilters.iter();
|
||||||
if let Some(first_subfilter) = subfilters_iter.next() {
|
if let Some(first_subfilter) = subfilters_iter.next() {
|
||||||
@ -398,16 +410,21 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?;
|
)?;
|
||||||
for f in subfilters_iter {
|
for f in subfilters_iter {
|
||||||
if bitmap.is_empty() {
|
if bitmap.is_empty() {
|
||||||
return Ok(bitmap);
|
return Ok(bitmap);
|
||||||
}
|
}
|
||||||
|
// TODO We are doing the intersections two times,
|
||||||
|
// it could be more efficient
|
||||||
|
// Can't I just replace this `&=` by an `=`?
|
||||||
bitmap &= Self::inner_evaluate(
|
bitmap &= Self::inner_evaluate(
|
||||||
&(f.clone()).into(),
|
&(f.clone()).into(),
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
Some(&bitmap),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
Ok(bitmap)
|
Ok(bitmap)
|
||||||
@ -507,6 +524,7 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let geo_lng_token = Token::new(
|
let geo_lng_token = Token::new(
|
||||||
@ -539,6 +557,7 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let condition_right = FilterCondition::Condition {
|
let condition_right = FilterCondition::Condition {
|
||||||
@ -552,6 +571,7 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
left | right
|
left | right
|
||||||
@ -567,6 +587,7 @@ impl<'a> Filter<'a> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
filterable_fields,
|
filterable_fields,
|
||||||
|
universe,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
|||||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::filter::{BadGeoError, Filter};
|
pub use self::filter::{BadGeoError, Filter};
|
||||||
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
||||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||||
use crate::heed_codec::BytesRefCodec;
|
use crate::heed_codec::BytesRefCodec;
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the first facet value in the facet database
|
/// Get the first facet value in the facet database
|
||||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -78,9 +78,9 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the last facet value in the facet database
|
/// Get the last facet value in the facet database
|
||||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -102,9 +102,9 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the height of the highest level in the facet database
|
/// Get the height of the highest level in the facet database
|
||||||
pub(crate) fn get_highest_level<'t>(
|
pub(crate) fn get_highest_level<'t, DC>(
|
||||||
txn: &'t RoTxn<'t>,
|
txn: &'t RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<u8> {
|
) -> heed::Result<u8> {
|
||||||
let field_id_prefix = &field_id.to_be_bytes();
|
let field_id_prefix = &field_id.to_be_bytes();
|
||||||
|
@ -548,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(level = "trace", skip_all, target = "search")]
|
||||||
pub fn filtered_universe(
|
pub fn filtered_universe(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &RoTxn<'_>,
|
txn: &RoTxn<'_>,
|
||||||
|
Loading…
Reference in New Issue
Block a user