mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
Introduce the FacetRevRange Iterator struct
This commit is contained in:
parent
58d039a70d
commit
0959e1501f
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -673,6 +673,7 @@ dependencies = [
|
|||||||
"criterion",
|
"criterion",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
|
"either",
|
||||||
"flate2",
|
"flate2",
|
||||||
"fst",
|
"fst",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
|
@ -10,6 +10,7 @@ bstr = "0.2.13"
|
|||||||
byteorder = "1.3.4"
|
byteorder = "1.3.4"
|
||||||
crossbeam-channel = "0.5.0"
|
crossbeam-channel = "0.5.0"
|
||||||
csv = "1.1.3"
|
csv = "1.1.3"
|
||||||
|
either = "1.6.1"
|
||||||
flate2 = "1.0.17"
|
flate2 = "1.0.17"
|
||||||
fst = "0.4.4"
|
fst = "0.4.4"
|
||||||
fxhash = "0.2.1"
|
fxhash = "0.2.1"
|
||||||
|
1
http-ui/Cargo.lock
generated
1
http-ui/Cargo.lock
generated
@ -999,6 +999,7 @@ dependencies = [
|
|||||||
"byteorder",
|
"byteorder",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
|
"either",
|
||||||
"flate2",
|
"flate2",
|
||||||
"fst",
|
"fst",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::ops::Bound::{self, Included, Excluded, Unbounded};
|
use std::ops::Bound::{self, Included, Excluded, Unbounded};
|
||||||
|
|
||||||
use heed::types::DecodeIgnore;
|
use either::Either::{self, Left, Right};
|
||||||
|
use heed::types::{DecodeIgnore, ByteSlice};
|
||||||
use heed::{BytesEncode, BytesDecode};
|
use heed::{BytesEncode, BytesDecode};
|
||||||
use heed::{Database, RoRange, LazyDecode};
|
use heed::{Database, RoRange, RoRevRange, LazyDecode};
|
||||||
|
use log::debug;
|
||||||
use num_traits::Bounded;
|
use num_traits::Bounded;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -76,16 +78,78 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct FacetRevRange<'t, T: 't, KC> {
|
||||||
|
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||||
|
end: Bound<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, T: 't, KC> FacetRevRange<'t, T, KC>
|
||||||
|
where
|
||||||
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
|
T: PartialOrd + Copy + Bounded,
|
||||||
|
{
|
||||||
|
fn new(
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
|
field_id: FieldId,
|
||||||
|
level: u8,
|
||||||
|
left: Bound<T>,
|
||||||
|
right: Bound<T>,
|
||||||
|
) -> heed::Result<FacetRevRange<'t, T, KC>>
|
||||||
|
{
|
||||||
|
let left_bound = match left {
|
||||||
|
Included(left) => Included((field_id, level, left, T::min_value())),
|
||||||
|
Excluded(left) => Excluded((field_id, level, left, T::min_value())),
|
||||||
|
Unbounded => Included((field_id, level, T::min_value(), T::min_value())),
|
||||||
|
};
|
||||||
|
let right_bound = Included((field_id, level, T::max_value(), T::max_value()));
|
||||||
|
let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
|
||||||
|
Ok(FacetRevRange { iter, end: right })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC>
|
||||||
|
where
|
||||||
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
|
KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||||
|
T: PartialOrd + Copy,
|
||||||
|
{
|
||||||
|
type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
loop {
|
||||||
|
match self.iter.next() {
|
||||||
|
Some(Ok(((fid, level, left, right), docids))) => {
|
||||||
|
let must_be_returned = match self.end {
|
||||||
|
Included(end) => right <= end,
|
||||||
|
Excluded(end) => right < end,
|
||||||
|
Unbounded => true,
|
||||||
|
};
|
||||||
|
if must_be_returned {
|
||||||
|
match docids.decode() {
|
||||||
|
Ok(docids) => return Some(Ok(((fid, level, left, right), docids))),
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
},
|
||||||
|
Some(Err(e)) => return Some(Err(e)),
|
||||||
|
None => return None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct FacetIter<'t, T: 't, KC> {
|
pub struct FacetIter<'t, T: 't, KC> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
documents_ids: RoaringBitmap,
|
level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>,
|
||||||
level_iters: Vec<FacetRange<'t, T, KC>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, T, KC> FacetIter<'t, T, KC>
|
impl<'t, T, KC> FacetIter<'t, T, KC>
|
||||||
where
|
where
|
||||||
|
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
@ -97,8 +161,31 @@ where
|
|||||||
) -> heed::Result<FacetIter<'t, T, KC>>
|
) -> heed::Result<FacetIter<'t, T, KC>>
|
||||||
{
|
{
|
||||||
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||||
let level_0_iter = FacetRange::new(rtxn, db, field_id, 0, Unbounded, Unbounded)?;
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
Ok(FacetIter { rtxn, db, field_id, documents_ids, level_iters: vec![level_0_iter] })
|
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_reverse(
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
index: &'t Index,
|
||||||
|
field_id: FieldId,
|
||||||
|
documents_ids: RoaringBitmap,
|
||||||
|
) -> heed::Result<FacetIter<'t, T, KC>>
|
||||||
|
{
|
||||||
|
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||||
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
|
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Right(highest_iter))] })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> {
|
||||||
|
let level = db.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
|
.prefix_iter(rtxn, &[fid][..])?
|
||||||
|
.remap_key_type::<KC>()
|
||||||
|
.last().transpose()?
|
||||||
|
.map(|((_, level, _, _), _)| level);
|
||||||
|
Ok(level)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,35 +193,54 @@ impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC>
|
|||||||
where
|
where
|
||||||
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||||
KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>,
|
KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded + Debug,
|
||||||
{
|
{
|
||||||
type Item = heed::Result<(T, RoaringBitmap)>;
|
type Item = heed::Result<(T, RoaringBitmap)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
loop {
|
'outer: loop {
|
||||||
let last = self.level_iters.last_mut()?;
|
let (documents_ids, last) = self.level_iters.last_mut()?;
|
||||||
|
let is_ascending = last.is_left();
|
||||||
for result in last {
|
for result in last {
|
||||||
|
// If the last iterator must find an empty set of documents it means
|
||||||
|
// that we found all the documents in the sub level iterations already,
|
||||||
|
// we can pop this level iterator.
|
||||||
|
if documents_ids.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(((_fid, level, left, right), mut docids)) => {
|
Ok(((_fid, level, left, right), mut docids)) => {
|
||||||
if level == 0 {
|
|
||||||
docids.intersect_with(&self.documents_ids);
|
docids.intersect_with(&documents_ids);
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
self.documents_ids.difference_with(&docids);
|
documents_ids.difference_with(&docids);
|
||||||
|
|
||||||
|
if level == 0 {
|
||||||
|
debug!("found {:?} at {:?}", docids, left);
|
||||||
return Some(Ok((left, docids)));
|
return Some(Ok((left, docids)));
|
||||||
}
|
}
|
||||||
} else if !docids.is_disjoint(&self.documents_ids) {
|
|
||||||
let result = FacetRange::new(
|
let rtxn = self.rtxn;
|
||||||
self.rtxn,
|
let db = self.db;
|
||||||
self.db,
|
let fid = self.field_id;
|
||||||
self.field_id,
|
let left = Included(left);
|
||||||
level - 1,
|
let right = Included(right);
|
||||||
Included(left),
|
|
||||||
Included(right),
|
debug!("calling with {:?} to {:?} (level {}) to find {:?}",
|
||||||
|
left, right, level - 1, docids,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let result = if is_ascending {
|
||||||
|
FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left)
|
||||||
|
} else {
|
||||||
|
FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right)
|
||||||
|
};
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(iter) => {
|
Ok(iter) => {
|
||||||
self.level_iters.push(iter);
|
self.level_iters.push((docids, iter));
|
||||||
break;
|
continue 'outer;
|
||||||
},
|
},
|
||||||
Err(e) => return Some(Err(e)),
|
Err(e) => return Some(Err(e)),
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ use crate::query_tokens::{QueryTokens, QueryToken};
|
|||||||
use crate::{Index, FieldId, DocumentId, Criterion};
|
use crate::{Index, FieldId, DocumentId, Criterion};
|
||||||
|
|
||||||
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||||
|
pub use self::facet::{FacetIter};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||||
@ -151,7 +152,7 @@ impl<'a> Search<'a> {
|
|||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
order: Order,
|
ascending: bool,
|
||||||
documents_ids: RoaringBitmap,
|
documents_ids: RoaringBitmap,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
) -> anyhow::Result<Vec<DocumentId>>
|
) -> anyhow::Result<Vec<DocumentId>>
|
||||||
@ -160,34 +161,30 @@ impl<'a> Search<'a> {
|
|||||||
let mut output = Vec::new();
|
let mut output = Vec::new();
|
||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
facet_number_recurse::<f64, FacetLevelValueF64Codec, _>(
|
let facet_fn = if ascending {
|
||||||
self.rtxn,
|
FacetIter::<f64, FacetLevelValueF64Codec>::new
|
||||||
self.index,
|
} else {
|
||||||
field_id,
|
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse
|
||||||
order,
|
};
|
||||||
documents_ids,
|
for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|
||||||
|_val, docids| {
|
let (_val, docids) = result?;
|
||||||
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
||||||
debug!("Facet ordered iteration find {:?}", docids);
|
output.push(docids);
|
||||||
output.push(docids);
|
if limit_tmp == 0 { break }
|
||||||
limit_tmp != 0 // Returns `true` if we must continue iterating
|
}
|
||||||
}
|
|
||||||
)?;
|
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
facet_number_recurse::<i64, FacetLevelValueI64Codec, _>(
|
let facet_fn = if ascending {
|
||||||
self.rtxn,
|
FacetIter::<i64, FacetLevelValueI64Codec>::new
|
||||||
self.index,
|
} else {
|
||||||
field_id,
|
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse
|
||||||
order,
|
};
|
||||||
documents_ids,
|
for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|
||||||
|_val, docids| {
|
let (_val, docids) = result?;
|
||||||
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
||||||
debug!("Facet ordered iteration find {:?}", docids);
|
output.push(docids);
|
||||||
output.push(docids);
|
if limit_tmp == 0 { break }
|
||||||
limit_tmp != 0 // Returns `true` if we must continue iterating
|
}
|
||||||
}
|
|
||||||
)?;
|
|
||||||
},
|
},
|
||||||
FacetType::String => bail!("criteria facet type must be a number"),
|
FacetType::String => bail!("criteria facet type must be a number"),
|
||||||
}
|
}
|
||||||
@ -214,16 +211,16 @@ impl<'a> Search<'a> {
|
|||||||
let criteria = self.index.criteria(self.rtxn)?;
|
let criteria = self.index.criteria(self.rtxn)?;
|
||||||
let result = criteria.into_iter().flat_map(|criterion| {
|
let result = criteria.into_iter().flat_map(|criterion| {
|
||||||
match criterion {
|
match criterion {
|
||||||
Criterion::Asc(fid) => Some((fid, Order::Asc)),
|
Criterion::Asc(fid) => Some((fid, true)),
|
||||||
Criterion::Desc(fid) => Some((fid, Order::Desc)),
|
Criterion::Desc(fid) => Some((fid, false)),
|
||||||
_ => None
|
_ => None
|
||||||
}
|
}
|
||||||
}).next();
|
}).next();
|
||||||
match result {
|
match result {
|
||||||
Some((fid, order)) => {
|
Some((fid, is_ascending)) => {
|
||||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||||
let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
|
let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
|
||||||
Some((fid, ftype, order))
|
Some((fid, ftype, is_ascending))
|
||||||
},
|
},
|
||||||
None => None,
|
None => None,
|
||||||
}
|
}
|
||||||
@ -244,7 +241,9 @@ impl<'a> Search<'a> {
|
|||||||
// If the query is not set or results in no DFAs but
|
// If the query is not set or results in no DFAs but
|
||||||
// there is some facet conditions we return a placeholder.
|
// there is some facet conditions we return a placeholder.
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, facet_candidates, limit)?,
|
Some((fid, ftype, is_ascending)) => {
|
||||||
|
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
|
||||||
|
},
|
||||||
None => facet_candidates.iter().take(limit).collect(),
|
None => facet_candidates.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||||
@ -253,7 +252,9 @@ impl<'a> Search<'a> {
|
|||||||
// If the query is not set or results in no DFAs we return a placeholder.
|
// If the query is not set or results in no DFAs we return a placeholder.
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, documents_ids, limit)?,
|
Some((fid, ftype, is_ascending)) => {
|
||||||
|
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
|
||||||
|
},
|
||||||
None => documents_ids.iter().take(limit).collect(),
|
None => documents_ids.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||||
|
Loading…
Reference in New Issue
Block a user