Introduce the FacetRevRange Iterator struct

This commit is contained in:
Clément Renault 2020-11-28 14:52:50 +01:00
parent 58d039a70d
commit 0959e1501f
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 166 additions and 56 deletions

1
Cargo.lock generated
View File

@ -673,6 +673,7 @@ dependencies = [
"criterion", "criterion",
"crossbeam-channel", "crossbeam-channel",
"csv", "csv",
"either",
"flate2", "flate2",
"fst", "fst",
"fxhash", "fxhash",

View File

@ -10,6 +10,7 @@ bstr = "0.2.13"
byteorder = "1.3.4" byteorder = "1.3.4"
crossbeam-channel = "0.5.0" crossbeam-channel = "0.5.0"
csv = "1.1.3" csv = "1.1.3"
either = "1.6.1"
flate2 = "1.0.17" flate2 = "1.0.17"
fst = "0.4.4" fst = "0.4.4"
fxhash = "0.2.1" fxhash = "0.2.1"

1
http-ui/Cargo.lock generated
View File

@ -999,6 +999,7 @@ dependencies = [
"byteorder", "byteorder",
"crossbeam-channel", "crossbeam-channel",
"csv", "csv",
"either",
"flate2", "flate2",
"fst", "fst",
"fxhash", "fxhash",

View File

@ -1,9 +1,11 @@
use std::fmt::Debug; use std::fmt::Debug;
use std::ops::Bound::{self, Included, Excluded, Unbounded}; use std::ops::Bound::{self, Included, Excluded, Unbounded};
use heed::types::DecodeIgnore; use either::Either::{self, Left, Right};
use heed::types::{DecodeIgnore, ByteSlice};
use heed::{BytesEncode, BytesDecode}; use heed::{BytesEncode, BytesDecode};
use heed::{Database, RoRange, LazyDecode}; use heed::{Database, RoRange, RoRevRange, LazyDecode};
use log::debug;
use num_traits::Bounded; use num_traits::Bounded;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -76,16 +78,78 @@ where
} }
} }
/// Iterator state for walking facet entries through a reverse database range.
///
/// The `Iterator` implementation for this type yields
/// `((FieldId, u8, T, T), RoaringBitmap)` entries and uses `end` to decide
/// which entries are returned.
struct FacetRevRange<'t, T: 't, KC> {
/// Reverse range over the database; the bitmap values are wrapped in
/// `LazyDecode`, so they are only decoded on demand.
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
/// Bound compared against the right value of each entry key when iterating.
end: Bound<T>,
}
impl<'t, T: 't, KC> FacetRevRange<'t, T, KC>
where
    KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
    T: PartialOrd + Copy + Bounded,
{
    /// Opens a reverse range over the entries of `field_id` at `level`.
    ///
    /// The database range starts at a key derived from `left` (or from
    /// `T::min_value()` when `left` is `Unbounded`) and always stops at
    /// `(field_id, level, T::max_value(), T::max_value())`, inclusive.
    /// `right` is not part of the database range: it is stored in `end`
    /// and applied entry by entry while iterating.
    ///
    /// Returns the error reported by `rev_range` if the range cannot be opened.
    fn new(
        rtxn: &'t heed::RoTxn,
        db: Database<KC, CboRoaringBitmapCodec>,
        field_id: FieldId,
        level: u8,
        left: Bound<T>,
        right: Bound<T>,
    ) -> heed::Result<FacetRevRange<'t, T, KC>> {
        let lowest = T::min_value();
        let highest = T::max_value();

        // NOTE(review): an `Excluded` start only rules out the exact key
        // `(field_id, level, left, T::min_value())`; presumably entries whose
        // left value equals `left` but with a larger right value still fall
        // inside the range — confirm against the key codec ordering.
        let start_key = match left {
            Included(value) => Included((field_id, level, value, lowest)),
            Excluded(value) => Excluded((field_id, level, value, lowest)),
            Unbounded => Included((field_id, level, lowest, lowest)),
        };
        let stop_key = Included((field_id, level, highest, highest));

        db.lazily_decode_data()
            .rev_range(rtxn, &(start_key, stop_key))
            .map(|iter| FacetRevRange { iter, end: right })
    }
}
impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC>
where
    KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
    KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
    T: PartialOrd + Copy,
{
    type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;

    /// Returns the next entry of the underlying reverse range whose right
    /// value satisfies the `end` bound.
    ///
    /// Entries that do not satisfy the bound are skipped without decoding
    /// their bitmap. Iteration errors and decoding errors are yielded as
    /// `Some(Err(_))`; `None` is returned once the underlying range is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let end = self.end;

        for entry in self.iter.by_ref() {
            let (key, lazy_docids) = match entry {
                Ok(entry) => entry,
                Err(e) => return Some(Err(e)),
            };

            let (_, _, _, right) = key;
            let within_end = match end {
                Included(limit) => right <= limit,
                Excluded(limit) => right < limit,
                Unbounded => true,
            };

            if within_end {
                // The bitmap is only decoded for entries that are returned.
                return Some(lazy_docids.decode().map(|docids| (key, docids)));
            }
        }

        None
    }
}
pub struct FacetIter<'t, T: 't, KC> { pub struct FacetIter<'t, T: 't, KC> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<KC, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
documents_ids: RoaringBitmap, level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>,
level_iters: Vec<FacetRange<'t, T, KC>>,
} }
impl<'t, T, KC> FacetIter<'t, T, KC> impl<'t, T, KC> FacetIter<'t, T, KC>
where where
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded, T: PartialOrd + Copy + Bounded,
{ {
@ -97,8 +161,31 @@ where
) -> heed::Result<FacetIter<'t, T, KC>> ) -> heed::Result<FacetIter<'t, T, KC>>
{ {
let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
let level_0_iter = FacetRange::new(rtxn, db, field_id, 0, Unbounded, Unbounded)?; let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
Ok(FacetIter { rtxn, db, field_id, documents_ids, level_iters: vec![level_0_iter] }) let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] })
}
/// Builds a `FacetIter` whose initial level iterator is a `FacetRevRange`.
///
/// The starting level is the one reported by `Self::highest_level` for
/// `field_id`, or `0` when it reports none. The initial range is unbounded
/// on both sides and is paired with `documents_ids`.
pub fn new_reverse(
    rtxn: &'t heed::RoTxn,
    index: &'t Index,
    field_id: FieldId,
    documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t, T, KC>> {
    let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
    let top_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
    let top_range = FacetRevRange::new(rtxn, db, field_id, top_level, Unbounded, Unbounded)?;
    let level_iters = vec![(documents_ids, Right(top_range))];
    Ok(FacetIter { rtxn, db, field_id, level_iters })
}
/// Returns the level stored in the last key found under the `fid` prefix,
/// or `None` when no entry has that prefix.
///
/// Errors from opening the prefix iterator or from reading the last entry
/// are propagated.
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> {
// Keys are looked up as raw bytes prefixed by `fid` and the values are
// ignored (`DecodeIgnore`); only the key is decoded, through `KC`.
let level = db.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter(rtxn, &[fid][..])?
.remap_key_type::<KC>()
// NOTE(review): presumably the last key under the prefix carries the
// highest level because of the key ordering — confirm against the codec.
.last().transpose()?
.map(|((_, level, _, _), _)| level);
Ok(level)
} }
} }
@ -106,35 +193,54 @@ impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC>
where where
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>, KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded, T: PartialOrd + Copy + Bounded + Debug,
{ {
type Item = heed::Result<(T, RoaringBitmap)>; type Item = heed::Result<(T, RoaringBitmap)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { 'outer: loop {
let last = self.level_iters.last_mut()?; let (documents_ids, last) = self.level_iters.last_mut()?;
let is_ascending = last.is_left();
for result in last { for result in last {
// If the last iterator must find an empty set of documents it means
// that we found all the documents in the sub level iterations already,
// we can pop this level iterator.
if documents_ids.is_empty() {
break;
}
match result { match result {
Ok(((_fid, level, left, right), mut docids)) => { Ok(((_fid, level, left, right), mut docids)) => {
if level == 0 {
docids.intersect_with(&self.documents_ids); docids.intersect_with(&documents_ids);
if !docids.is_empty() { if !docids.is_empty() {
self.documents_ids.difference_with(&docids); documents_ids.difference_with(&docids);
if level == 0 {
debug!("found {:?} at {:?}", docids, left);
return Some(Ok((left, docids))); return Some(Ok((left, docids)));
} }
} else if !docids.is_disjoint(&self.documents_ids) {
let result = FacetRange::new( let rtxn = self.rtxn;
self.rtxn, let db = self.db;
self.db, let fid = self.field_id;
self.field_id, let left = Included(left);
level - 1, let right = Included(right);
Included(left),
Included(right), debug!("calling with {:?} to {:?} (level {}) to find {:?}",
left, right, level - 1, docids,
); );
let result = if is_ascending {
FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left)
} else {
FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right)
};
match result { match result {
Ok(iter) => { Ok(iter) => {
self.level_iters.push(iter); self.level_iters.push((docids, iter));
break; continue 'outer;
}, },
Err(e) => return Some(Err(e)), Err(e) => return Some(Err(e)),
} }

View File

@ -17,6 +17,7 @@ use crate::query_tokens::{QueryTokens, QueryToken};
use crate::{Index, FieldId, DocumentId, Criterion}; use crate::{Index, FieldId, DocumentId, Criterion};
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
pub use self::facet::{FacetIter};
// Building these factories is not free. // Building these factories is not free.
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
@ -151,7 +152,7 @@ impl<'a> Search<'a> {
&self, &self,
field_id: FieldId, field_id: FieldId,
facet_type: FacetType, facet_type: FacetType,
order: Order, ascending: bool,
documents_ids: RoaringBitmap, documents_ids: RoaringBitmap,
limit: usize, limit: usize,
) -> anyhow::Result<Vec<DocumentId>> ) -> anyhow::Result<Vec<DocumentId>>
@ -160,34 +161,30 @@ impl<'a> Search<'a> {
let mut output = Vec::new(); let mut output = Vec::new();
match facet_type { match facet_type {
FacetType::Float => { FacetType::Float => {
facet_number_recurse::<f64, FacetLevelValueF64Codec, _>( let facet_fn = if ascending {
self.rtxn, FacetIter::<f64, FacetLevelValueF64Codec>::new
self.index, } else {
field_id, FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse
order, };
documents_ids, for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|_val, docids| { let (_val, docids) = result?;
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
debug!("Facet ordered iteration find {:?}", docids); output.push(docids);
output.push(docids); if limit_tmp == 0 { break }
limit_tmp != 0 // Returns `true` if we must continue iterating }
}
)?;
}, },
FacetType::Integer => { FacetType::Integer => {
facet_number_recurse::<i64, FacetLevelValueI64Codec, _>( let facet_fn = if ascending {
self.rtxn, FacetIter::<i64, FacetLevelValueI64Codec>::new
self.index, } else {
field_id, FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse
order, };
documents_ids, for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|_val, docids| { let (_val, docids) = result?;
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
debug!("Facet ordered iteration find {:?}", docids); output.push(docids);
output.push(docids); if limit_tmp == 0 { break }
limit_tmp != 0 // Returns `true` if we must continue iterating }
}
)?;
}, },
FacetType::String => bail!("criteria facet type must be a number"), FacetType::String => bail!("criteria facet type must be a number"),
} }
@ -214,16 +211,16 @@ impl<'a> Search<'a> {
let criteria = self.index.criteria(self.rtxn)?; let criteria = self.index.criteria(self.rtxn)?;
let result = criteria.into_iter().flat_map(|criterion| { let result = criteria.into_iter().flat_map(|criterion| {
match criterion { match criterion {
Criterion::Asc(fid) => Some((fid, Order::Asc)), Criterion::Asc(fid) => Some((fid, true)),
Criterion::Desc(fid) => Some((fid, Order::Desc)), Criterion::Desc(fid) => Some((fid, false)),
_ => None _ => None
} }
}).next(); }).next();
match result { match result {
Some((fid, order)) => { Some((fid, is_ascending)) => {
let faceted_fields = self.index.faceted_fields(self.rtxn)?; let faceted_fields = self.index.faceted_fields(self.rtxn)?;
let ftype = *faceted_fields.get(&fid).context("unknown field id")?; let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
Some((fid, ftype, order)) Some((fid, ftype, is_ascending))
}, },
None => None, None => None,
} }
@ -244,7 +241,9 @@ impl<'a> Search<'a> {
// If the query is not set or results in no DFAs but // If the query is not set or results in no DFAs but
// there is some facet conditions we return a placeholder. // there is some facet conditions we return a placeholder.
let documents_ids = match order_by_facet { let documents_ids = match order_by_facet {
Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, facet_candidates, limit)?, Some((fid, ftype, is_ascending)) => {
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
},
None => facet_candidates.iter().take(limit).collect(), None => facet_candidates.iter().take(limit).collect(),
}; };
return Ok(SearchResult { documents_ids, ..Default::default() }) return Ok(SearchResult { documents_ids, ..Default::default() })
@ -253,7 +252,9 @@ impl<'a> Search<'a> {
// If the query is not set or results in no DFAs we return a placeholder. // If the query is not set or results in no DFAs we return a placeholder.
let documents_ids = self.index.documents_ids(self.rtxn)?; let documents_ids = self.index.documents_ids(self.rtxn)?;
let documents_ids = match order_by_facet { let documents_ids = match order_by_facet {
Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, documents_ids, limit)?, Some((fid, ftype, is_ascending)) => {
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
},
None => documents_ids.iter().take(limit).collect(), None => documents_ids.iter().take(limit).collect(),
}; };
return Ok(SearchResult { documents_ids, ..Default::default() }) return Ok(SearchResult { documents_ids, ..Default::default() })