Make the facet level search system generic on f64 and i64

This commit is contained in:
Clément Renault 2020-11-18 21:52:08 +01:00
parent 9e2cbe3362
commit 38c76754ef
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 166 additions and 76 deletions

5
Cargo.lock generated
View File

@ -617,6 +617,7 @@ dependencies = [
"maplit", "maplit",
"memmap", "memmap",
"near-proximity", "near-proximity",
"num-traits",
"obkv", "obkv",
"once_cell", "once_cell",
"ordered-float", "ordered-float",
@ -675,9 +676,9 @@ dependencies = [
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.12" version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [ dependencies = [
"autocfg", "autocfg",
] ]

View File

@ -21,6 +21,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.3" linked-hash-map = "0.5.3"
memmap = "0.7.0" memmap = "0.7.0"
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" } near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" }
num-traits = "0.2.14"
obkv = "0.1.0" obkv = "0.1.0"
once_cell = "1.4.0" once_cell = "1.4.0"
ordered-float = "2.0.0" ordered-float = "2.0.0"

View File

@ -1,7 +1,9 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt; use std::error::Error as StdError;
use std::fmt::{self, Debug};
use std::ops::Bound::{self, Unbounded, Included, Excluded}; use std::ops::Bound::{self, Unbounded, Included, Excluded};
use std::str::FromStr;
use anyhow::{bail, ensure, Context}; use anyhow::{bail, ensure, Context};
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
@ -9,11 +11,12 @@ use heed::types::DecodeIgnore;
use levenshtein_automata::DFA; use levenshtein_automata::DFA;
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
use log::debug; use log::debug;
use num_traits::Bounded;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap; use roaring::bitmap::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetLevelValueI64Codec; use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
use crate::mdfs::Mdfs; use crate::mdfs::Mdfs;
use crate::query_tokens::{QueryTokens, QueryToken}; use crate::query_tokens::{QueryTokens, QueryToken};
use crate::{Index, DocumentId}; use crate::{Index, DocumentId};
@ -24,20 +27,21 @@ static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true)); static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
// TODO support also floats // TODO support also floats
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Copy, Clone, PartialEq)]
pub enum FacetOperator { pub enum FacetOperator<T> {
GreaterThan(i64), GreaterThan(T),
GreaterThanOrEqual(i64), GreaterThanOrEqual(T),
LowerThan(i64), LowerThan(T),
LowerThanOrEqual(i64), LowerThanOrEqual(T),
Equal(i64), Equal(T),
Between(i64, i64), Between(T, T),
} }
// TODO also support ANDs, ORs, NOTs. // TODO also support ANDs, ORs, NOTs.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Copy, Clone, PartialEq)]
pub enum FacetCondition { pub enum FacetCondition {
Operator(u8, FacetOperator), OperatorI64(u8, FacetOperator<i64>),
OperatorF64(u8, FacetOperator<f64>),
} }
impl FacetCondition { impl FacetCondition {
@ -48,7 +52,6 @@ impl FacetCondition {
) -> anyhow::Result<Option<FacetCondition>> ) -> anyhow::Result<Option<FacetCondition>>
{ {
use FacetCondition::*; use FacetCondition::*;
use FacetOperator::*;
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields(rtxn)?; let faceted_fields = index.faceted_fields(rtxn)?;
@ -64,33 +67,44 @@ impl FacetCondition {
let field_id = fields_ids_map.id(&field_name).with_context(|| format!("field {} not found", field_name))?; let field_id = fields_ids_map.id(&field_name).with_context(|| format!("field {} not found", field_name))?;
let field_type = faceted_fields.get(&field_id).with_context(|| format!("field {} is not faceted", field_name))?; let field_type = faceted_fields.get(&field_id).with_context(|| format!("field {} is not faceted", field_name))?;
ensure!(*field_type == FacetType::Integer, "Only conditions on integer facets"); match field_type {
FacetType::Integer => Self::parse_condition(iter).map(|op| Some(OperatorI64(field_id, op))),
FacetType::Float => Self::parse_condition(iter).map(|op| Some(OperatorF64(field_id, op))),
FacetType::String => bail!("invalid facet type"),
}
}
fn parse_condition<'a, T: FromStr>(
mut iter: impl Iterator<Item=&'a str>,
) -> anyhow::Result<FacetOperator<T>>
where T::Err: Send + Sync + StdError + 'static,
{
use FacetOperator::*;
match iter.next() { match iter.next() {
Some(">") => { Some(">") => {
let param = iter.next().context("missing parameter")?; let param = iter.next().context("missing parameter")?;
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?; let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
Ok(Some(Operator(field_id, GreaterThan(value)))) Ok(GreaterThan(value))
}, },
Some(">=") => { Some(">=") => {
let param = iter.next().context("missing parameter")?; let param = iter.next().context("missing parameter")?;
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?; let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
Ok(Some(Operator(field_id, GreaterThanOrEqual(value)))) Ok(GreaterThanOrEqual(value))
}, },
Some("<") => { Some("<") => {
let param = iter.next().context("missing parameter")?; let param = iter.next().context("missing parameter")?;
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?; let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
Ok(Some(Operator(field_id, LowerThan(value)))) Ok(LowerThan(value))
}, },
Some("<=") => { Some("<=") => {
let param = iter.next().context("missing parameter")?; let param = iter.next().context("missing parameter")?;
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?; let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
Ok(Some(Operator(field_id, LowerThanOrEqual(value)))) Ok(LowerThanOrEqual(value))
}, },
Some("=") => { Some("=") => {
let param = iter.next().context("missing parameter")?; let param = iter.next().context("missing parameter")?;
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?; let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
Ok(Some(Operator(field_id, Equal(value)))) Ok(Equal(value))
}, },
Some(otherwise) => { Some(otherwise) => {
// BETWEEN or X TO Y (both inclusive) // BETWEEN or X TO Y (both inclusive)
@ -98,7 +112,7 @@ impl FacetCondition {
ensure!(iter.next().map_or(false, |s| s.eq_ignore_ascii_case("to")), "TO keyword missing or invalid"); ensure!(iter.next().map_or(false, |s| s.eq_ignore_ascii_case("to")), "TO keyword missing or invalid");
let next = iter.next().context("missing second TO parameter")?; let next = iter.next().context("missing second TO parameter")?;
let right_param = next.parse().with_context(|| format!("invalid second TO parameter ({:?})", next))?; let right_param = next.parse().with_context(|| format!("invalid second TO parameter ({:?})", next))?;
Ok(Some(Operator(field_id, Between(left_param, right_param)))) Ok(Between(left_param, right_param))
}, },
None => bail!("missing facet filter first parameter"), None => bail!("missing facet filter first parameter"),
} }
@ -229,19 +243,23 @@ impl<'a> Search<'a> {
/// Aggregates the documents ids that are part of the specified range automatically /// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels. /// going deeper through the levels.
fn explore_facet_levels( fn explore_facet_levels<T: 'a, KC>(
&self, &self,
field_id: u8, field_id: u8,
level: u8, level: u8,
left: Bound<i64>, left: Bound<T>,
right: Bound<i64>, right: Bound<T>,
output: &mut RoaringBitmap, output: &mut RoaringBitmap,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
KC: heed::BytesDecode<'a, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
match (left, right) { match (left, right) {
// If the request is an exact value we must go directly to the deepest level. // If the request is an exact value we must go directly to the deepest level.
(Included(l), Included(r)) if l == r && level > 0 => { (Included(l), Included(r)) if l == r && level > 0 => {
return self.explore_facet_levels(field_id, 0, left, right, output); return self.explore_facet_levels::<T, KC>(field_id, 0, left, right, output);
}, },
// lower TO upper when lower > upper must return no result // lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()), (Included(l), Included(r)) if l > r => return Ok(()),
@ -257,12 +275,12 @@ impl<'a> Search<'a> {
// We must create a custom iterator to be able to iterate over the // We must create a custom iterator to be able to iterate over the
// requested range as the range iterator cannot express some conditions. // requested range as the range iterator cannot express some conditions.
let left_bound = match left { let left_bound = match left {
Included(left) => Included((field_id, level, left, i64::MIN)), Included(left) => Included((field_id, level, left, T::min_value())),
Excluded(left) => Excluded((field_id, level, left, i64::MIN)), Excluded(left) => Excluded((field_id, level, left, T::min_value())),
Unbounded => Unbounded, Unbounded => Unbounded,
}; };
let right_bound = Included((field_id, level, i64::MAX, i64::MAX)); let right_bound = Included((field_id, level, T::max_value(), T::max_value()));
let db = self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueI64Codec>(); let db = self.index.facet_field_id_value_docids.remap_key_type::<KC>();
let iter = db let iter = db
.range(self.rtxn, &(left_bound, right_bound))? .range(self.rtxn, &(left_bound, right_bound))?
.take_while(|r| r.as_ref().map_or(true, |((.., r), _)| { .take_while(|r| r.as_ref().map_or(true, |((.., r), _)| {
@ -277,7 +295,7 @@ impl<'a> Search<'a> {
for (i, result) in iter.enumerate() { for (i, result) in iter.enumerate() {
let ((_fid, _level, l, r), docids) = result?; let ((_fid, _level, l, r), docids) = result?;
debug!("{} to {} (level {}) found {} documents", l, r, _level, docids.len()); debug!("{:?} to {:?} (level {}) found {} documents", l, r, _level, docids.len());
output.union_with(&docids); output.union_with(&docids);
// We save the leftmost and rightmost bounds we actually found at this level. // We save the leftmost and rightmost bounds we actually found at this level.
if i == 0 { left_found = Some(l); } if i == 0 { left_found = Some(l); }
@ -298,18 +316,18 @@ impl<'a> Search<'a> {
if !matches!(left, Included(l) if l == left_found) { if !matches!(left, Included(l) if l == left_found) {
let sub_right = Excluded(left_found); let sub_right = Excluded(left_found);
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level); debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
self.explore_facet_levels(field_id, deeper_level, left, sub_right, output)?; self.explore_facet_levels::<T, KC>(field_id, deeper_level, left, sub_right, output)?;
} }
if !matches!(right, Included(r) if r == right_found) { if !matches!(right, Included(r) if r == right_found) {
let sub_left = Excluded(right_found); let sub_left = Excluded(right_found);
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level); debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
self.explore_facet_levels(field_id, deeper_level, sub_left, right, output)?; self.explore_facet_levels::<T, KC>(field_id, deeper_level, sub_left, right, output)?;
} }
}, },
None => { None => {
// If we found nothing at this level it means that we must find // If we found nothing at this level it means that we must find
// the same bounds but at a deeper, more precise level. // the same bounds but at a deeper, more precise level.
self.explore_facet_levels(field_id, deeper_level, left, right, output)?; self.explore_facet_levels::<T, KC>(field_id, deeper_level, left, right, output)?;
}, },
} }
@ -327,10 +345,10 @@ impl<'a> Search<'a> {
}; };
// We create the original candidates with the facet conditions results. // We create the original candidates with the facet conditions results.
let facet_candidates = match self.facet_condition {
Some(FacetCondition::Operator(fid, operator)) => {
use FacetOperator::*; use FacetOperator::*;
let facet_candidates = match self.facet_condition {
// TODO make that generic over floats and integers.
Some(FacetCondition::OperatorI64(fid, operator)) => {
// Make sure we always bound the ranges with the field id and the level, // Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the // as the facets values are all in the same database and prefixed by the
// field id and the level. // field id and the level.
@ -357,7 +375,40 @@ impl<'a> Search<'a> {
match biggest_level { match biggest_level {
Some(level) => { Some(level) => {
let mut output = RoaringBitmap::new(); let mut output = RoaringBitmap::new();
self.explore_facet_levels(fid, level, left, right, &mut output)?; self.explore_facet_levels::<i64, FacetLevelValueI64Codec>(fid, level, left, right, &mut output)?;
Some(output)
},
None => None,
}
},
Some(FacetCondition::OperatorF64(fid, operator)) => {
// Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the
// field id and the level.
let (left, right) = match operator {
GreaterThan(val) => (Excluded(val), Included(f64::MAX)),
GreaterThanOrEqual(val) => (Included(val), Included(f64::MAX)),
LowerThan(val) => (Included(f64::MIN), Excluded(val)),
LowerThanOrEqual(val) => (Included(f64::MIN), Included(val)),
Equal(val) => (Included(val), Included(val)),
Between(left, right) => (Included(left), Included(right)),
};
let db = self.index
.facet_field_id_value_docids
.remap_key_type::<FacetLevelValueF64Codec>();
// Ask for the biggest value that can exist for this specific field: if it exists,
// that's fine; if it doesn't, the value just before will be returned instead.
let biggest_level = db
.remap_data_type::<DecodeIgnore>()
.get_lower_than_or_equal_to(self.rtxn, &(fid, u8::MAX, f64::MAX, f64::MAX))?
.and_then(|((id, level, _, _), _)| if id == fid { Some(level) } else { None });
match biggest_level {
Some(level) => {
let mut output = RoaringBitmap::new();
self.explore_facet_levels::<f64, FacetLevelValueF64Codec>(fid, level, left, right, &mut output)?;
Some(output) Some(output)
}, },
None => None, None => None,

View File

@ -6,10 +6,12 @@ use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesEncode, Error}; use heed::{BytesEncode, Error};
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use num_traits::{Bounded, Zero};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::{facet::FacetLevelValueI64Codec, CboRoaringBitmapCodec}; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
use crate::Index; use crate::Index;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
@ -68,15 +70,15 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
debug!("Computing and writing the facet values levels docids into LMDB on disk..."); debug!("Computing and writing the facet values levels docids into LMDB on disk...");
for (field_id, facet_type) in faceted_fields { for (field_id, facet_type) in faceted_fields {
if facet_type == FacetType::String { continue } let content = match facet_type {
FacetType::Integer => {
clear_field_levels( clear_field_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn, self.wtxn,
self.index.facet_field_id_value_docids, self.index.facet_field_id_value_docids,
field_id, field_id,
)?; )?;
let content = compute_facet_levels( compute_facet_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn, self.wtxn,
self.index.facet_field_id_value_docids, self.index.facet_field_id_value_docids,
self.chunk_compression_type, self.chunk_compression_type,
@ -86,9 +88,30 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
self.number_of_levels, self.number_of_levels,
self.easing_function, self.easing_function,
field_id, field_id,
facet_type, )?
},
FacetType::Float => {
clear_field_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?; )?;
compute_facet_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
self.chunk_compression_level,
self.chunk_fusing_shrink_size,
self.last_level_size,
self.number_of_levels,
self.easing_function,
field_id,
)?
},
FacetType::String => continue,
};
write_into_lmdb_database( write_into_lmdb_database(
self.wtxn, self.wtxn,
*self.index.facet_field_id_value_docids.as_polymorph(), *self.index.facet_field_id_value_docids.as_polymorph(),
@ -102,20 +125,26 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
} }
} }
fn clear_field_levels( fn clear_field_levels<'t, T: 't, KC>(
wtxn: &mut heed::RwTxn, wtxn: &'t mut heed::RwTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8, field_id: u8,
) -> heed::Result<()> ) -> heed::Result<()>
where
T: Copy + Bounded,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let range = (field_id, 1, i64::MIN, i64::MIN)..=(field_id, u8::MAX, i64::MAX, i64::MAX); let left = (field_id, 1, T::min_value(), T::min_value());
db.remap_key_type::<FacetLevelValueI64Codec>() let right = (field_id, u8::MAX, T::max_value(), T::max_value());
let range = left..=right;
db.remap_key_type::<KC>()
.delete_range(wtxn, &range) .delete_range(wtxn, &range)
.map(drop) .map(drop)
} }
fn compute_facet_levels( fn compute_facet_levels<'t, T: 't, KC>(
rtxn: &heed::RoTxn, rtxn: &'t heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
compression_type: CompressionType, compression_type: CompressionType,
compression_level: Option<u32>, compression_level: Option<u32>,
@ -124,8 +153,11 @@ fn compute_facet_levels(
number_of_levels: NonZeroUsize, number_of_levels: NonZeroUsize,
easing_function: EasingName, easing_function: EasingName,
field_id: u8, field_id: u8,
facet_type: FacetType,
) -> anyhow::Result<Reader<FileFuse>> ) -> anyhow::Result<Reader<FileFuse>>
where
T: Copy + PartialEq + PartialOrd + Bounded + Zero,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let first_level_size = db.prefix_iter(rtxn, &[field_id])? let first_level_size = db.prefix_iter(rtxn, &[field_id])?
.remap_types::<DecodeIgnore, DecodeIgnore>() .remap_types::<DecodeIgnore, DecodeIgnore>()
@ -137,7 +169,12 @@ fn compute_facet_levels(
create_writer(compression_type, compression_level, file) create_writer(compression_type, compression_level, file)
})?; })?;
let level_0_range = (field_id, 0, i64::MIN, i64::MIN)..=(field_id, 0, i64::MAX, i64::MAX); let level_0_range = {
let left = (field_id, 0, T::min_value(), T::min_value());
let right = (field_id, 0, T::max_value(), T::max_value());
left..=right
};
let level_sizes_iter = let level_sizes_iter =
levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function) levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function)
.map(|size| (first_level_size as f64 / size as f64).ceil() as usize) .map(|size| (first_level_size as f64 / size as f64).ceil() as usize)
@ -147,13 +184,11 @@ fn compute_facet_levels(
// TODO we must not create levels with identical group sizes. // TODO we must not create levels with identical group sizes.
for (level, level_entry_sizes) in level_sizes_iter { for (level, level_entry_sizes) in level_sizes_iter {
let mut left = 0; let mut left = T::zero();
let mut right = 0; let mut right = T::zero();
let mut group_docids = RoaringBitmap::new(); let mut group_docids = RoaringBitmap::new();
dbg!(level, level_entry_sizes, first_level_size); let db = db.remap_key_type::<KC>();
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() { for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() {
let ((_field_id, _level, value, _right), docids) = result?; let ((_field_id, _level, value, _right), docids) = result?;
@ -162,7 +197,7 @@ fn compute_facet_levels(
} else if i % level_entry_sizes == 0 { } else if i % level_entry_sizes == 0 {
// we found the first bound of the next group, we must store the left // we found the first bound of the next group, we must store the left
// and right bounds associated with the docids. // and right bounds associated with the docids.
write_entry(&mut writer, field_id, level as u8, left, right, &group_docids)?; write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
// We save the left bound for the new group and also reset the docids. // We save the left bound for the new group and also reset the docids.
group_docids = RoaringBitmap::new(); group_docids = RoaringBitmap::new();
@ -175,24 +210,26 @@ fn compute_facet_levels(
} }
if !group_docids.is_empty() { if !group_docids.is_empty() {
write_entry(&mut writer, field_id, level as u8, left, right, &group_docids)?; write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
} }
} }
writer_into_reader(writer, shrink_size) writer_into_reader(writer, shrink_size)
} }
fn write_entry( fn write_entry<T, KC>(
writer: &mut Writer<File>, writer: &mut Writer<File>,
field_id: u8, field_id: u8,
level: u8, level: u8,
left: i64, left: T,
right: i64, right: T,
ids: &RoaringBitmap, ids: &RoaringBitmap,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let key = (field_id, level, left, right); let key = (field_id, level, left, right);
let key = FacetLevelValueI64Codec::bytes_encode(&key).ok_or(Error::Encoding)?; let key = KC::bytes_encode(&key).ok_or(Error::Encoding)?;
let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?; let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?;
writer.insert(&key, &data)?; writer.insert(&key, &data)?;
Ok(()) Ok(())