From b8a1caad5e8d9a55ba7c7807805a4ee2fbb6b980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 30 Aug 2022 15:22:39 +0200 Subject: [PATCH] Add range search and incremental indexing algorithm --- milli/Cargo.toml | 2 +- .../search/facet/facet_distribution_iter.rs | 70 +-- milli/src/search/facet/facet_range_search.rs | 451 +++++++++++++++++ .../src/search/facet/facet_sort_ascending.rs | 56 ++- .../src/search/facet/facet_sort_descending.rs | 73 +-- milli/src/search/facet/filter.rs | 1 - milli/src/search/facet/incremental_update.rs | 459 ++++++++++++++++++ milli/src/search/facet/mod.rs | 148 +++++- 8 files changed, 1145 insertions(+), 115 deletions(-) create mode 100644 milli/src/search/facet/facet_range_search.rs create mode 100644 milli/src/search/facet/incremental_update.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 835425714..658ef0d24 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -54,7 +54,7 @@ big_s = "1.0.2" insta = "1.21.0" maplit = "1.0.2" md5 = "0.7.0" -rand = "0.8.5" +rand = {version = "0.8.5", features = ["small_rng"] } [features] default = [ "charabia/default" ] diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 2dfe3580f..83079028c 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -1,8 +1,8 @@ +use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; +use crate::Result; use roaring::RoaringBitmap; use std::ops::ControlFlow; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; - use super::{get_first_facet_value, get_highest_level}; pub fn iterate_over_facet_distribution<'t, CB>( @@ -11,18 +11,19 @@ pub fn iterate_over_facet_distribution<'t, CB>( field_id: u16, candidates: &RoaringBitmap, callback: CB, -) where +) -> Result<()> +where CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, { let mut fd = FacetDistribution { rtxn, db, field_id, callback }; let highest_level = - get_highest_level(rtxn, &db.remap_key_type::>(), field_id); + get_highest_level(rtxn, &db.remap_key_type::>(), field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id) { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX); - return; + return Ok(()); } else { - return; + return Ok(()); } } @@ -45,26 +46,26 @@ where candidates: &RoaringBitmap, starting_bound: &'t [u8], group_size: usize, - ) -> ControlFlow<()> { + ) -> Result> { let starting_key = FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; - let iter = self.db.range(self.rtxn, &(starting_key..)).unwrap().take(group_size); + let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size); for el in iter { - let (key, value) = el.unwrap(); + let (key, value) = el?; // The range is unbounded on the right and the group size for the highest level is MAX, // so we need to check that we are not iterating over the next field id if key.field_id != self.field_id { - return ControlFlow::Break(()); + return Ok(ControlFlow::Break(())); } let docids_in_common = value.bitmap.intersection_len(candidates); if docids_in_common > 0 { match (self.callback)(key.left_bound, docids_in_common) { ControlFlow::Continue(_) => {} - ControlFlow::Break(_) => return ControlFlow::Break(()), + ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), } } } - return ControlFlow::Continue(()); + return Ok(ControlFlow::Continue(())); } fn iterate( &mut self, @@ -72,7 +73,7 @@ where level: u8, starting_bound: &'t [u8], group_size: usize, - ) -> ControlFlow<()> { + ) -> Result> { if level == 0 { return self.iterate_level_0(candidates, starting_bound, group_size); } @@ -84,34 +85,42 @@ where // The range is unbounded on the right and the group size for the highest level is MAX, // so we need to check that we are not iterating over the next field id if key.field_id != self.field_id { - return ControlFlow::Break(()); + return Ok(ControlFlow::Break(())); } let docids_in_common = value.bitmap & candidates; if docids_in_common.len() > 0 { - let cf = - self.iterate(&docids_in_common, level - 1, key.left_bound, value.size as usize); + let cf = self.iterate( + &docids_in_common, + level - 1, + key.left_bound, + value.size as usize, + )?; match cf { ControlFlow::Continue(_) => {} - ControlFlow::Break(_) => return ControlFlow::Break(()), + ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), } } } - return ControlFlow::Continue(()); + return Ok(ControlFlow::Continue(())); } } #[cfg(test)] mod tests { - use crate::{codec::U16Codec, Index}; use heed::BytesDecode; + use rand::{rngs::SmallRng, Rng, SeedableRng}; use roaring::RoaringBitmap; use std::ops::ControlFlow; + use crate::{ + heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec, search::facet::test::FacetIndex, + }; + use super::iterate_over_facet_distribution; - fn get_simple_index() -> Index { - let index = Index::::new(4, 8); + fn get_simple_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); @@ -121,18 +130,19 @@ mod tests { txn.commit().unwrap(); index } - fn get_random_looking_index() -> Index { - let index = Index::::new(4, 8); + fn get_random_looking_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = fastrand::Rng::with_seed(0); - let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::>(); + let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let keys = + std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); for (_i, key) in keys.into_iter().enumerate() { let mut bitmap = RoaringBitmap::new(); bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as u16), &bitmap); + bitmap.insert(key + 100.); + index.insert(&mut txn, 0, &(key as f64), &bitmap); } txn.commit().unwrap(); index @@ -156,7 +166,7 @@ mod tests { 0, &candidates, |facet, count| { - let facet = U16Codec::bytes_decode(facet).unwrap(); + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); results.push_str(&format!("{facet}: {count}\n")); ControlFlow::Continue(()) }, @@ -180,7 +190,7 @@ mod tests { 0, &candidates, |facet, count| { - let facet = U16Codec::bytes_decode(facet).unwrap(); + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); if nbr_facets == 100 { return ControlFlow::Break(()); } else { diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs new file mode 100644 index 000000000..c01346b25 --- /dev/null +++ b/milli/src/search/facet/facet_range_search.rs @@ -0,0 +1,451 @@ +use heed::BytesEncode; +use roaring::RoaringBitmap; +use std::ops::Bound; +use std::ops::RangeBounds; + +use crate::heed_codec::facet::new::FacetGroupValueCodec; +use crate::heed_codec::facet::new::FacetKey; +use crate::heed_codec::facet::new::FacetKeyCodec; +use crate::heed_codec::facet::new::MyByteSlice; +use crate::Result; + +use super::get_first_facet_value; +use super::get_highest_level; +use super::get_last_facet_value; + +pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( + rtxn: &'t heed::RoTxn<'t>, + db: &'t heed::Database, FacetGroupValueCodec>, + field_id: u16, + left: &'t Bound<>::EItem>, + right: &'t Bound<>::EItem>, +) -> Result +where + BoundCodec: for<'a> BytesEncode<'a>, + for<'a> >::EItem: Sized, +{ + let inner; + let left = match left { + Bound::Included(left) => { + inner = BoundCodec::bytes_encode(left).unwrap(); + Bound::Included(inner.as_ref()) + } + Bound::Excluded(left) => { + inner = BoundCodec::bytes_encode(left).unwrap(); + Bound::Excluded(inner.as_ref()) + } + Bound::Unbounded => Bound::Unbounded, + }; + let inner; + let right = match right { + Bound::Included(right) => { + inner = BoundCodec::bytes_encode(right).unwrap(); + Bound::Included(inner.as_ref()) + } + Bound::Excluded(right) => { + inner = BoundCodec::bytes_encode(right).unwrap(); + Bound::Excluded(inner.as_ref()) + } + Bound::Unbounded => Bound::Unbounded, + }; + + let mut docids = RoaringBitmap::new(); + let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; + let highest_level = get_highest_level(rtxn, db, field_id)?; + + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; + Ok(docids) + } else { + return Ok(RoaringBitmap::new()); + } +} + +/// Fetch the document ids that have a facet with a value between the two given bounds +struct FacetRangeSearch<'t, 'b, 'bitmap> { + rtxn: &'t heed::RoTxn<'t>, + db: &'t heed::Database, FacetGroupValueCodec>, + field_id: u16, + left: Bound<&'b [u8]>, + right: Bound<&'b [u8]>, + docids: &'bitmap mut RoaringBitmap, +} +impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { + fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { + let left_key = + FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; + let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); + for el in iter { + let (key, value) = el?; + // the right side of the iter range is unbounded, so we need to make sure that we are not iterating + // on the next field id + if key.field_id != self.field_id { + return Ok(()); + } + let should_skip = { + match self.left { + Bound::Included(left) => left > key.left_bound, + Bound::Excluded(left) => left >= key.left_bound, + Bound::Unbounded => false, + } + }; + if should_skip { + continue; + } + let should_stop = { + match self.right { + Bound::Included(right) => right < key.left_bound, + Bound::Excluded(right) => right <= key.left_bound, + Bound::Unbounded => false, + } + }; + if should_stop { + break; + } + + if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) { + *self.docids |= value.bitmap; + } + } + Ok(()) + } + + /// Recursive part of the algorithm for level > 0 + fn run( + &mut self, + level: u8, + starting_left_bound: &'t [u8], + rightmost_bound: Bound<&'t [u8]>, + group_size: usize, + ) -> Result<()> { + if level == 0 { + return self.run_level_0(starting_left_bound, group_size); + } + + let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound }; + let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); + + let (mut previous_key, mut previous_value) = iter.next().unwrap()?; + for el in iter { + let (next_key, next_value) = el?; + // the right of the iter range is unbounded, so we need to make sure that we are not iterating + // on the next field id + if next_key.field_id != self.field_id { + return Ok(()); + } + // now, do we skip, stop, or visit? + let should_skip = { + match self.left { + Bound::Included(left) => left >= next_key.left_bound, + Bound::Excluded(left) => left >= next_key.left_bound, // TODO: use > instead? + Bound::Unbounded => false, + } + }; + if should_skip { + previous_key = next_key; + previous_value = next_value; + continue; + } + + // should we stop? + let should_stop = { + match self.right { + Bound::Included(right) => right < previous_key.left_bound, + Bound::Excluded(right) => right <= previous_key.left_bound, + Bound::Unbounded => false, + } + }; + if should_stop { + return Ok(()); + } + // should we take the whole thing, without recursing down? + let should_take_whole_group = { + let left_condition = match self.left { + Bound::Included(left) => previous_key.left_bound >= left, + Bound::Excluded(left) => previous_key.left_bound > left, + Bound::Unbounded => true, + }; + let right_condition = match self.right { + Bound::Included(right) => next_key.left_bound <= right, + Bound::Excluded(right) => next_key.left_bound <= right, + Bound::Unbounded => true, + }; + left_condition && right_condition + }; + if should_take_whole_group { + *self.docids |= &previous_value.bitmap; + previous_key = next_key; + previous_value = next_value; + continue; + } + + let level = level - 1; + let starting_left_bound = previous_key.left_bound; + let rightmost_bound = Bound::Excluded(next_key.left_bound); + let group_size = previous_value.size as usize; + + self.run(level, starting_left_bound, rightmost_bound, group_size)?; + + previous_key = next_key; + previous_value = next_value; + } + // previous_key/previous_value are the last element + + // now, do we skip, stop, or visit? + let should_skip = { + match (self.left, rightmost_bound) { + (Bound::Included(left), Bound::Included(right)) => left > right, + (Bound::Included(left), Bound::Excluded(right)) => left >= right, + (Bound::Excluded(left), Bound::Included(right) | Bound::Excluded(right)) => { + left >= right + } + (Bound::Unbounded, _) => false, + (_, Bound::Unbounded) => false, // should never run? + } + }; + if should_skip { + return Ok(()); + } + + // should we stop? + let should_stop = { + match self.right { + Bound::Included(right) => right <= previous_key.left_bound, + Bound::Excluded(right) => right < previous_key.left_bound, + Bound::Unbounded => false, + } + }; + if should_stop { + return Ok(()); + } + // should we take the whole thing, without recursing down? + let should_take_whole_group = { + let left_condition = match self.left { + Bound::Included(left) => previous_key.left_bound >= left, + Bound::Excluded(left) => previous_key.left_bound > left, + Bound::Unbounded => true, + }; + let right_condition = match (self.right, rightmost_bound) { + (Bound::Included(right), Bound::Included(rightmost)) => rightmost <= right, + (Bound::Included(right), Bound::Excluded(rightmost)) => rightmost < right, + // e.g. x < 8 and rightmost is <= y + // condition met if rightmost < 8 + (Bound::Excluded(right), Bound::Included(rightmost)) => rightmost < right, + // e.g. x < 8 and rightmost is < y + // condition met only if y <= 8? + (Bound::Excluded(right), Bound::Excluded(rightmost)) => rightmost <= right, + // e.g. x < inf. , so yes we take the whole thing + (Bound::Unbounded, _) => true, + // e.g. x < 7 , righmost is inf + (_, Bound::Unbounded) => false, // panic? + }; + left_condition && right_condition + }; + if should_take_whole_group { + *self.docids |= &previous_value.bitmap; + } else { + let level = level - 1; + let starting_left_bound = previous_key.left_bound; + let group_size = previous_value.size as usize; + + self.run(level, starting_left_bound, rightmost_bound, group_size)?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec, + search::facet::test::FacetIndex, snapshot_tests::display_bitmap, + }; + use rand::{Rng, SeedableRng}; + use roaring::RoaringBitmap; + use std::ops::Bound; + + use super::find_docids_of_facet_within_bounds; + + fn get_simple_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); + let mut txn = index.env.write_txn().unwrap(); + for i in 0..256u16 { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(i as u32); + index.insert(&mut txn, 0, &(i as f64), &bitmap); + } + txn.commit().unwrap(); + index + } + fn get_random_looking_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); + let mut txn = index.env.write_txn().unwrap(); + + let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let keys = + std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); + + for (_i, key) in keys.into_iter().enumerate() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(key); + bitmap.insert(key + 100); + index.insert(&mut txn, 0, &(key as f64), &bitmap); + } + txn.commit().unwrap(); + index + } + + #[test] + fn random_looking_index_snap() { + let index = get_random_looking_index(); + insta::assert_display_snapshot!(index) + } + #[test] + fn filter_range_increasing() { + let indexes = [get_simple_index(), get_random_looking_index()]; + for (i, index) in indexes.into_iter().enumerate() { + let txn = index.env.read_txn().unwrap(); + let mut results = String::new(); + for i in 0..=255 { + let i = i as f64; + let start = Bound::Included(0.); + let end = Bound::Included(i); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + insta::assert_snapshot!( + format!("filter_range_{i}_increasing_included_bounds"), + results + ); + let mut results = String::new(); + for i in 0..=255 { + let i = i as f64; + let start = Bound::Excluded(0.); + let end = Bound::Excluded(i); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + insta::assert_snapshot!( + format!("filter_range_{i}_increasing_excluded_bounds"), + results + ); + txn.commit().unwrap(); + } + } + #[test] + fn filter_range_decreasing() { + let indexes = [get_simple_index(), get_random_looking_index()]; + for (i, index) in indexes.into_iter().enumerate() { + let txn = index.env.read_txn().unwrap(); + + let mut results = String::new(); + + for i in (0..=255).into_iter().rev() { + let i = i as f64; + let start = Bound::Included(i); + let end = Bound::Included(255.); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + + insta::assert_snapshot!( + format!("filter_range_{i}_decreasing_included_bounds"), + results + ); + + let mut results = String::new(); + + for i in (0..=255).into_iter().rev() { + let i = i as f64; + let start = Bound::Excluded(i); + let end = Bound::Excluded(255.); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + + insta::assert_snapshot!( + format!("filter_range_{i}_decreasing_excluded_bounds"), + results + ); + + txn.commit().unwrap(); + } + } + #[test] + fn filter_range_pinch() { + let indexes = [get_simple_index(), get_random_looking_index()]; + for (i, index) in indexes.into_iter().enumerate() { + let txn = index.env.read_txn().unwrap(); + + let mut results = String::new(); + + for i in (0..=128).into_iter().rev() { + let i = i as f64; + let start = Bound::Included(i); + let end = Bound::Included(255. - i); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + + insta::assert_snapshot!(format!("filter_range_{i}_pinch_included_bounds"), results); + + let mut results = String::new(); + + for i in (0..=128).into_iter().rev() { + let i = i as f64; + let start = Bound::Excluded(i); + let end = Bound::Excluded(255. - i); + let docids = find_docids_of_facet_within_bounds::( + &txn, + &index.db.content, + 0, + &start, + &end, + ) + .unwrap(); + results.push_str(&format!("{}\n", display_bitmap(&docids))); + } + + insta::assert_snapshot!(format!("filter_range_{i}_pinch_excluded_bounds"), results); + + txn.commit().unwrap(); + } + } +} diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index c9abd9556..73491d4ae 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -1,8 +1,8 @@ -use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::{ FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, }; +use crate::Result; +use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; @@ -11,20 +11,20 @@ pub fn ascending_facet_sort<'t>( db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Box + 't> { +) -> Result> + 't>> { let highest_level = - get_highest_level(rtxn, &db.remap_key_type::>(), field_id); + get_highest_level(rtxn, &db.remap_key_type::>(), field_id)?; if let Some(first_bound) = get_first_facet_value::( rtxn, &db.remap_key_type::>(), field_id, - ) { + )? { let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); - Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }) + Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) } else { - return Box::new(std::iter::empty()); + Ok(Box::new(std::iter::empty())) } } @@ -39,7 +39,7 @@ struct AscendingFacetSort<'t, 'e> { } impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { - type Item = (&'t [u8], RoaringBitmap); + type Item = Result<(&'t [u8], RoaringBitmap)>; fn next(&mut self) -> Option { 'outer: loop { @@ -67,15 +67,15 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { *documents_ids -= &bitmap; if level == 0 { - return Some((left_bound, bitmap)); + return Some(Ok((left_bound, bitmap))); } let starting_key_below = FacetKey { field_id: self.field_id, level: level - 1, left_bound }; - let iter = self - .db - .range(&self.rtxn, &(starting_key_below..)) - .unwrap() - .take(group_size as usize); + let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) { + Ok(iter) => iter, + Err(e) => return Some(Err(e.into())), + } + .take(group_size as usize); self.stack.push((bitmap, iter)); continue 'outer; @@ -88,14 +88,19 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { #[cfg(test)] mod tests { - use crate::{ - ascending_facet_sort::ascending_facet_sort, codec::U16Codec, display_bitmap, Index, - }; use heed::BytesDecode; + use rand::Rng; + use rand::SeedableRng; use roaring::RoaringBitmap; - fn get_simple_index() -> Index { - let index = Index::::new(4, 8); + use crate::{ + heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec, + search::facet::{facet_sort_ascending::ascending_facet_sort, test::FacetIndex}, + snapshot_tests::display_bitmap, + }; + + fn get_simple_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); @@ -105,18 +110,19 @@ mod tests { txn.commit().unwrap(); index } - fn get_random_looking_index() -> Index { - let index = Index::::new(4, 8); + fn get_random_looking_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = fastrand::Rng::with_seed(0); - let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::>(); + let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let keys = + std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); for (_i, key) in keys.into_iter().enumerate() { let mut bitmap = RoaringBitmap::new(); bitmap.insert(key); bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as u16), &bitmap); + index.insert(&mut txn, 0, &(key as f64), &bitmap); } txn.commit().unwrap(); index @@ -136,7 +142,7 @@ mod tests { let mut results = String::new(); let iter = ascending_facet_sort(&txn, &index.db.content, 0, candidates); for (facet, docids) in iter { - let facet = U16Codec::bytes_decode(facet).unwrap(); + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids))); } insta::assert_snapshot!(format!("filter_sort_{i}_ascending"), results); diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index d3c9d54f8..81b0eb09d 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -1,10 +1,10 @@ use std::ops::Bound; -use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::{ FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, }; +use crate::Result; +use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; @@ -13,21 +13,21 @@ fn descending_facet_sort<'t>( db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, -) -> Box + 't> { - let highest_level = get_highest_level(rtxn, db, field_id); - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id) { +) -> Result> + 't>> { + let highest_level = get_highest_level(rtxn, db, field_id)?; + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; - let last_bound = get_last_facet_value::(rtxn, db, field_id).unwrap(); + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound }; - let iter = db.rev_range(rtxn, &(first_key..=last_key)).unwrap().take(usize::MAX); - Box::new(DescendingFacetSort { + let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); + Ok(Box::new(DescendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter, Bound::Included(last_bound))], - }) + })) } else { - return Box::new(std::iter::empty()); + Ok(Box::new(std::iter::empty())) } } @@ -43,7 +43,7 @@ struct DescendingFacetSort<'t> { } impl<'t> Iterator for DescendingFacetSort<'t> { - type Item = (&'t [u8], RoaringBitmap); + type Item = Result<(&'t [u8], RoaringBitmap)>; fn next(&mut self) -> Option { 'outer: loop { @@ -70,7 +70,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *documents_ids -= &bitmap; if level == 0 { - return Some((left_bound, bitmap)); + return Some(Ok((left_bound, bitmap))); } let starting_key_below = FacetKey { field_id, level: level - 1, left_bound }; @@ -89,14 +89,14 @@ impl<'t> Iterator for DescendingFacetSort<'t> { }; let prev_right_bound = *right_bound; *right_bound = Bound::Excluded(left_bound); - let iter = self - .db - .rev_range( - &self.rtxn, - &(Bound::Included(starting_key_below), end_key_kelow), - ) - .unwrap() - .take(group_size as usize); + let iter = match self.db.rev_range( + &self.rtxn, + &(Bound::Included(starting_key_below), end_key_kelow), + ) { + Ok(iter) => iter, + Err(e) => return Some(Err(e.into())), + } + .take(group_size as usize); self.stack.push((bitmap, iter, prev_right_bound)); continue 'outer; @@ -110,16 +110,20 @@ impl<'t> Iterator for DescendingFacetSort<'t> { #[cfg(test)] mod tests { - use crate::{ - codec::{MyByteSlice, U16Codec}, - descending_facet_sort::descending_facet_sort, - display_bitmap, FacetKeyCodec, Index, - }; + use heed::BytesDecode; + use rand::Rng; + use rand::SeedableRng; use roaring::RoaringBitmap; - fn get_simple_index() -> Index { - let index = Index::::new(4, 8); + use crate::{ + heed_codec::facet::new::{ordered_f64_codec::OrderedF64Codec, FacetKeyCodec, MyByteSlice}, + search::facet::{facet_sort_descending::descending_facet_sort, test::FacetIndex}, + snapshot_tests::display_bitmap, + }; + + fn get_simple_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); @@ -129,18 +133,19 @@ mod tests { txn.commit().unwrap(); index } - fn get_random_looking_index() -> Index { - let index = Index::::new(4, 8); + fn get_random_looking_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = fastrand::Rng::with_seed(0); - let keys = std::iter::from_fn(|| Some(rng.u32(..256))).take(128).collect::>(); + let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let keys = + std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); for (_i, key) in keys.into_iter().enumerate() { let mut bitmap = RoaringBitmap::new(); bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as u16), &bitmap); + bitmap.insert(key + 100.); + index.insert(&mut txn, 0, &(key as f64), &bitmap); } txn.commit().unwrap(); index @@ -161,7 +166,7 @@ mod tests { let db = index.db.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, &db, 0, candidates); for (facet, docids) in iter { - let facet = U16Codec::bytes_decode(facet).unwrap(); + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids))); } insta::assert_snapshot!(format!("filter_sort_{i}_descending"), results); diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index e911dfb15..dd34abe6d 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -7,7 +7,6 @@ use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; use heed::types::DecodeIgnore; use heed::LazyDecode; -use log::debug; use roaring::RoaringBitmap; // use super::FacetNumberRange; diff --git a/milli/src/search/facet/incremental_update.rs b/milli/src/search/facet/incremental_update.rs new file mode 100644 index 000000000..a437efb2d --- /dev/null +++ b/milli/src/search/facet/incremental_update.rs @@ -0,0 +1,459 @@ +use crate::heed_codec::facet::new::{ + FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +}; +use crate::Result; +use heed::Error; +use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn}; +use roaring::RoaringBitmap; + +use super::get_highest_level; + +enum InsertionResult { + InPlace, + Insert, +} +enum DeletionResult { + InPlace, + Reduce { prev: Option>, next: Option> }, + Remove { prev: Option>, next: Option> }, +} + +struct IncrementalFacetUpdate<'i> { + db: &'i heed::Database, FacetGroupValueCodec>, + group_size: usize, + min_level_size: usize, + max_group_size: usize, +} +impl<'i> IncrementalFacetUpdate<'i> { + fn find_insertion_key_value<'a>( + &self, + field_id: u16, + level: u8, + search_key: &[u8], + txn: &RoTxn, + ) -> Result<(FacetKey>, FacetGroupValue)> { + let mut prefix = vec![]; + prefix.extend_from_slice(&field_id.to_be_bytes()); + prefix.push(level); + prefix.extend_from_slice(search_key); + + let mut prefix_iter = self + .db + .as_polymorph() + .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?; + if let Some(e) = prefix_iter.next() { + let (key_bytes, value) = e?; + let key = FacetKeyCodec::::bytes_decode(&key_bytes) + .ok_or(heed::Error::Encoding)?; + Ok(( + FacetKeyCodec::::bytes_decode(&key_bytes) + .ok_or(Error::Encoding)? + .into_owned(), + value, + )) + } else { + let key = FacetKey { field_id, level, left_bound: search_key }; + match self.db.get_lower_than(txn, &key)? { + Some((key, value)) => { + if key.level != level || key.field_id != field_id { + let mut prefix = vec![]; + prefix.extend_from_slice(&field_id.to_be_bytes()); + prefix.push(level); + + let mut iter = self + .db + .as_polymorph() + .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>( + txn, + &prefix.as_slice(), + )?; + let (key_bytes, value) = iter.next().unwrap()?; + Ok(( + FacetKeyCodec::::bytes_decode(&key_bytes) + .ok_or(Error::Encoding)? + .into_owned(), + value, + )) + } else { + Ok((key.into_owned(), value)) + } + } + None => panic!(), + } + } + } + + fn insert_in_level_0<'t>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + new_key: &[u8], + new_values: &RoaringBitmap, + ) -> Result { + let key = FacetKey { field_id, level: 0, left_bound: new_key }; + let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 }; + + let mut level0_prefix = vec![]; + level0_prefix.extend_from_slice(&field_id.to_be_bytes()); + level0_prefix.push(0); + + let mut iter = self + .db + .as_polymorph() + .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &level0_prefix)?; + + if iter.next().is_none() { + drop(iter); + self.db.put(txn, &key, &value)?; + return Ok(InsertionResult::Insert); + } else { + drop(iter); + let old_value = self.db.get(&txn, &key)?; + match old_value { + Some(mut updated_value) => { + // now merge the two + updated_value.bitmap |= value.bitmap; + self.db.put(txn, &key, &updated_value)?; + Ok(InsertionResult::InPlace) + } + None => { + self.db.put(txn, &key, &value)?; + Ok(InsertionResult::Insert) + } + } + } + } + fn insert_in_level<'t>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + level: u8, + new_key: &[u8], + new_values: &RoaringBitmap, + ) -> Result { + if level == 0 { + return self.insert_in_level_0(txn, field_id, new_key, new_values); + } + + let max_group_size = self.max_group_size; + + let (insertion_key, insertion_value) = + self.find_insertion_key_value(field_id, level, new_key, txn)?; + + let result = self.insert_in_level(txn, field_id, level - 1, new_key.clone(), new_values)?; + // level below inserted an element + + let insertion_key = { + let mut new_insertion_key = insertion_key.clone(); + let mut modified = false; + + if new_key < insertion_key.left_bound.as_slice() { + new_insertion_key.left_bound = new_key.to_vec(); + modified = true; + } + if modified { + let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; + assert!(is_deleted); + self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?; + } + new_insertion_key + }; + + match result { + // TODO: this could go above the block recomputing insertion key + // because we know that if we inserted in place, the key is not a new one + // thus it doesn't extend a group + InsertionResult::InPlace => { + let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap(); + updated_value.bitmap |= new_values; + self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; + + return Ok(InsertionResult::InPlace); + } + InsertionResult::Insert => {} + } + let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap(); + + updated_value.size += 1; + if updated_value.size as usize == max_group_size { + // need to split it + // recompute left element and right element + // replace current group by left element + // add one more group to the right + + let size_left = max_group_size / 2; + let size_right = max_group_size - size_left; + + let level_below = level - 1; + + let (start_key, _) = self + .db + .get_greater_than_or_equal_to( + &txn, + &FacetKey { + field_id, + level: level_below, + left_bound: insertion_key.left_bound.as_slice(), + }, + )? + .unwrap(); + + let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size); + + let group_left = { + let mut values_left = RoaringBitmap::new(); + + let mut i = 0; + while let Some(next) = iter.next() { + let (_key, value) = next?; + i += 1; + values_left |= &value.bitmap; + if i == size_left { + break; + } + } + + let key = + FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; + let value = FacetGroupValue { size: size_left as u8, bitmap: values_left }; + (key, value) + }; + + let group_right = { + let mut values_right = RoaringBitmap::new(); + let mut right_start_key = None; + + while let Some(next) = iter.next() { + let (key, value) = next?; + if right_start_key.is_none() { + right_start_key = Some(key.left_bound); + } + values_right |= &value.bitmap; + } + + let key = + FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; + let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; + (key, value) + }; + drop(iter); + + let _ = self.db.delete(txn, &insertion_key.as_ref())?; + + self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?; + self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?; + + Ok(InsertionResult::Insert) + } else { + let mut value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap(); + value.bitmap |= new_values; + value.size += 1; + self.db.put(txn, &insertion_key.as_ref(), &value).unwrap(); + + Ok(InsertionResult::InPlace) + } + } + + pub fn insert<'a, 't>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + new_key: &[u8], + new_values: &RoaringBitmap, + ) -> Result<()> { + if new_values.is_empty() { + return Ok(()); + } + let group_size = self.group_size; + + let highest_level = get_highest_level(&txn, &self.db, field_id)?; + + let result = + self.insert_in_level(txn, field_id, highest_level as u8, new_key, new_values)?; + match result { + InsertionResult::InPlace => return Ok(()), + InsertionResult::Insert => {} + } + + let mut highest_level_prefix = vec![]; + highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); + highest_level_prefix.push(highest_level); + + let size_highest_level = self + .db + .as_polymorph() + .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? + .count(); + + if size_highest_level < self.min_level_size { + return Ok(()); + } + + let mut groups_iter = self + .db + .as_polymorph() + .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &highest_level_prefix)?; + + let mut to_add = vec![]; + for _ in 0..group_size { + let mut first_key = None; + let mut values = RoaringBitmap::new(); + for _ in 0..group_size { + let (key_bytes, value_i) = groups_iter.next().unwrap()?; + let key_i = FacetKeyCodec::::bytes_decode(&key_bytes) + .ok_or(Error::Encoding)?; + + if first_key.is_none() { + first_key = Some(key_i); + } + values |= value_i.bitmap; + } + let key = FacetKey { + field_id, + level: highest_level + 1, + left_bound: first_key.unwrap().left_bound, + }; + let value = FacetGroupValue { size: group_size as u8, bitmap: values }; + to_add.push((key.into_owned(), value)); + } + drop(groups_iter); + for (key, value) in to_add { + self.db.put(txn, &key.as_ref(), &value)?; + } + Ok(()) + } + + fn delete_in_level<'t>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + level: u8, + key: &[u8], + value: u32, + ) -> Result { + if level == 0 { + return self.delete_in_level_0(txn, field_id, key, value); + } + let (deletion_key, mut bitmap) = + self.find_insertion_key_value(field_id, level, key, txn)?; + + let result = self.delete_in_level(txn, field_id, level - 1, key.clone(), value)?; + + let mut decrease_size = false; + let (prev_key, next_key) = match result { + DeletionResult::InPlace => { + bitmap.bitmap.remove(value); + self.db.put(txn, &deletion_key.as_ref(), &bitmap)?; + return Ok(DeletionResult::InPlace); + } + DeletionResult::Reduce { prev, next } => (prev, next), + DeletionResult::Remove { prev, next } => { + decrease_size = true; + (prev, next) + } + }; + + let mut updated_value = bitmap; + if decrease_size { + updated_value.size -= 1; + } + + if updated_value.size == 0 { + self.db.delete(txn, &deletion_key.as_ref())?; + Ok(DeletionResult::Remove { prev: prev_key, next: next_key }) + } else { + let mut updated_deletion_key = deletion_key.clone(); + if key == deletion_key.left_bound { + updated_deletion_key.left_bound = next_key.clone().unwrap(); + } + updated_value.bitmap.remove(value); + let _ = self.db.delete(txn, &deletion_key.as_ref())?; + self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?; + + Ok(DeletionResult::Reduce { prev: prev_key, next: next_key }) + } + } + + fn delete_in_level_0<'t>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + key: &[u8], + value: u32, + ) -> Result { + let key = FacetKey { field_id, level: 0, left_bound: key }; + let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap; + bitmap.remove(value); + + if bitmap.is_empty() { + let mut prev_key = None; + let mut next_key = None; + + if let Some(prev) = self.db.get_lower_than(&txn, &key)? { + prev_key = Some(prev.0.left_bound.to_vec()); + } + if let Some(next) = self.db.get_greater_than(&txn, &key)? { + if next.0.level == 0 { + next_key = Some(next.0.left_bound.to_vec()); + } + } + self.db.delete(txn, &key)?; + Ok(DeletionResult::Remove { prev: prev_key, next: next_key }) + } else { + self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?; + Ok(DeletionResult::InPlace) + } + } + + pub fn delete<'a, 't>( + &self, + txn: &'t mut RwTxn, + field_id: u16, + key: &[u8], + value: u32, + ) -> Result<()> { + if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() { + return Ok(()); + } + let highest_level = get_highest_level(&txn, &self.db, field_id)?; + + // let key_bytes = BoundCodec::bytes_encode(&key).unwrap(); + + let result = self.delete_in_level(txn, field_id, highest_level as u8, key, value)?; + match result { + DeletionResult::InPlace => return Ok(()), + DeletionResult::Reduce { .. } => {} + DeletionResult::Remove { .. } => {} + } + let mut highest_level_prefix = vec![]; + highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); + highest_level_prefix.push(highest_level); + + if highest_level == 0 + || self + .db + .as_polymorph() + .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? + .count() + >= self.group_size + { + return Ok(()); + } + let mut to_delete = vec![]; + let mut iter = self + .db + .as_polymorph() + .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?; + while let Some(el) = iter.next() { + let (k, _) = el?; + to_delete.push( + FacetKeyCodec::::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), + ); + } + drop(iter); + for k in to_delete { + self.db.delete(txn, &k.as_ref())?; + } + Ok(()) + } +} diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index ceedff1e0..d27206af2 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -10,38 +10,39 @@ pub use self::filter::Filter; mod facet_distribution; mod facet_distribution_iter; +mod facet_range_search; mod facet_sort_ascending; mod facet_sort_descending; mod filter; +mod incremental_update; -fn get_first_facet_value<'t, BoundCodec>( +pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, -) -> Option +) -> crate::Result> where BoundCodec: BytesDecode<'t>, { let mut level0prefix = vec![]; level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); - let mut level0_iter_forward = db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice()) - .unwrap(); + let mut level0_iter_forward = + db.as_polymorph().prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?; if let Some(first) = level0_iter_forward.next() { - let (first_key, _) = first.unwrap(); - let first_key = FacetKeyCodec::::bytes_decode(first_key).unwrap(); - Some(first_key.left_bound) + let (first_key, _) = first?; + let first_key = + FacetKeyCodec::::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; + Ok(Some(first_key.left_bound)) } else { - None + Ok(None) } } -fn get_last_facet_value<'t, BoundCodec>( +pub(crate) fn get_last_facet_value<'t, BoundCodec>( txn: &'t RoTxn, db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, -) -> Option +) -> crate::Result> where BoundCodec: BytesDecode<'t>, { @@ -50,30 +51,129 @@ where level0prefix.push(0); let mut level0_iter_backward = db .as_polymorph() - .rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice()) - .unwrap(); + .rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?; if let Some(last) = level0_iter_backward.next() { - let (last_key, _) = last.unwrap(); - let last_key = FacetKeyCodec::::bytes_decode(last_key).unwrap(); - Some(last_key.left_bound) + let (last_key, _) = last?; + let last_key = + FacetKeyCodec::::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; + Ok(Some(last_key.left_bound)) } else { - None + Ok(None) } } -fn get_highest_level<'t>( +pub(crate) fn get_highest_level<'t>( txn: &'t RoTxn<'t>, db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, -) -> u8 { +) -> crate::Result { let field_id_prefix = &field_id.to_be_bytes(); - db.as_polymorph() - .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix) - .unwrap() + Ok(db + .as_polymorph() + .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix)? .next() .map(|el| { let (key, _) = el.unwrap(); let key = FacetKeyCodec::::bytes_decode(key).unwrap(); key.level }) - .unwrap_or(0) + .unwrap_or(0)) +} + +#[cfg(test)] +mod test { + use std::{fmt::Display, marker::PhantomData, rc::Rc}; + + use heed::{BytesDecode, BytesEncode, Env}; + use tempfile::TempDir; + + use crate::{ + heed_codec::facet::new::{ + FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, + }, + snapshot_tests::display_bitmap, + }; + + pub struct FacetIndex + where + for<'a> BoundCodec: + BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, + { + pub env: Env, + pub db: Database, + _phantom: PhantomData, + } + + pub struct Database { + pub content: heed::Database, FacetGroupValueCodec>, + pub group_size: usize, + pub max_group_size: usize, + _tempdir: Rc, + } + + impl FacetIndex + where + for<'a> BoundCodec: + BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, + { + pub fn open_from_tempdir( + tempdir: Rc, + group_size: u8, + max_group_size: u8, + ) -> FacetIndex { + let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)) as usize; + let max_group_size = std::cmp::max(group_size * 2, max_group_size as usize); + let mut options = heed::EnvOpenOptions::new(); + let options = options.map_size(4096 * 4 * 10 * 100); + unsafe { + options.flag(heed::flags::Flags::MdbAlwaysFreePages); + } + let env = options.open(tempdir.path()).unwrap(); + let content = env.open_database(None).unwrap().unwrap(); + + FacetIndex { + db: Database { content, group_size, max_group_size, _tempdir: tempdir }, + env, + _phantom: PhantomData, + } + } + pub fn new(group_size: u8, max_group_size: u8) -> FacetIndex { + let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)) as usize; + let max_group_size = std::cmp::max(group_size * 2, max_group_size as usize); + let mut options = heed::EnvOpenOptions::new(); + let options = options.map_size(4096 * 4 * 100); + let tempdir = tempfile::TempDir::new_in("databases/").unwrap(); + let env = options.open(tempdir.path()).unwrap(); + let content = env.create_database(None).unwrap(); + + FacetIndex { + db: Database { content, group_size, max_group_size, _tempdir: Rc::new(tempdir) }, + env, + _phantom: PhantomData, + } + } + } + + impl Display for FacetIndex + where + for<'a> >::EItem: Sized + Display, + for<'a> BoundCodec: + BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, + { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let txn = self.env.read_txn().unwrap(); + let mut iter = self.db.content.iter(&txn).unwrap(); + while let Some(el) = iter.next() { + let (key, value) = el.unwrap(); + let FacetKey { field_id, level, left_bound: bound } = key; + let bound = BoundCodec::bytes_decode(bound).unwrap(); + let FacetGroupValue { size, bitmap } = value; + writeln!( + f, + "{field_id:<2} {level:<2} k{bound:<8} {size:<4} {values:?}", + values = display_bitmap(&bitmap) + )?; + } + Ok(()) + } + } }