diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index f347b9d7e..16b83c2db 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -109,7 +109,7 @@ where #[cfg(test)] mod tests { use heed::BytesDecode; - use rand::{rngs::SmallRng, Rng, SeedableRng}; + use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; use std::ops::ControlFlow; @@ -125,7 +125,7 @@ mod tests { for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); bitmap.insert(i as u32); - index.insert(&mut txn, 0, &i, &bitmap); + index.insert(&mut txn, 0, &(i as f64), &bitmap); } txn.commit().unwrap(); index @@ -134,14 +134,14 @@ mod tests { let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); let keys = std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); for (_i, key) in keys.into_iter().enumerate() { let mut bitmap = RoaringBitmap::new(); bitmap.insert(key); - bitmap.insert(key + 100.); + bitmap.insert(key + 100); index.insert(&mut txn, 0, &(key as f64), &bitmap); } txn.commit().unwrap(); @@ -156,13 +156,13 @@ mod tests { #[test] fn filter_distribution_all() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let candidates = (0..=255).into_iter().collect::(); let mut results = String::new(); iterate_over_facet_distribution( &txn, - &index.db.content, + index.db.content, 0, &candidates, |facet, count| { @@ -170,7 +170,8 @@ mod tests { results.push_str(&format!("{facet}: {count}\n")); ControlFlow::Continue(()) }, - ); + ) + .unwrap(); insta::assert_snapshot!(format!("filter_distribution_{i}_all"), results); txn.commit().unwrap(); @@ -179,14 +180,14 @@ mod tests { #[test] fn filter_distribution_all_stop_early() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let candidates = (0..=255).into_iter().collect::(); let mut results = String::new(); let mut nbr_facets = 0; iterate_over_facet_distribution( &txn, - &index.db.content, + index.db.content, 0, &candidates, |facet, count| { @@ -200,7 +201,8 @@ mod tests { ControlFlow::Continue(()) } }, - ); + ) + .unwrap(); insta::assert_snapshot!(format!("filter_distribution_{i}_all_stop_early"), results); txn.commit().unwrap(); diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index b05a3c275..7e7c5e713 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -259,8 +259,9 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { #[cfg(test)] mod tests { use crate::{ - heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec, - search::facet::test::FacetIndex, snapshot_tests::display_bitmap, + heed_codec::facet::new::{ordered_f64_codec::OrderedF64Codec, FacetKeyCodec}, + search::facet::test::FacetIndex, + snapshot_tests::display_bitmap, }; use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; @@ -283,7 +284,7 @@ mod tests { let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); let keys = std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); @@ -305,7 +306,7 @@ mod tests { #[test] fn filter_range_increasing() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let mut results = String::new(); for i in 0..=255 { @@ -314,7 +315,7 @@ mod tests { let end = Bound::Included(i); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -333,7 +334,7 @@ mod tests { let end = Bound::Excluded(i); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -351,7 +352,7 @@ mod tests { #[test] fn filter_range_decreasing() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let mut results = String::new(); @@ -362,7 +363,7 @@ mod tests { let end = Bound::Included(255.); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -384,7 +385,7 @@ mod tests { let end = Bound::Excluded(255.); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -404,7 +405,7 @@ mod tests { #[test] fn filter_range_pinch() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let mut results = String::new(); @@ -415,7 +416,7 @@ mod tests { let end = Bound::Included(255. - i); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -434,7 +435,7 @@ mod tests { let end = Bound::Excluded(255. - i); let docids = find_docids_of_facet_within_bounds::( &txn, - &index.db.content, + index.db.content.remap_key_type::>(), 0, &start, &end, diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index e4b77c691..8af191089 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -83,7 +83,6 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { #[cfg(test)] mod tests { - use heed::BytesDecode; use rand::Rng; use rand::SeedableRng; use roaring::RoaringBitmap; @@ -100,7 +99,7 @@ mod tests { for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); bitmap.insert(i as u32); - index.insert(&mut txn, 0, &i, &bitmap); + index.insert(&mut txn, 0, &(i as f64), &bitmap); } txn.commit().unwrap(); index @@ -109,7 +108,7 @@ mod tests { let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); let keys = std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); @@ -131,14 +130,14 @@ mod tests { #[test] fn filter_sort() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).into_iter().collect::(); let mut results = String::new(); - let iter = ascending_facet_sort(&txn, &index.db.content, 0, candidates); - for (facet, docids) in iter { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids))); + let iter = ascending_facet_sort(&txn, index.db.content, 0, candidates).unwrap(); + for el in iter { + let docids = el.unwrap(); + results.push_str(&display_bitmap(&docids)); } insta::assert_snapshot!(format!("filter_sort_{i}_ascending"), results); diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index fc62b894f..5ce55ec6d 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -111,8 +111,6 @@ impl<'t> Iterator for DescendingFacetSort<'t> { #[cfg(test)] mod tests { - - use heed::BytesDecode; use rand::Rng; use rand::SeedableRng; use roaring::RoaringBitmap; @@ -129,7 +127,7 @@ mod tests { for i in 0..256u16 { let mut bitmap = RoaringBitmap::new(); bitmap.insert(i as u32); - index.insert(&mut txn, 0, &i, &bitmap); + index.insert(&mut txn, 0, &(i as f64), &bitmap); } txn.commit().unwrap(); index @@ -138,14 +136,14 @@ mod tests { let index = FacetIndex::::new(4, 8); let mut txn = index.env.write_txn().unwrap(); - let rng = rand::rngs::SmallRng::from_seed([0; 32]); + let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); let keys = std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); for (_i, key) in keys.into_iter().enumerate() { let mut bitmap = RoaringBitmap::new(); bitmap.insert(key); - bitmap.insert(key + 100.); + bitmap.insert(key + 100); index.insert(&mut txn, 0, &(key as f64), &bitmap); } txn.commit().unwrap(); @@ -160,15 +158,15 @@ mod tests { #[test] fn filter_sort_descending() { let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.into_iter().enumerate() { + for (i, index) in indexes.iter().enumerate() { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).into_iter().collect::(); let mut results = String::new(); let db = index.db.content.remap_key_type::>(); - let iter = descending_facet_sort(&txn, &db, 0, candidates); - for (facet, docids) in iter { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - results.push_str(&format!("{facet}: {}\n", display_bitmap(&docids))); + let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); + for el in iter { + let docids = el.unwrap(); + results.push_str(&display_bitmap(&docids)); } insta::assert_snapshot!(format!("filter_sort_{i}_descending"), results); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 12074cc12..2ca6c0689 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -78,9 +78,10 @@ pub(crate) fn get_highest_level<'t>( #[cfg(test)] mod test { + use crate::update::FacetsUpdateIncremental; + use heed::{BytesDecode, BytesEncode, Env, RwTxn}; + use roaring::RoaringBitmap; use std::{fmt::Display, marker::PhantomData, rc::Rc}; - - use heed::{BytesDecode, BytesEncode, Env}; use tempfile::TempDir; use crate::{ @@ -148,6 +149,17 @@ mod test { _phantom: PhantomData, } } + pub fn insert<'a>( + &self, + rwtxn: &'a mut RwTxn, + field_id: u16, + key: &'a >::EItem, + docids: &RoaringBitmap, + ) { + let update = FacetsUpdateIncremental::new(self.db.content); + let key_bytes = BoundCodec::bytes_encode(&key).unwrap(); + update.insert(rwtxn, field_id, &key_bytes, docids).unwrap(); + } } impl Display for FacetIndex diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 17ee3f392..c6b83eeb6 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -1,15 +1,10 @@ +use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey}; +use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index}; +use roaring::RoaringBitmap; use std::borrow::Cow; use std::fmt::Write; use std::path::Path; -use heed::types::ByteSlice; -use heed::BytesDecode; -use roaring::RoaringBitmap; - -use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey}; -use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec; -use crate::{make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index}; - #[track_caller] pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Settings { let mut settings = insta::Settings::clone_current(); diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index d2fb3755f..df0b93839 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -17,13 +17,18 @@ enum DeletionResult { Remove { prev: Option>, next: Option> }, } -struct FacetUpdateIncremental { +pub struct FacetsUpdateIncremental { db: heed::Database, FacetGroupValueCodec>, group_size: usize, min_level_size: usize, max_group_size: usize, } -impl FacetUpdateIncremental { +impl FacetsUpdateIncremental { + pub fn new(db: heed::Database, FacetGroupValueCodec>) -> Self { + Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } + } +} +impl FacetsUpdateIncremental { fn find_insertion_key_value( &self, field_id: u16, @@ -263,7 +268,7 @@ impl FacetUpdateIncremental { } let group_size = self.group_size; - let highest_level = get_highest_level(&txn, *self.db, field_id)?; + let highest_level = get_highest_level(&txn, self.db, field_id)?; let result = self.insert_in_level(txn, field_id, highest_level as u8, new_key, new_values)?; @@ -412,7 +417,7 @@ impl FacetUpdateIncremental { if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() { return Ok(()); } - let highest_level = get_highest_level(&txn, *self.db, field_id)?; + let highest_level = get_highest_level(&txn, self.db, field_id)?; // let key_bytes = BoundCodec::bytes_encode(&key).unwrap(); diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index cd96d3e88..8fba16d3d 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -2,6 +2,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; pub use self::facet::bulk::FacetsUpdateBulk; +pub use self::facet::incremental::FacetsUpdateIncremental; pub use self::index_documents::{ DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, };