From 3a97d30cd96fa898285f4a8ed60b04ff8f85e400 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 13 Feb 2024 14:53:52 +0100 Subject: [PATCH] Disable incremental facet update as a stop gap --- .../search/facet/facet_distribution_iter.rs | 69 - milli/src/search/facet/facet_range_search.rs | 344 ----- .../src/search/facet/facet_sort_ascending.rs | 116 -- .../src/search/facet/facet_sort_descending.rs | 125 -- milli/src/search/facet/mod.rs | 106 -- milli/src/update/facet/bulk.rs | 48 - milli/src/update/facet/incremental.rs | 1214 ----------------- milli/src/update/facet/mod.rs | 157 +-- milli/src/update/mod.rs | 1 - 9 files changed, 11 insertions(+), 2169 deletions(-) delete mode 100644 milli/src/update/facet/incremental.rs diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index d993ef2dc..b28ac7d71 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -222,72 +222,3 @@ where Ok(ControlFlow::Continue(())) } } - -#[cfg(test)] -mod tests { - use std::ops::ControlFlow; - - use heed::BytesDecode; - use roaring::RoaringBitmap; - - use super::lexicographically_iterate_over_facet_distribution; - use crate::heed_codec::facet::OrderedF64Codec; - use crate::milli_snap; - use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; - - #[test] - fn filter_distribution_all() { - let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (0..=255).collect::(); - let mut results = String::new(); - lexicographically_iterate_over_facet_distribution( - &txn, - index.content, - 0, - &candidates, - |facet, count, _| { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - results.push_str(&format!("{facet}: {count}\n")); - Ok(ControlFlow::Continue(())) - }, - ) - .unwrap(); - milli_snap!(results, i); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_distribution_all_stop_early() { - let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (0..=255).collect::(); - let mut results = String::new(); - let mut nbr_facets = 0; - lexicographically_iterate_over_facet_distribution( - &txn, - index.content, - 0, - &candidates, - |facet, count, _| { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - if nbr_facets == 100 { - Ok(ControlFlow::Break(())) - } else { - nbr_facets += 1; - results.push_str(&format!("{facet}: {count}\n")); - Ok(ControlFlow::Continue(())) - } - }, - ) - .unwrap(); - milli_snap!(results, i); - - txn.commit().unwrap(); - } - } -} diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index f1a26ded5..5c6c568b9 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -303,347 +303,3 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { Ok(()) } } - -#[cfg(test)] -mod tests { - use std::ops::Bound; - - use roaring::RoaringBitmap; - - use super::find_docids_of_facet_within_bounds; - use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; - use crate::milli_snap; - use crate::search::facet::tests::{ - get_random_looking_index, get_random_looking_index_with_multiple_field_ids, - get_simple_index, get_simple_index_with_multiple_field_ids, - }; - use crate::snapshot_tests::display_bitmap; - - #[test] - fn random_looking_index_snap() { - let index = get_random_looking_index(); - milli_snap!(format!("{index}"), @"3256c76a7c1b768a013e78d5fa6e9ff9"); - } - - #[test] - fn random_looking_index_with_multiple_field_ids_snap() { - let index = get_random_looking_index_with_multiple_field_ids(); - milli_snap!(format!("{index}"), @"c3e5fe06a8f1c404ed4935b32c90a89b"); - } - - #[test] - fn simple_index_snap() { - let index = get_simple_index(); - milli_snap!(format!("{index}"), @"5dbfa134cc44abeb3ab6242fc182e48e"); - } - - #[test] - fn simple_index_with_multiple_field_ids_snap() { - let index = get_simple_index_with_multiple_field_ids(); - milli_snap!(format!("{index}"), @"a4893298218f682bc76357f46777448c"); - } - - #[test] - fn filter_range_increasing() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - get_random_looking_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let mut results = String::new(); - for i in 0..=255 { - let i = i as f64; - let start = Bound::Included(0.); - let end = Bound::Included(i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results.push_str(&format!("0 <= . <= {i} : {}\n", display_bitmap(&docids))); - } - milli_snap!(results, format!("included_{i}")); - let mut results = String::new(); - for i in 0..=255 { - let i = i as f64; - let start = Bound::Excluded(0.); - let end = Bound::Excluded(i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results.push_str(&format!("0 < . < {i} : {}\n", display_bitmap(&docids))); - } - milli_snap!(results, format!("excluded_{i}")); - txn.commit().unwrap(); - } - } - #[test] - fn filter_range_decreasing() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - get_random_looking_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - - let mut results = String::new(); - - for i in (0..=255).rev() { - let i = i as f64; - let start = Bound::Included(i); - let end = Bound::Included(255.); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - results.push_str(&format!("{i} <= . <= 255 : {}\n", display_bitmap(&docids))); - } - - milli_snap!(results, format!("included_{i}")); - - let mut results = String::new(); - - for i in (0..=255).rev() { - let i = i as f64; - let start = Bound::Excluded(i); - let end = Bound::Excluded(255.); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - results.push_str(&format!("{i} < . < 255 : {}\n", display_bitmap(&docids))); - } - - milli_snap!(results, format!("excluded_{i}")); - - txn.commit().unwrap(); - } - } - #[test] - fn filter_range_pinch() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - get_random_looking_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - - let mut results = String::new(); - - for i in (0..=128).rev() { - let i = i as f64; - let start = Bound::Included(i); - let end = Bound::Included(255. - i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - results.push_str(&format!( - "{i} <= . <= {r} : {docids}\n", - r = 255. - i, - docids = display_bitmap(&docids) - )); - } - - milli_snap!(results, format!("included_{i}")); - - let mut results = String::new(); - - for i in (0..=128).rev() { - let i = i as f64; - let start = Bound::Excluded(i); - let end = Bound::Excluded(255. - i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - results.push_str(&format!( - "{i} < . < {r} {docids}\n", - r = 255. - i, - docids = display_bitmap(&docids) - )); - } - - milli_snap!(results, format!("excluded_{i}")); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_range_unbounded() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - get_random_looking_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let mut results = String::new(); - for i in 0..=255 { - let i = i as f64; - let start = Bound::Included(i); - let end = Bound::Unbounded; - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results.push_str(&format!(">= {i}: {}\n", display_bitmap(&docids))); - } - milli_snap!(results, format!("start_from_included_{i}")); - let mut results = String::new(); - for i in 0..=255 { - let i = i as f64; - let start = Bound::Unbounded; - let end = Bound::Included(i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results.push_str(&format!("<= {i}: {}\n", display_bitmap(&docids))); - } - milli_snap!(results, format!("end_at_included_{i}")); - - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &Bound::Unbounded, - &Bound::Unbounded, - &mut docids, - ) - .unwrap(); - milli_snap!( - &format!("all field_id 0: {}\n", display_bitmap(&docids)), - format!("unbounded_field_id_0_{i}") - ); - - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 1, - &Bound::Unbounded, - &Bound::Unbounded, - &mut docids, - ) - .unwrap(); - milli_snap!( - &format!("all field_id 1: {}\n", display_bitmap(&docids)), - format!("unbounded_field_id_1_{i}") - ); - - drop(txn); - } - } - - #[test] - fn filter_range_exact() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - get_random_looking_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let mut results_0 = String::new(); - let mut results_1 = String::new(); - for i in 0..=255 { - let i = i as f64; - let start = Bound::Included(i); - let end = Bound::Included(i); - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 0, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results_0.push_str(&format!("{i}: {}\n", display_bitmap(&docids))); - - let mut docids = RoaringBitmap::new(); - find_docids_of_facet_within_bounds::( - &txn, - index.content.remap_key_type::>(), - 1, - &start, - &end, - &mut docids, - ) - .unwrap(); - #[allow(clippy::format_push_string)] - results_1.push_str(&format!("{i}: {}\n", display_bitmap(&docids))); - } - milli_snap!(results_0, format!("field_id_0_exact_{i}")); - milli_snap!(results_1, format!("field_id_1_exact_{i}")); - - drop(txn); - } - } -} diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 20c277c63..2b82eb2fa 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -112,119 +112,3 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { } } } - -#[cfg(test)] -mod tests { - use roaring::RoaringBitmap; - - use crate::milli_snap; - use crate::search::facet::facet_sort_ascending::ascending_facet_sort; - use crate::search::facet::tests::{ - get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids, - get_simple_index, get_simple_string_index_with_multiple_field_ids, - }; - use crate::snapshot_tests::display_bitmap; - - #[test] - fn filter_sort_ascending() { - let indexes = [get_simple_index(), get_random_looking_index()]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (200..=300).collect::(); - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, i); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_sort_ascending_multiple_field_ids() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (200..=300).collect::(); - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, format!("{i}-0")); - - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, format!("{i}-1")); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_sort_ascending_with_no_candidates() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (_i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = RoaringBitmap::new(); - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_sort_ascending_with_inexisting_field_id() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (_i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = RoaringBitmap::new(); - let mut results = String::new(); - let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - txn.commit().unwrap(); - } - } -} diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index ae6eb60d0..188f5182f 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -117,128 +117,3 @@ impl<'t> Iterator for DescendingFacetSort<'t> { } } } - -#[cfg(test)] -mod tests { - use roaring::RoaringBitmap; - - use crate::heed_codec::facet::FacetGroupKeyCodec; - use crate::heed_codec::BytesRefCodec; - use crate::milli_snap; - use crate::search::facet::facet_sort_descending::descending_facet_sort; - use crate::search::facet::tests::{ - get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids, - get_simple_index, get_simple_index_with_multiple_field_ids, - get_simple_string_index_with_multiple_field_ids, - }; - use crate::snapshot_tests::display_bitmap; - - #[test] - fn filter_sort_descending() { - let indexes = [ - get_simple_index(), - get_random_looking_index(), - get_simple_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (200..=300).collect::(); - let mut results = String::new(); - let db = index.content.remap_key_type::>(); - let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, i); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_sort_descending_multiple_field_ids() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = (200..=300).collect::(); - let mut results = String::new(); - let db = index.content.remap_key_type::>(); - let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, format!("{i}-0")); - - let mut results = String::new(); - - let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - milli_snap!(results, format!("{i}-1")); - - txn.commit().unwrap(); - } - } - #[test] - fn filter_sort_ascending_with_no_candidates() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (_i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = RoaringBitmap::new(); - let mut results = String::new(); - let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - let mut results = String::new(); - let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - txn.commit().unwrap(); - } - } - - #[test] - fn filter_sort_ascending_with_inexisting_field_id() { - let indexes = [ - get_simple_string_index_with_multiple_field_ids(), - get_random_looking_string_index_with_multiple_field_ids(), - ]; - for (_i, index) in indexes.iter().enumerate() { - let txn = index.env.read_txn().unwrap(); - let candidates = RoaringBitmap::new(); - let mut results = String::new(); - let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap(); - for el in iter { - let (docids, _) = el.unwrap(); - results.push_str(&display_bitmap(&docids)); - results.push('\n'); - } - assert!(results.is_empty()); - - txn.commit().unwrap(); - } - } -} diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index f44d6a153..e8ca6898d 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -116,109 +116,3 @@ pub(crate) fn get_highest_level<'t>( }) .unwrap_or(0)) } - -#[cfg(test)] -pub(crate) mod tests { - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - - use crate::heed_codec::facet::OrderedF64Codec; - use crate::heed_codec::StrRefCodec; - use crate::update::facet::test_helpers::FacetIndex; - - pub fn get_simple_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - pub fn get_random_looking_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - - for (_i, key) in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - pub fn get_simple_index_with_multiple_field_ids() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for fid in 0..2 { - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, fid, &(i as f64), &bitmap); - } - } - txn.commit().unwrap(); - index - } - pub fn get_random_looking_index_with_multiple_field_ids() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - for fid in 0..2 { - for (_i, &key) in keys.iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, fid, &(key as f64), &bitmap); - } - } - txn.commit().unwrap(); - index - } - pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for fid in 0..2 { - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - if i % 2 == 0 { - index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap); - } else { - index.insert(&mut txn, fid, &"", &bitmap); - } - } - } - txn.commit().unwrap(); - index - } - pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - for fid in 0..2 { - for (_i, &key) in keys.iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - if key % 2 == 0 { - index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap); - } else { - index.insert(&mut txn, fid, &"", &bitmap); - } - } - } - txn.commit().unwrap(); - index - } -} diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 3bd4cf5f5..a4aa796cc 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -407,54 +407,6 @@ mod tests { test("large_group_small_min_level", 16, 2); test("odd_group_odd_min_level", 7, 3); } - #[test] - fn insert_delete_field_insert() { - let test = |name: &str, group_size: u8, min_level_size: u8| { - let index = - FacetIndex::::new(group_size, 0 /*NA*/, min_level_size); - let mut wtxn = index.env.write_txn().unwrap(); - - let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new(); - for i in 0..100u32 { - // field id = 0, left_bound = i, docids = [i] - elements.push(((0, i as f64), once(i).collect())); - } - for i in 0..100u32 { - // field id = 1, left_bound = i, docids = [i] - elements.push(((1, i as f64), once(i).collect())); - } - index.bulk_insert(&mut wtxn, &[0, 1], elements.iter()); - - index.verify_structure_validity(&wtxn, 0); - index.verify_structure_validity(&wtxn, 1); - // delete all the elements for the facet id 0 - for i in 0..100u32 { - index.delete_single_docid(&mut wtxn, 0, &(i as f64), i); - } - index.verify_structure_validity(&wtxn, 0); - index.verify_structure_validity(&wtxn, 1); - - let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new(); - // then add some elements again for the facet id 1 - for i in 0..110u32 { - // field id = 1, left_bound = i, docids = [i] - elements.push(((1, i as f64), once(i).collect())); - } - index.verify_structure_validity(&wtxn, 0); - index.verify_structure_validity(&wtxn, 1); - index.bulk_insert(&mut wtxn, &[0, 1], elements.iter()); - - wtxn.commit().unwrap(); - - milli_snap!(format!("{index}"), name); - }; - - test("default", 4, 5); - test("small_group_small_min_level", 2, 2); - test("small_group_large_min_level", 2, 128); - test("large_group_small_min_level", 16, 2); - test("odd_group_odd_min_level", 7, 3); - } #[test] fn bug_3165() { diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs deleted file mode 100644 index 9d8ee08f4..000000000 --- a/milli/src/update/facet/incremental.rs +++ /dev/null @@ -1,1214 +0,0 @@ -use std::fs::File; -use std::io::BufReader; - -use heed::types::{Bytes, DecodeIgnore}; -use heed::{BytesDecode, Error, RoTxn, RwTxn}; -use obkv::KvReader; -use roaring::RoaringBitmap; - -use crate::facet::FacetType; -use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, -}; -use crate::heed_codec::BytesRefCodec; -use crate::search::facet::get_highest_level; -use crate::update::del_add::DelAdd; -use crate::update::index_documents::valid_lmdb_key; -use crate::{CboRoaringBitmapCodec, Index, Result}; - -enum InsertionResult { - InPlace, - Expand, - Insert, -} -enum DeletionResult { - InPlace, - Reduce { next: Option> }, - Remove { next: Option> }, -} - -/// Algorithm to incrementally insert and delete elememts into the -/// `facet_id_(string/f64)_docids` databases. -pub struct FacetsUpdateIncremental { - inner: FacetsUpdateIncrementalInner, - delta_data: grenad::Reader>, -} - -impl FacetsUpdateIncremental { - pub fn new( - index: &Index, - facet_type: FacetType, - delta_data: grenad::Reader>, - group_size: u8, - min_level_size: u8, - max_group_size: u8, - ) -> Self { - FacetsUpdateIncremental { - inner: FacetsUpdateIncrementalInner { - db: match facet_type { - FacetType::String => index - .facet_id_string_docids - .remap_key_type::>(), - FacetType::Number => index - .facet_id_f64_docids - .remap_key_type::>(), - }, - group_size, - max_group_size, - min_level_size, - }, - delta_data, - } - } - - pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> { - let mut cursor = self.delta_data.into_cursor()?; - while let Some((key, value)) = cursor.move_on_next()? { - if !valid_lmdb_key(key) { - continue; - } - let key = FacetGroupKeyCodec::::bytes_decode(key) - .map_err(heed::Error::Encoding)?; - let value = KvReader::new(value); - - let docids_to_delete = value - .get(DelAdd::Deletion) - .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.map_err(heed::Error::Encoding)); - - let docids_to_add = value - .get(DelAdd::Addition) - .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.map_err(heed::Error::Encoding)); - - if let Some(docids_to_delete) = docids_to_delete { - let docids_to_delete = docids_to_delete?; - self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?; - } - - if let Some(docids_to_add) = docids_to_add { - let docids_to_add = docids_to_add?; - self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?; - } - } - - Ok(()) - } -} - -/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type -pub struct FacetsUpdateIncrementalInner { - pub db: heed::Database, FacetGroupValueCodec>, - pub group_size: u8, - pub min_level_size: u8, - pub max_group_size: u8, -} -impl FacetsUpdateIncrementalInner { - /// Find the `FacetGroupKey`/`FacetGroupValue` in the database that - /// should be used to insert the new `facet_value` for the given `field_id` and `level` - /// where `level` must be strictly greater than 0. - /// - /// For example, when inserting the facet value `4`, there are two possibilities: - /// - /// 1. We find a key whose lower bound is 3 followed by a key whose lower bound is 6. Therefore, - /// we know that the implicit range of the first key is 3..6, which contains 4. - /// So the new facet value belongs in that first key/value pair. - /// - /// 2. The first key of the level has a lower bound of `5`. We return this key/value pair - /// but will need to change the lowerbound of this key to `4` in order to insert this facet value. - fn find_insertion_key_value( - &self, - field_id: u16, - level: u8, - facet_value: &[u8], - txn: &RoTxn, - ) -> Result<(FacetGroupKey>, FacetGroupValue)> { - assert!(level > 0); - match self.db.get_lower_than_or_equal_to( - txn, - &FacetGroupKey { field_id, level, left_bound: facet_value }, - )? { - Some((key, value)) => { - if key.level != level { - let mut prefix = vec![]; - prefix.extend_from_slice(&field_id.to_be_bytes()); - prefix.push(level); - - let mut iter = self - .db - .remap_types::() - .prefix_iter(txn, prefix.as_slice())?; - let (key_bytes, value) = iter.next().unwrap()?; - Ok(( - FacetGroupKeyCodec::::bytes_decode(key_bytes) - .map_err(Error::Encoding)? - .into_owned(), - value, - )) - } else { - Ok((key.into_owned(), value)) - } - } - None => { - // We checked that the level is > 0 - // Since all keys of level 1 are greater than those of level 0, - // we are guaranteed that db.get_lower_than_or_equal_to(key) exists - panic!() - } - } - } - - /// Insert the given facet value and corresponding document ids in the level 0 of the database - /// - /// ## Return - /// See documentation of `insert_in_level` - fn insert_in_level_0( - &self, - txn: &mut RwTxn, - field_id: u16, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result { - let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; - let value = FacetGroupValue { bitmap: docids.clone(), size: 1 }; - - let mut level0_prefix = vec![]; - level0_prefix.extend_from_slice(&field_id.to_be_bytes()); - level0_prefix.push(0); - - let mut iter = - self.db.remap_types::().prefix_iter(txn, &level0_prefix)?; - - if iter.next().is_none() { - drop(iter); - self.db.put(txn, &key, &value)?; - Ok(InsertionResult::Insert) - } else { - drop(iter); - let old_value = self.db.get(txn, &key)?; - match old_value { - Some(mut updated_value) => { - // now merge the two - updated_value.bitmap |= value.bitmap; - self.db.put(txn, &key, &updated_value)?; - Ok(InsertionResult::InPlace) - } - None => { - self.db.put(txn, &key, &value)?; - Ok(InsertionResult::Insert) - } - } - } - } - - /// Insert the given facet value and corresponding document ids in all the levels of the database up to the given `level`. - /// This function works recursively. - /// - /// ## Return - /// Returns the effect of adding the facet value to the database on the given `level`. - /// - /// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have - /// an effect on the number of keys in that level. Therefore, it did not increase the number of children - /// of the parent node. - /// - /// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted - /// in the addition of a new key in that level, and that therefore the number of children - /// of the parent node should be incremented. - fn insert_in_level( - &self, - txn: &mut RwTxn, - field_id: u16, - level: u8, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result { - if level == 0 { - return self.insert_in_level_0(txn, field_id, facet_value, docids); - } - - let max_group_size = self.max_group_size; - - let result = self.insert_in_level(txn, field_id, level - 1, facet_value, docids)?; - // level below inserted an element - - let (insertion_key, insertion_value) = - self.find_insertion_key_value(field_id, level, facet_value, txn)?; - - match result { - // because we know that we inserted in place, the facet_value is not a new one - // thus it doesn't extend a group, and thus the insertion key computed above is - // still correct - InsertionResult::InPlace => { - let mut updated_value = insertion_value; - updated_value.bitmap |= docids; - self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; - - return Ok(InsertionResult::InPlace); - } - InsertionResult::Expand => {} - InsertionResult::Insert => {} - } - - // Here we know that inserting the facet value in the level below resulted in the creation - // of a new key. Therefore, it may be the case that we need to modify the left bound of the - // insertion key (see documentation of `find_insertion_key_value` for an example of when that - // could happen). - let (insertion_key, insertion_key_was_modified) = { - let mut new_insertion_key = insertion_key.clone(); - let mut key_should_be_modified = false; - - if facet_value < insertion_key.left_bound.as_slice() { - new_insertion_key.left_bound = facet_value.to_vec(); - key_should_be_modified = true; - } - if key_should_be_modified { - let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; - assert!(is_deleted); - self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?; - } - (new_insertion_key, key_should_be_modified) - }; - // Now we know that the insertion key contains the `facet_value`. - - // We still need to update the insertion value by: - // 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`) - // 2. Merge the previous docids with the new one - let mut updated_value = insertion_value; - - if matches!(result, InsertionResult::Insert) { - updated_value.size += 1; - } - - if updated_value.size < max_group_size { - updated_value.bitmap |= docids; - self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; - if insertion_key_was_modified { - return Ok(InsertionResult::Expand); - } else { - return Ok(InsertionResult::InPlace); - } - } - - // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size` - // Therefore it must be split into two nodes. - - let size_left = updated_value.size / 2; - let size_right = updated_value.size - size_left; - - let level_below = level - 1; - - let start_key = FacetGroupKey { - field_id, - level: level_below, - left_bound: insertion_key.left_bound.as_slice(), - }; - - let mut iter = - self.db.range(txn, &(start_key..))?.take((size_left as usize) + (size_right as usize)); - - let group_left = { - let mut values_left = RoaringBitmap::new(); - - let mut i = 0; - for next in iter.by_ref() { - let (_key, value) = next?; - i += 1; - values_left |= &value.bitmap; - if i == size_left { - break; - } - } - - let key = - FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; - let value = FacetGroupValue { size: size_left, bitmap: values_left }; - (key, value) - }; - - let group_right = { - let ( - FacetGroupKey { left_bound: right_left_bound, .. }, - FacetGroupValue { bitmap: mut values_right, .. }, - ) = iter.next().unwrap()?; - - for next in iter.by_ref() { - let (_, value) = next?; - values_right |= &value.bitmap; - } - - let key = FacetGroupKey { field_id, level, left_bound: right_left_bound.to_vec() }; - let value = FacetGroupValue { size: size_right, bitmap: values_right }; - (key, value) - }; - drop(iter); - - let _ = self.db.delete(txn, &insertion_key.as_ref())?; - - self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?; - self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?; - - Ok(InsertionResult::Insert) - } - - /// Insert the given facet value and corresponding document ids in the database. - pub fn insert( - &self, - txn: &mut RwTxn, - field_id: u16, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result<()> { - if docids.is_empty() { - return Ok(()); - } - let group_size = self.group_size; - - let highest_level = get_highest_level(txn, self.db, field_id)?; - - let result = self.insert_in_level(txn, field_id, highest_level, facet_value, docids)?; - match result { - InsertionResult::InPlace => return Ok(()), - InsertionResult::Expand => return Ok(()), - InsertionResult::Insert => {} - } - - // Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`. - // If it has, we must build an addition level above it. - - let mut highest_level_prefix = vec![]; - highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); - highest_level_prefix.push(highest_level); - - let size_highest_level = - self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?.count(); - - if size_highest_level < self.group_size as usize * self.min_level_size as usize { - return Ok(()); - } - - let mut groups_iter = self - .db - .remap_types::() - .prefix_iter(txn, &highest_level_prefix)?; - - let nbr_new_groups = size_highest_level / self.group_size as usize; - let nbr_leftover_elements = size_highest_level % self.group_size as usize; - - let mut to_add = vec![]; - for _ in 0..nbr_new_groups { - let mut first_key = None; - let mut values = RoaringBitmap::new(); - for _ in 0..group_size { - let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .map_err(Error::Encoding)?; - - if first_key.is_none() { - first_key = Some(key_i); - } - values |= value_i.bitmap; - } - let key = FacetGroupKey { - field_id, - level: highest_level + 1, - left_bound: first_key.unwrap().left_bound, - }; - let value = FacetGroupValue { size: group_size, bitmap: values }; - to_add.push((key.into_owned(), value)); - } - // now we add the rest of the level, in case its size is > group_size * min_level_size - // this can indeed happen if the min_level_size parameter changes between two calls to `insert` - if nbr_leftover_elements > 0 { - let mut first_key = None; - let mut values = RoaringBitmap::new(); - for _ in 0..nbr_leftover_elements { - let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .map_err(Error::Encoding)?; - - if first_key.is_none() { - first_key = Some(key_i); - } - values |= value_i.bitmap; - } - let key = FacetGroupKey { - field_id, - level: highest_level + 1, - left_bound: first_key.unwrap().left_bound, - }; - // Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size` - // when it is created above. - let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values }; - to_add.push((key.into_owned(), value)); - } - - drop(groups_iter); - for (key, value) in to_add { - self.db.put(txn, &key.as_ref(), &value)?; - } - Ok(()) - } - - /// Delete the given document id from the given facet value in the database, from level 0 to the - /// the given level. - /// - /// ## Return - /// Returns the effect of removing the document id from the database on the given `level`. - /// - /// - `DeletionResult::InPlace` means that deleting the document id did not have - /// an effect on the keys in that level. - /// - /// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the - /// number of keys in the level. For example, removing a document id from the facet value `3` could - /// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted - /// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must - /// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. - /// - /// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the - /// bounds of the keys of the level. For example, removing a document id from the facet value - /// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, - /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). - /// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust - /// its left bound as well. - fn delete_in_level( - &self, - txn: &mut RwTxn, - field_id: u16, - level: u8, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result { - if level == 0 { - return self.delete_in_level_0(txn, field_id, facet_value, docids); - } - let (deletion_key, mut bitmap) = - self.find_insertion_key_value(field_id, level, facet_value, txn)?; - - let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docids)?; - - let mut decrease_size = false; - let next_key = match result { - DeletionResult::InPlace => { - bitmap.bitmap -= docids; - self.db.put(txn, &deletion_key.as_ref(), &bitmap)?; - return Ok(DeletionResult::InPlace); - } - DeletionResult::Reduce { next } => next, - DeletionResult::Remove { next } => { - decrease_size = true; - next - } - }; - // If either DeletionResult::Reduce or DeletionResult::Remove was returned, - // then we may need to adjust the left_bound of the deletion key. - - // If DeletionResult::Remove was returned, then we need to decrease the group - // size of the deletion key. - let mut updated_value = bitmap; - if decrease_size { - updated_value.size -= 1; - } - - if updated_value.size == 0 { - self.db.delete(txn, &deletion_key.as_ref())?; - Ok(DeletionResult::Remove { next: next_key }) - } else { - let mut updated_deletion_key = deletion_key.clone(); - let reduced_range = facet_value == deletion_key.left_bound; - if reduced_range { - updated_deletion_key.left_bound = next_key.clone().unwrap(); - } - updated_value.bitmap -= docids; - let _ = self.db.delete(txn, &deletion_key.as_ref())?; - self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?; - if reduced_range { - Ok(DeletionResult::Reduce { next: next_key }) - } else { - Ok(DeletionResult::InPlace) - } - } - } - - fn delete_in_level_0( - &self, - txn: &mut RwTxn, - field_id: u16, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result { - let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; - let mut bitmap = self.db.get(txn, &key)?.unwrap().bitmap; - bitmap -= docids; - - if bitmap.is_empty() { - let mut next_key = None; - if let Some((next, _)) = - self.db.remap_data_type::().get_greater_than(txn, &key)? - { - if next.field_id == field_id && next.level == 0 { - next_key = Some(next.left_bound.to_vec()); - } - } - self.db.delete(txn, &key)?; - Ok(DeletionResult::Remove { next: next_key }) - } else { - self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?; - Ok(DeletionResult::InPlace) - } - } - - pub fn delete( - &self, - txn: &mut RwTxn, - field_id: u16, - facet_value: &[u8], - docids: &RoaringBitmap, - ) -> Result<()> { - if self - .db - .remap_data_type::() - .get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })? - .is_none() - { - return Ok(()); - } - let highest_level = get_highest_level(txn, self.db, field_id)?; - - let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docids)?; - match result { - DeletionResult::InPlace => return Ok(()), - DeletionResult::Reduce { .. } => return Ok(()), - DeletionResult::Remove { .. } => {} - } - - // if we either removed a key from the highest level, its size may have fallen - // below `min_level_size`, in which case we need to remove the entire level - - let mut highest_level_prefix = vec![]; - highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); - highest_level_prefix.push(highest_level); - - if highest_level == 0 - || self - .db - .remap_types::() - .prefix_iter(txn, &highest_level_prefix)? - .count() - >= self.min_level_size as usize - { - return Ok(()); - } - let mut to_delete = vec![]; - let mut iter = - self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?; - for el in iter.by_ref() { - let (k, _) = el?; - to_delete.push( - FacetGroupKeyCodec::::bytes_decode(k) - .map_err(Error::Encoding)? - .into_owned(), - ); - } - drop(iter); - for k in to_delete { - self.db.delete(txn, &k.as_ref())?; - } - Ok(()) - } -} - -impl<'a> FacetGroupKey<&'a [u8]> { - pub fn into_owned(self) -> FacetGroupKey> { - FacetGroupKey { - field_id: self.field_id, - level: self.level, - left_bound: self.left_bound.to_vec(), - } - } -} - -impl FacetGroupKey> { - pub fn as_ref(&self) -> FacetGroupKey<&[u8]> { - FacetGroupKey { - field_id: self.field_id, - level: self.level, - left_bound: self.left_bound.as_slice(), - } - } -} - -#[cfg(test)] -mod tests { - use rand::seq::SliceRandom; - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - - use crate::heed_codec::facet::OrderedF64Codec; - use crate::heed_codec::StrRefCodec; - use crate::milli_snap; - use crate::update::facet::test_helpers::FacetIndex; - - #[test] - fn append() { - let index = FacetIndex::::new(4, 8, 5); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - let txn = index.env.read_txn().unwrap(); - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - #[test] - fn many_field_ids_append() { - let index = FacetIndex::::new(4, 8, 5); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 2, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 1, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - let txn = index.env.read_txn().unwrap(); - index.verify_structure_validity(&txn, 0); - index.verify_structure_validity(&txn, 1); - index.verify_structure_validity(&txn, 2); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - #[test] - fn many_field_ids_prepend() { - let index = FacetIndex::::new(4, 8, 5); - for i in (0..256).rev() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - for i in (0..256).rev() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 2, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - for i in (0..256).rev() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - let mut txn = index.env.write_txn().unwrap(); - index.insert(&mut txn, 1, &(i as f64), &bitmap); - txn.commit().unwrap(); - } - let txn = index.env.read_txn().unwrap(); - index.verify_structure_validity(&txn, 0); - index.verify_structure_validity(&txn, 1); - index.verify_structure_validity(&txn, 2); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - - #[test] - fn prepend() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - for i in (0..256).rev() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - - #[test] - fn shuffled() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut keys = (0..256).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - - #[test] - fn merge_values() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut keys = (0..256).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(rng.gen_range(256..512)); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - - #[test] - fn delete_from_end() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - - for i in (200..256).rev() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 200); - let mut txn = index.env.write_txn().unwrap(); - - for i in (150..200).rev() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 150); - let mut txn = index.env.write_txn().unwrap(); - for i in (100..150).rev() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 100); - let mut txn = index.env.write_txn().unwrap(); - for i in (17..100).rev() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 17); - let mut txn = index.env.write_txn().unwrap(); - for i in (15..17).rev() { - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 15); - let mut txn = index.env.write_txn().unwrap(); - for i in (0..15).rev() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 0); - } - - #[test] - fn delete_from_start() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - for i in 0..256 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - - for i in 0..128 { - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 127); - let mut txn = index.env.write_txn().unwrap(); - for i in 128..216 { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 215); - let mut txn = index.env.write_txn().unwrap(); - for i in 216..256 { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 255); - } - - #[test] - #[allow(clippy::needless_range_loop)] - fn delete_shuffled() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - - let mut keys = (0..256).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - - for i in 0..128 { - let key = keys[i]; - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 127); - let mut txn = index.env.write_txn().unwrap(); - for i in 128..216 { - let key = keys[i]; - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - let mut txn = index.env.write_txn().unwrap(); - milli_snap!(format!("{index}"), 215); - for i in 216..256 { - let key = keys[i]; - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), 255); - } - - #[test] - fn in_place_level0_insert() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut keys = (0..16).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - for i in 0..4 { - for &key in keys.iter() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(rng.gen_range(i * 256..(i + 1) * 256)); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}")); - } - - #[test] - fn in_place_level0_delete() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut keys = (0..64).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - - for &key in keys.iter() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.verify_structure_validity(&txn, 0); - - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), "before_delete"); - - let mut txn = index.env.write_txn().unwrap(); - - for &key in keys.iter() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &(key as f64), key + 100); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), "after_delete"); - } - - #[test] - fn shuffle_merge_string_and_delete() { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut keys = (1000..1064).collect::>(); - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - keys.shuffle(&mut rng); - - for &key in keys.iter() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.verify_structure_validity(&txn, 0); - index.insert(&mut txn, 0, &format!("{key:x}").as_str(), &bitmap); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), "before_delete"); - - let mut txn = index.env.write_txn().unwrap(); - - for &key in keys.iter() { - index.verify_structure_validity(&txn, 0); - index.delete_single_docid(&mut txn, 0, &format!("{key:x}").as_str(), key + 100); - } - index.verify_structure_validity(&txn, 0); - txn.commit().unwrap(); - milli_snap!(format!("{index}"), "after_delete"); - } -} - -// fuzz tests -#[cfg(all(test, fuzzing))] -/** -Fuzz test for the incremental indxer. - -The fuzz test uses fuzzcheck, a coverage-guided fuzzer. -See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org -for more information. - -It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with: -```sh -cargo install cargo-fuzzcheck -``` -To start the fuzz test, run (from the base folder or from milli/): -```sh -cargo fuzzcheck update::facet::incremental::fuzz::fuzz -``` -and wait a couple minutes to make sure the code was thoroughly tested, then -hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz. - -To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file: -```toml -[build] -rustflags = ["--cfg", "fuzzing"] -``` - -The fuzz test generates sequences of additions and deletions to the facet database and -ensures that: -1. its structure is still internally valid -2. its content is the same as a trivially correct implementation of the same database -*/ -mod fuzz { - use std::collections::{BTreeMap, HashMap}; - use std::iter::FromIterator; - use std::rc::Rc; - - use fuzzcheck::mutators::integer::U8Mutator; - use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; - use fuzzcheck::mutators::vector::VecMutator; - use fuzzcheck::DefaultMutator; - use roaring::RoaringBitmap; - use tempfile::TempDir; - - use super::*; - use crate::update::facet::test_helpers::FacetIndex; - #[derive(Default)] - pub struct TrivialDatabase { - pub elements: BTreeMap>, - } - impl TrivialDatabase - where - T: Ord + Clone + Eq + std::fmt::Debug, - { - #[no_coverage] - pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) { - if new_values.is_empty() { - return; - } - let values_field_id = self.elements.entry(field_id).or_default(); - let values = values_field_id.entry(new_key.clone()).or_default(); - *values |= new_values; - } - #[no_coverage] - pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) { - if let Some(values_field_id) = self.elements.get_mut(&field_id) { - if let Some(values) = values_field_id.get_mut(&key) { - *values -= values_to_remove; - if values.is_empty() { - values_field_id.remove(&key); - } - } - if values_field_id.is_empty() { - self.elements.remove(&field_id); - } - } - } - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - struct Operation { - #[field_mutator(VecMutator = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })] - key: Vec, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - max_group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - min_level_size: u8, - #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] - field_id: u16, - kind: OperationKind, - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - enum OperationKind { - Insert( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - Delete( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - } - - #[no_coverage] - fn compare_with_trivial_database(tempdir: Rc, operations: &[Operation]) { - let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten - let mut txn = index.env.write_txn().unwrap(); - - let mut trivial_db = TrivialDatabase::>::default(); - let mut value_to_keys = HashMap::>>::new(); - for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in - operations - { - index.set_group_size(*group_size); - index.set_max_group_size(*max_group_size); - index.set_min_level_size(*min_level_size); - match kind { - OperationKind::Insert(values) => { - let mut bitmap = RoaringBitmap::new(); - for value in values { - bitmap.insert(*value as u32); - value_to_keys.entry(*value).or_default().push(key.clone()); - } - index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap); - trivial_db.insert(*field_id, &key, &bitmap); - } - OperationKind::Delete(values) => { - let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); - let mut values_per_key = HashMap::new(); - - for value in values { - if let Some(keys) = value_to_keys.get(&(value as u8)) { - for key in keys { - let values: &mut RoaringBitmap = - values_per_key.entry(key).or_default(); - values.insert(value); - } - } - } - for (key, values) in values_per_key { - index.delete(&mut txn, *field_id, &key.as_slice(), &values); - trivial_db.delete(*field_id, &key, &values); - } - } - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - index.verify_structure_validity(&txn, *field_id); - } - txn.abort().unwrap(); - } - - #[test] - #[no_coverage] - fn fuzz() { - let tempdir = Rc::new(TempDir::new().unwrap()); - let tempdir_cloned = tempdir.clone(); - let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| { - compare_with_trivial_database(tempdir_cloned.clone(), operations) - }) - .default_mutator() - .serde_serializer() - .default_sensor_and_pool_with_custom_filter(|file, function| { - file == std::path::Path::new("milli/src/update/facet/incremental.rs") - && !function.contains("serde") - && !function.contains("tests::") - && !function.contains("fuzz::") - && !function.contains("display_bitmap") - }) - .arguments_from_cargo_fuzzcheck() - .launch(); - assert!(!result.found_test_failure); - } -} diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index ad8a838c8..514774027 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -72,7 +72,6 @@ two methods. Related PR: https://github.com/meilisearch/milli/pull/619 */ -pub const FACET_MAX_GROUP_SIZE: u8 = 8; pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_MIN_LEVEL_SIZE: u8 = 5; @@ -88,17 +87,14 @@ use heed::BytesEncode; use log::debug; use time::OffsetDateTime; -use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::BytesRefCodec; +use crate::heed_codec::facet::FacetGroupKey; use crate::update::index_documents::create_sorter; use crate::update::merge_btreeset_string; use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH}; pub mod bulk; -pub mod incremental; /// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases. /// @@ -106,11 +102,9 @@ pub mod incremental; /// a bulk update method or an incremental update method. pub struct FacetsUpdate<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, facet_type: FacetType, delta_data: grenad::Reader>, group_size: u8, - max_group_size: u8, min_level_size: u8, } impl<'i> FacetsUpdate<'i> { @@ -119,19 +113,9 @@ impl<'i> FacetsUpdate<'i> { facet_type: FacetType, delta_data: grenad::Reader>, ) -> Self { - let database = match facet_type { - FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() - } - FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() - } - }; Self { index, - database, group_size: FACET_GROUP_SIZE, - max_group_size: FACET_MAX_GROUP_SIZE, min_level_size: FACET_MIN_LEVEL_SIZE, facet_type, delta_data, @@ -145,30 +129,16 @@ impl<'i> FacetsUpdate<'i> { debug!("Computing and writing the facet values levels docids into LMDB on disk..."); self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; - // See self::comparison_bench::benchmark_facet_indexing - if self.delta_data.len() >= (self.database.len(wtxn)? / 50) { - let field_ids = - self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::>(); - let bulk_update = FacetsUpdateBulk::new( - self.index, - field_ids, - self.facet_type, - self.delta_data, - self.group_size, - self.min_level_size, - ); - bulk_update.execute(wtxn)?; - } else { - let incremental_update = FacetsUpdateIncremental::new( - self.index, - self.facet_type, - self.delta_data, - self.group_size, - self.min_level_size, - self.max_group_size, - ); - incremental_update.execute(wtxn)?; - } + let field_ids = self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::>(); + let bulk_update = FacetsUpdateBulk::new( + self.index, + field_ids, + self.facet_type, + self.delta_data, + self.group_size, + self.min_level_size, + ); + bulk_update.execute(wtxn)?; // We clear the list of normalized-for-search facets // and the previous FSTs to compute everything from scratch @@ -264,7 +234,6 @@ impl<'i> FacetsUpdate<'i> { pub(crate) mod test_helpers { use std::cell::Cell; use std::fmt::Display; - use std::iter::FromIterator; use std::marker::PhantomData; use std::rc::Rc; @@ -280,7 +249,6 @@ pub(crate) mod test_helpers { use crate::search::facet::get_highest_level; use crate::snapshot_tests::display_bitmap; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; - use crate::update::FacetsUpdateIncrementalInner; use crate::CboRoaringBitmapCodec; /// Utility function to generate a string whose position in a lexicographically @@ -396,49 +364,6 @@ pub(crate) mod test_helpers { self.min_level_size.set(std::cmp::max(1, min_level_size)); } - pub fn insert<'a>( - &self, - wtxn: &'a mut RwTxn, - field_id: u16, - key: &'a >::EItem, - docids: &RoaringBitmap, - ) { - let update = FacetsUpdateIncrementalInner { - db: self.content, - group_size: self.group_size.get(), - min_level_size: self.min_level_size.get(), - max_group_size: self.max_group_size.get(), - }; - let key_bytes = BoundCodec::bytes_encode(key).unwrap(); - update.insert(wtxn, field_id, &key_bytes, docids).unwrap(); - } - pub fn delete_single_docid<'a>( - &self, - wtxn: &'a mut RwTxn, - field_id: u16, - key: &'a >::EItem, - docid: u32, - ) { - self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid))) - } - - pub fn delete<'a>( - &self, - wtxn: &'a mut RwTxn, - field_id: u16, - key: &'a >::EItem, - docids: &RoaringBitmap, - ) { - let update = FacetsUpdateIncrementalInner { - db: self.content, - group_size: self.group_size.get(), - min_level_size: self.min_level_size.get(), - max_group_size: self.max_group_size.get(), - }; - let key_bytes = BoundCodec::bytes_encode(key).unwrap(); - update.delete(wtxn, field_id, &key_bytes, docids).unwrap(); - } - pub fn bulk_insert<'a, 'b>( &self, wtxn: &'a mut RwTxn, @@ -555,63 +480,3 @@ pub(crate) mod test_helpers { } } } - -#[allow(unused)] -#[cfg(test)] -mod comparison_bench { - use std::iter::once; - - use rand::Rng; - use roaring::RoaringBitmap; - - use super::test_helpers::FacetIndex; - use crate::heed_codec::facet::OrderedF64Codec; - - // This is a simple test to get an intuition on the relative speed - // of the incremental vs. bulk indexer. - // - // The benchmark shows the worst-case scenario for the incremental indexer, since - // each facet value contains only one document ID. - // - // In that scenario, it appears that the incremental indexer is about 50 times slower than the - // bulk indexer. - // #[test] - fn benchmark_facet_indexing() { - let mut facet_value = 0; - - let mut r = rand::thread_rng(); - - for i in 1..=20 { - let size = 50_000 * i; - let index = FacetIndex::::new(4, 8, 5); - - let mut txn = index.env.write_txn().unwrap(); - let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new(); - for i in 0..size { - // field id = 0, left_bound = i, docids = [i] - elements.push(((0, facet_value as f64), once(i).collect())); - facet_value += 1; - } - let timer = std::time::Instant::now(); - index.bulk_insert(&mut txn, &[0], elements.iter()); - let time_spent = timer.elapsed().as_millis(); - println!("bulk {size} : {time_spent}ms"); - - txn.commit().unwrap(); - - for nbr_doc in [1, 100, 1000, 10_000] { - let mut txn = index.env.write_txn().unwrap(); - let timer = std::time::Instant::now(); - // - // insert one document - // - for _ in 0..nbr_doc { - index.insert(&mut txn, 0, &r.gen(), &once(1).collect()); - } - let time_spent = timer.elapsed().as_millis(); - println!(" add {nbr_doc} : {time_spent}ms"); - txn.abort(); - } - } - } -} diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 66c52a52f..6b0820040 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -1,7 +1,6 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; pub use self::facet::bulk::FacetsUpdateBulk; -pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{ merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,