From a034a1e628175fcc046741037670bf030bda056c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 12 Oct 2022 09:42:55 +0200 Subject: [PATCH] Move StrRefCodec and ByteSliceRefCodec to their own files --- milli/Cargo.toml | 2 +- milli/src/heed_codec/byte_slice_ref.rs | 23 ++++++++++++++++ milli/src/heed_codec/facet/mod.rs | 26 +++---------------- milli/src/heed_codec/mod.rs | 4 +++ milli/src/heed_codec/{facet => }/str_ref.rs | 0 milli/src/index.rs | 3 ++- milli/src/search/criteria/asc_desc.rs | 7 ++--- milli/src/search/facet/facet_distribution.rs | 13 +++++++--- .../search/facet/facet_distribution_iter.rs | 22 +++++++++------- milli/src/search/facet/facet_range_search.rs | 18 +++++++------ .../src/search/facet/facet_sort_ascending.rs | 13 ++++++---- .../src/search/facet/facet_sort_descending.rs | 20 +++++++------- milli/src/search/facet/mod.rs | 12 ++++----- milli/src/update/facet/bulk.rs | 19 +++++++------- milli/src/update/facet/delete.rs | 13 +++++----- milli/src/update/facet/incremental.rs | 25 ++++++++++-------- milli/src/update/facet/mod.rs | 24 +++++++++-------- .../extract/extract_facet_string_docids.rs | 3 ++- 18 files changed, 140 insertions(+), 107 deletions(-) create mode 100644 milli/src/heed_codec/byte_slice_ref.rs rename milli/src/heed_codec/{facet => }/str_ref.rs (100%) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 49988da0b..b768476e3 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -57,7 +57,7 @@ md5 = "0.7.0" rand = {version = "0.8.5", features = ["small_rng"] } [target.'cfg(fuzzing)'.dev-dependencies] -fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } +fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } # TODO: use released version [features] default = [ "charabia/default" ] diff --git a/milli/src/heed_codec/byte_slice_ref.rs b/milli/src/heed_codec/byte_slice_ref.rs new file mode 100644 index 000000000..48eda63c5 --- /dev/null +++ b/milli/src/heed_codec/byte_slice_ref.rs @@ -0,0 +1,23 @@ +use std::borrow::Cow; + +use heed::{BytesDecode, BytesEncode}; + +/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated +/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. +pub struct ByteSliceRefCodec; + +impl<'a> BytesEncode<'a> for ByteSliceRefCodec { + type EItem = &'a [u8]; + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + Some(Cow::Borrowed(item)) + } +} + +impl<'a> BytesDecode<'a> for ByteSliceRefCodec { + type DItem = &'a [u8]; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + Some(bytes) + } +} diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index 35ec925dc..a727b148f 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -1,6 +1,5 @@ mod field_doc_id_facet_codec; mod ordered_f64_codec; -mod str_ref; use std::borrow::Cow; use std::convert::TryFrom; @@ -12,9 +11,10 @@ use roaring::RoaringBitmap; pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; pub use self::ordered_f64_codec::OrderedF64Codec; -pub use self::str_ref::StrRefCodec; use crate::{CboRoaringBitmapCodec, BEU16}; +use super::StrRefCodec; + pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec; pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec; pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec; @@ -33,7 +33,7 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { /// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`] /// databases. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord pub struct FacetGroupKey { pub field_id: u16, pub level: u8, @@ -103,23 +103,3 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { Some(FacetGroupValue { size, bitmap }) } } - -/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated -/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. -pub struct ByteSliceRef; - -impl<'a> BytesEncode<'a> for ByteSliceRef { - type EItem = &'a [u8]; - - fn bytes_encode(item: &'a Self::EItem) -> Option> { - Some(Cow::Borrowed(item)) - } -} - -impl<'a> BytesDecode<'a> for ByteSliceRef { - type DItem = &'a [u8]; - - fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(bytes) - } -} diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index e07e47c79..6a058f95f 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -1,10 +1,12 @@ mod beu32_str_codec; +mod byte_slice_ref; pub mod facet; mod field_id_word_count_codec; mod obkv_codec; mod roaring_bitmap; mod roaring_bitmap_length; mod str_beu32_codec; +mod str_ref; mod str_str_u8_codec; pub use self::beu32_str_codec::BEU32StrCodec; @@ -16,3 +18,5 @@ pub use self::roaring_bitmap_length::{ }; pub use self::str_beu32_codec::StrBEU32Codec; pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; +pub use byte_slice_ref::ByteSliceRefCodec; +pub use str_ref::StrRefCodec; diff --git a/milli/src/heed_codec/facet/str_ref.rs b/milli/src/heed_codec/str_ref.rs similarity index 100% rename from milli/src/heed_codec/facet/str_ref.rs rename to milli/src/heed_codec/str_ref.rs diff --git a/milli/src/index.rs b/milli/src/index.rs index 893817d59..7c5e92d05 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -16,8 +16,9 @@ use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, - FieldIdCodec, OrderedF64Codec, StrRefCodec, + FieldIdCodec, OrderedF64Codec, }; +use crate::heed_codec::StrRefCodec; use crate::{ default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 586605116..fd03b1b60 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -7,7 +7,8 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::facet::FacetType; -use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; +use crate::heed_codec::facet::FacetGroupKeyCodec; +use crate::heed_codec::ByteSliceRefCodec; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::search::query_tree::Operation; @@ -194,14 +195,14 @@ fn facet_ordered<'t>( let number_iter = make_iter( rtxn, - index.facet_id_f64_docids.remap_key_type::>(), + index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates.clone(), )?; let string_iter = make_iter( rtxn, - index.facet_id_string_docids.remap_key_type::>(), + index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, )?; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 2e2e448c2..f6a53dbd4 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -9,9 +9,10 @@ use roaring::RoaringBitmap; use crate::error::UserError; use crate::facet::FacetType; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, - FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec, + FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, + OrderedF64Codec, }; +use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter; use crate::{FieldId, Index, Result}; @@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> { ) -> heed::Result<()> { facet_distribution_iter::iterate_over_facet_distribution( self.rtxn, - self.index.facet_id_f64_docids.remap_key_type::>(), + self.index + .facet_id_f64_docids + .remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, _| { @@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> { ) -> heed::Result<()> { facet_distribution_iter::iterate_over_facet_distribution( self.rtxn, - self.index.facet_id_string_docids.remap_key_type::>(), + self.index + .facet_id_string_docids + .remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, any_docid| { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 4c6dc75fa..0fdca4118 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -4,9 +4,8 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, -}; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::ByteSliceRefCodec; use crate::DocumentId; /// Call the given closure on the facet distribution of the candidate documents. @@ -22,7 +21,7 @@ use crate::DocumentId; /// keep iterating over the different facet values or stop. pub fn iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, callback: CB, @@ -31,10 +30,13 @@ where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { let mut fd = FacetDistribution { rtxn, db, field_id, callback }; - let highest_level = - get_highest_level(rtxn, db.remap_key_type::>(), field_id)?; + let highest_level = get_highest_level( + rtxn, + db.remap_key_type::>(), + field_id, + )?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; return Ok(()); } else { @@ -47,7 +49,7 @@ where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, callback: CB, } @@ -72,11 +74,13 @@ where if key.field_id != self.field_id { return Ok(ControlFlow::Break(())); } + // TODO: use real intersection and then take min()? let docids_in_common = value.bitmap.intersection_len(candidates); if docids_in_common > 0 { + // TODO: use min() let any_docid = value.bitmap.iter().next().unwrap(); match (self.callback)(key.left_bound, docids_in_common, any_docid)? { - ControlFlow::Continue(_) => {} + ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), } } diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index a7b4674f1..07300e920 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -4,9 +4,8 @@ use heed::BytesEncode; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; -use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, -}; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::ByteSliceRefCodec; use crate::Result; /// Find all the document ids for which the given field contains a value contained within @@ -47,13 +46,16 @@ where } Bound::Unbounded => Bound::Unbounded, }; - let db = db.remap_key_type::>(); + let db = db.remap_key_type::>(); let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(starting_left_bound) = get_first_facet_value::(rtxn, db, field_id)? { - let rightmost_bound = - Bound::Included(get_last_facet_value::(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded + if let Some(starting_left_bound) = + get_first_facet_value::(rtxn, db, field_id)? + { + let rightmost_bound = Bound::Included( + get_last_facet_value::(rtxn, db, field_id)?.unwrap(), + ); // will not fail because get_first_facet_value succeeded let group_size = usize::MAX; f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; Ok(()) @@ -65,7 +67,7 @@ where /// Fetch the document ids that have a facet with a value between the two given bounds struct FacetRangeSearch<'t, 'b, 'bitmap> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, left: Bound<&'b [u8]>, right: Bound<&'b [u8]>, diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 2b0a45e15..2f1f73db3 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -3,8 +3,9 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; +use crate::heed_codec::ByteSliceRefCodec; /// Return an iterator which iterates over the given candidate documents in /// ascending order of their facet value for the given field id. @@ -30,12 +31,12 @@ use crate::heed_codec::facet::{ /// Note that once a document id is returned by the iterator, it is never returned again. pub fn ascending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); @@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>( struct AscendingFacetSort<'t, 'e> { rtxn: &'t heed::RoTxn<'e>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, stack: Vec<( RoaringBitmap, - std::iter::Take, FacetGroupValueCodec>>, + std::iter::Take< + heed::RoRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, + >, )>, } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 47d0f145b..5f09d708b 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -5,22 +5,23 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; +use crate::heed_codec::ByteSliceRefCodec; /// See documentationg for [`ascending_facet_sort`](super::ascending_facet_sort). /// /// This function does the same thing, but in the opposite order. pub fn descending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; - let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); Ok(Box::new(DescendingFacetSort { @@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>( struct DescendingFacetSort<'t> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, stack: Vec<( RoaringBitmap, std::iter::Take< - heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, + heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, >, Bound<&'t [u8]>, )>, @@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *right_bound = Bound::Excluded(left_bound); let iter = match self .db - .remap_key_type::>() + .remap_key_type::>() .rev_range( &self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow), @@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> { mod tests { use roaring::RoaringBitmap; - use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; + use crate::heed_codec::facet::FacetGroupKeyCodec; + use crate::heed_codec::ByteSliceRefCodec; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; @@ -134,7 +136,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).into_iter().collect::(); let mut results = String::new(); - let db = index.content.remap_key_type::>(); + let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { let docids = el.unwrap(); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index c854b546d..ccf40d6aa 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::filter::Filter; -use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; - +use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::ByteSliceRefCodec; mod facet_distribution; mod facet_distribution_iter; mod facet_range_search; @@ -17,7 +17,7 @@ mod filter; /// Get the first facet value in the facet database pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -42,7 +42,7 @@ where /// Get the last facet value in the facet database pub(crate) fn get_last_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -67,7 +67,7 @@ where /// Get the height of the highest level in the facet database pub(crate) fn get_highest_level<'t>( txn: &'t RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result { let field_id_prefix = &field_id.to_be_bytes(); @@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>( .next() .map(|el| { let (key, _) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); key.level }) .unwrap_or(0)) diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index d3db0a0fa..4e10c22dd 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -11,8 +11,9 @@ use time::OffsetDateTime; use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; use crate::facet::FacetType; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; +use crate::heed_codec::ByteSliceRefCodec; use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; @@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> { let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self; let db = match facet_type { - FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() - } + FacetType::String => index + .facet_id_string_docids + .remap_key_type::>(), FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; @@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> { /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type pub(crate) struct FacetsUpdateBulkInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub new_data: Option>, pub group_size: u8, pub min_level_size: u8, @@ -216,7 +217,7 @@ impl FacetsUpdateBulkInner { .db .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::, FacetGroupValueCodec>(); let mut left_bound: &[u8] = &[]; let mut first_iteration_for_new_group = true; @@ -299,7 +300,7 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) + let key = FacetGroupKeyCodec::::bytes_encode(&key) .ok_or(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = @@ -328,7 +329,7 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) + let key = FacetGroupKeyCodec::::bytes_encode(&key) .ok_or(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; diff --git a/milli/src/update/facet/delete.rs b/milli/src/update/facet/delete.rs index efe1d800a..74c17e8f2 100644 --- a/milli/src/update/facet/delete.rs +++ b/milli/src/update/facet/delete.rs @@ -1,7 +1,8 @@ use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; use crate::{ facet::FacetType, - heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, + heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, + heed_codec::ByteSliceRefCodec, update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner}, FieldId, Index, Result, }; @@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet}; pub struct FacetsDelete<'i, 'b> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, facet_type: FacetType, affected_facet_values: HashMap>>, docids_to_delete: &'b RoaringBitmap, @@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> { docids_to_delete: &'b RoaringBitmap, ) -> Self { let database = match facet_type { - FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() - } + FacetType::String => index + .facet_id_string_docids + .remap_key_type::>(), FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; Self { diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 895713d43..9dda86a46 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -7,8 +7,9 @@ use roaring::RoaringBitmap; use crate::facet::FacetType; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; +use crate::heed_codec::ByteSliceRefCodec; use crate::search::facet::get_highest_level; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; @@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> { db: match facet_type { FacetType::String => index .facet_id_string_docids - .remap_key_type::>(), + .remap_key_type::>(), FacetType::Number => index .facet_id_f64_docids - .remap_key_type::>(), + .remap_key_type::>(), }, group_size, max_group_size, @@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> { let mut cursor = self.new_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { - let key = FacetGroupKeyCodec::::bytes_decode(key) + let key = FacetGroupKeyCodec::::bytes_decode(key) .ok_or(heed::Error::Encoding)?; let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?; @@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> { /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type pub struct FacetsUpdateIncrementalInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub group_size: u8, pub min_level_size: u8, pub max_group_size: u8, @@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner { if let Some(e) = prefix_iter.next() { let (key_bytes, value) = e?; Ok(( - FacetGroupKeyCodec::::bytes_decode(&key_bytes) + FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? .into_owned(), value, @@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner { )?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( - FacetGroupKeyCodec::::bytes_decode(&key_bytes) + FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? .into_owned(), value, @@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) + let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)?; if first_key.is_none() { @@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..nbr_leftover_elements { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) + let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)?; if first_key.is_none() { @@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner { while let Some(el) = iter.next() { let (k, _) = el?; to_delete.push( - FacetGroupKeyCodec::::bytes_decode(k) + FacetGroupKeyCodec::::bytes_decode(k) .ok_or(Error::Encoding)? .into_owned(), ); @@ -655,7 +656,8 @@ mod tests { use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; - use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec}; + use crate::heed_codec::facet::OrderedF64Codec; + use crate::heed_codec::StrRefCodec; use crate::milli_snap; use crate::update::facet::tests::FacetIndex; @@ -1019,6 +1021,7 @@ mod tests { // fuzz tests } + #[cfg(all(test, fuzzing))] mod fuzz { use std::borrow::Cow; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index c75713158..a6d8c3d60 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; -use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::ByteSliceRefCodec; use crate::{Index, Result}; use std::fs::File; @@ -87,7 +88,7 @@ pub mod incremental; pub struct FacetsUpdate<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, facet_type: FacetType, new_data: grenad::Reader, group_size: u8, @@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> { impl<'i> FacetsUpdate<'i> { pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader) -> Self { let database = match facet_type { - FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() - } + FacetType::String => index + .facet_id_string_docids + .remap_key_type::>(), FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; Self { @@ -159,8 +160,9 @@ pub(crate) mod tests { use super::bulk::FacetsUpdateBulkInner; use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; + use crate::heed_codec::ByteSliceRefCodec; use crate::search::facet::get_highest_level; use crate::snapshot_tests::display_bitmap; use crate::update::FacetsUpdateIncrementalInner; @@ -173,7 +175,7 @@ pub(crate) mod tests { BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, { pub env: Env, - pub content: heed::Database, FacetGroupValueCodec>, + pub content: heed::Database, FacetGroupValueCodec>, pub group_size: Cell, pub min_level_size: Cell, pub max_group_size: Cell, @@ -327,7 +329,7 @@ pub(crate) mod tests { let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); let key: FacetGroupKey<&[u8]> = FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; - let key = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap(); writer.insert(&key, &value).unwrap(); } @@ -362,7 +364,7 @@ pub(crate) mod tests { .unwrap(); while let Some(el) = iter.next() { let (key, value) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(&key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(&key).unwrap(); let mut prefix_start_below = vec![]; prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); @@ -379,7 +381,7 @@ pub(crate) mod tests { ) .unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); - FacetGroupKeyCodec::::bytes_decode(&key_bytes).unwrap() + FacetGroupKeyCodec::::bytes_decode(&key_bytes).unwrap() }; assert!(value.size > 0); diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index bf523cbb3..221356ba0 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -4,7 +4,8 @@ use std::io; use heed::BytesEncode; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec}; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; +use crate::heed_codec::StrRefCodec; use crate::update::index_documents::merge_cbo_roaring_bitmaps; use crate::{FieldId, Result};