From 51961e10645135d0f7cfc76db9bc98d8ec9a1dc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 8 Sep 2022 13:41:01 +0200 Subject: [PATCH] Polish some details --- .../facet/field_doc_id_facet_codec.rs | 6 ++-- milli/src/heed_codec/facet/mod.rs | 4 +++ milli/src/heed_codec/facet/str_ref.rs | 2 ++ .../extract/extract_facet_string_docids.rs | 5 +--- .../helpers/merge_functions.rs | 28 ------------------- 5 files changed, 11 insertions(+), 34 deletions(-) diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs index 7c636e98a..4e18a0145 100644 --- a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs +++ b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs @@ -1,8 +1,10 @@ -use crate::{try_split_array_at, DocumentId, FieldId}; -use heed::{BytesDecode, BytesEncode}; use std::borrow::Cow; use std::marker::PhantomData; +use heed::{BytesDecode, BytesEncode}; + +use crate::{try_split_array_at, DocumentId, FieldId}; + pub struct FieldDocIdFacetCodec(PhantomData); impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index 8db8b7df1..35ec925dc 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -40,6 +40,8 @@ pub struct FacetGroupKey { pub left_bound: T, } +/// The value in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`] +/// databases. #[derive(Debug)] pub struct FacetGroupValue { pub size: u8, @@ -102,6 +104,8 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { } } +/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated +/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. pub struct ByteSliceRef; impl<'a> BytesEncode<'a> for ByteSliceRef { diff --git a/milli/src/heed_codec/facet/str_ref.rs b/milli/src/heed_codec/facet/str_ref.rs index 80a51c803..36e702627 100644 --- a/milli/src/heed_codec/facet/str_ref.rs +++ b/milli/src/heed_codec/facet/str_ref.rs @@ -2,6 +2,8 @@ use std::borrow::Cow; use heed::{BytesDecode, BytesEncode}; +/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated +/// types are equivalent (= `&'a str`) and these values can reside within another structure. pub struct StrRefCodec; impl<'a> BytesEncode<'a> for StrRefCodec { type EItem = &'a str; diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index a7b027ce3..bf523cbb3 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -33,10 +33,6 @@ pub fn extract_facet_string_docids( let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); let field_id = FieldId::from_be_bytes(field_id_bytes); - // document_id_bytes is a big-endian u32 - // merge_cbo_roaring_bitmap works with native endian u32s - // that is a problem, I think - let (document_id_bytes, normalized_value_bytes) = try_split_array_at::<_, 4>(bytes).unwrap(); let document_id = u32::from_be_bytes(document_id_bytes); @@ -45,6 +41,7 @@ pub fn extract_facet_string_docids( let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value }; let key_bytes = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); + // document id is encoded in native-endian because of the CBO roaring bitmap codec facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?; } diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs index cef27ab30..37af7ab6a 100644 --- a/milli/src/update/index_documents/helpers/merge_functions.rs +++ b/milli/src/update/index_documents/helpers/merge_functions.rs @@ -5,7 +5,6 @@ use std::result::Result as StdResult; use roaring::RoaringBitmap; use super::read_u32_ne_bytes; -// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string}; use crate::heed_codec::CboRoaringBitmapCodec; use crate::Result; @@ -49,33 +48,6 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul } } -// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>( -// _key: &[u8], -// values: &[Cow<'a, [u8]>], -// ) -> Result> { -// if values.len() == 1 { -// Ok(values[0].clone()) -// } else { -// let original = decode_prefix_string(&values[0]).unwrap().0; -// let merged_bitmaps = values -// .iter() -// .map(AsRef::as_ref) -// .map(decode_prefix_string) -// .map(Option::unwrap) -// .map(|(_, bitmap_bytes)| bitmap_bytes) -// .map(RoaringBitmap::deserialize_from) -// .map(StdResult::unwrap) -// .reduce(|a, b| a | b) -// .unwrap(); - -// let cap = std::mem::size_of::() + original.len() + merged_bitmaps.serialized_size(); -// let mut buffer = Vec::with_capacity(cap); -// encode_prefix_string(original, &mut buffer)?; -// merged_bitmaps.serialize_into(&mut buffer)?; -// Ok(Cow::Owned(buffer)) -// } -// } - pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result> { Ok(values[0].clone()) }