diff --git a/milli/src/heed_codec/facet/facet_value_string_codec.rs b/milli/src/heed_codec/facet/facet_value_string_codec.rs
index 350efc450..259dab972 100644
--- a/milli/src/heed_codec/facet/facet_value_string_codec.rs
+++ b/milli/src/heed_codec/facet/facet_value_string_codec.rs
@@ -5,6 +5,14 @@ use crate::FieldId;
 
 pub struct FacetValueStringCodec;
 
+impl FacetValueStringCodec {
+    pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
+        out.reserve(value.len() + 1);
+        out.push(field_id);
+        out.extend_from_slice(value.as_bytes());
+    }
+}
+
 impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
     type DItem = (FieldId, &'a str);
 
@@ -19,9 +27,8 @@ impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
     type EItem = (FieldId, &'a str);
 
     fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
-        let mut bytes = Vec::with_capacity(value.len() + 1);
-        bytes.push(*field_id);
-        bytes.extend_from_slice(value.as_bytes());
+        let mut bytes = Vec::new();
+        FacetValueStringCodec::serialize_into(*field_id, value, &mut bytes);
         Some(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
index 2e282b2a0..b002346e9 100644
--- a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
+++ b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
@@ -6,6 +6,15 @@ use crate::{FieldId, DocumentId};
 
 pub struct FieldDocIdFacetStringCodec;
 
+impl FieldDocIdFacetStringCodec {
+    pub fn serialize_into(field_id: FieldId, document_id: DocumentId, value: &str, out: &mut Vec<u8>) {
+        out.reserve(1 + 4 + value.len());
+        out.push(field_id);
+        out.extend_from_slice(&document_id.to_be_bytes());
+        out.extend_from_slice(value.as_bytes());
+    }
+}
+
 impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
     type DItem = (FieldId, DocumentId, &'a str);
 
@@ -22,10 +31,8 @@ impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
     type EItem = (FieldId, DocumentId, &'a str);
 
     fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
-        let mut bytes = Vec::with_capacity(1 + 4 + value.len());
-        bytes.push(*field_id);
-        bytes.extend_from_slice(&document_id.to_be_bytes());
-        bytes.extend_from_slice(value.as_bytes());
+        let mut bytes = Vec::new();
+        FieldDocIdFacetStringCodec::serialize_into(*field_id, *document_id, value, &mut bytes);
         Some(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
index 8d1eb79dd..994e23b39 100644
--- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
@@ -6,6 +6,13 @@ use roaring::RoaringBitmap;
 
 pub struct BoRoaringBitmapCodec;
 
+impl BoRoaringBitmapCodec {
+    pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
+        out.reserve(bitmap.len() as usize * size_of::<u32>());
+        bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
+    }
+}
+
 impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
     type DItem = RoaringBitmap;
 
@@ -25,12 +32,8 @@ impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
     type EItem = RoaringBitmap;
 
     fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        let mut out = Vec::with_capacity(item.len() as usize * size_of::<u32>());
-
-        item.iter()
-            .map(|i| i.to_ne_bytes())
-            .for_each(|bytes| out.extend_from_slice(&bytes));
-
+        let mut out = Vec::new();
+        BoRoaringBitmapCodec::serialize_into(item, &mut out);
         Some(Cow::Owned(out))
     }
 }
diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs
index 16837ca7b..4662cd609 100644
--- a/milli/src/update/index_documents/store.rs
+++ b/milli/src/update/index_documents/store.rs
@@ -421,6 +421,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
     {
         // We prefix the words by the document id.
         let mut key = id.to_be_bytes().to_vec();
+        let mut buffer = Vec::new();
         let base_size = key.len();
 
         // We order the words lexicographically, this way we avoid passing by a sorter.
@@ -429,13 +430,15 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
         for (word, positions) in words_positions {
             key.truncate(base_size);
             key.extend_from_slice(word.as_bytes());
+            buffer.clear();
+
             // We serialize the positions into a buffer.
             let positions = RoaringBitmap::from_iter(positions.iter().cloned());
-            let bytes = BoRoaringBitmapCodec::bytes_encode(&positions)
-                .with_context(|| "could not serialize positions")?;
+            BoRoaringBitmapCodec::serialize_into(&positions, &mut buffer);
+
             // that we write under the generated key into MTBL
             if lmdb_key_valid_size(&key) {
-                writer.insert(&key, &bytes)?;
+                writer.insert(&key, &buffer)?;
             }
         }
 
@@ -483,14 +486,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
     ) -> anyhow::Result<()>
     where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>
     {
+        let mut key_buffer = Vec::new();
+        let mut data_buffer = Vec::new();
+
         for ((field_id, value), docids) in iter {
-            let key = FacetValueStringCodec::bytes_encode(&(field_id, &value))
-                .map(Cow::into_owned)
-                .context("could not serialize facet key")?;
-            let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
-                .context("could not serialize docids")?;
-            if lmdb_key_valid_size(&key) {
-                sorter.insert(&key, &bytes)?;
+            key_buffer.clear();
+            data_buffer.clear();
+
+            FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer);
+            CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
+
+            if lmdb_key_valid_size(&key_buffer) {
+                sorter.insert(&key_buffer, &data_buffer)?;
             }
         }
 
@@ -503,14 +510,19 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
     ) -> anyhow::Result<()>
     where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>
     {
+        let mut data_buffer = Vec::new();
+
         for ((field_id, value), docids) in iter {
+            data_buffer.clear();
+
             let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
                 .map(Cow::into_owned)
-                .context("could not serialize facet key")?;
-            let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
-                .context("could not serialize docids")?;
+                .context("could not serialize facet level value key")?;
+
+            CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
+
             if lmdb_key_valid_size(&key) {
-                sorter.insert(&key, &bytes)?;
+                sorter.insert(&key, &data_buffer)?;
             }
         }
 
@@ -526,7 +538,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
     {
         let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
             .map(Cow::into_owned)
-            .context("could not serialize facet key")?;
+            .context("could not serialize facet level value key")?;
 
         if lmdb_key_valid_size(&key) {
             sorter.insert(&key, &[])?;
@@ -542,12 +554,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
         value: &str,
     ) -> anyhow::Result<()>
     {
-        let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value))
-            .map(Cow::into_owned)
-            .context("could not serialize facet key")?;
+        let mut buffer = Vec::new();
 
-        if lmdb_key_valid_size(&key) {
-            sorter.insert(&key, &[])?;
+        FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer);
+
+        if lmdb_key_valid_size(&buffer) {
+            sorter.insert(&buffer, &[])?;
         }
 
         Ok(())