From 9680e1e41f4389bea2c67d1d9a05f248f5db76f3 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 13 Jun 2023 14:41:53 +0200
Subject: [PATCH] Introduce a BytesDecodeOwned trait in heed_codecs

---
 milli/src/heed_codec/mod.rs                        |  6 ++++++
 .../roaring_bitmap/bo_roaring_bitmap_codec.rs      | 13 ++++++++++++-
 .../roaring_bitmap/cbo_roaring_bitmap_codec.rs     | 10 ++++++++++
 .../roaring_bitmap/roaring_bitmap_codec.rs         | 10 ++++++++++
 .../bo_roaring_bitmap_len_codec.rs                 | 14 +++++++++++++-
 .../cbo_roaring_bitmap_len_codec.rs                | 13 ++++++++++++-
 .../roaring_bitmap_len_codec.rs                    | 10 ++++++++++
 7 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs
index de2644e11..c54168a36 100644
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@@ -23,3 +23,9 @@ pub use self::roaring_bitmap_length::{
 pub use self::script_language_codec::ScriptLanguageCodec;
 pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
+
+pub trait BytesDecodeOwned {
+    type DItem;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
+}
diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
index 994e23b39..9ad2e9707 100644
--- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
@@ -2,8 +2,11 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::mem::size_of;
 
+use heed::BytesDecode;
 use roaring::RoaringBitmap;
 
+use crate::heed_codec::BytesDecodeOwned;
+
 pub struct BoRoaringBitmapCodec;
 
 impl BoRoaringBitmapCodec {
@@ -13,7 +16,7 @@ impl BoRoaringBitmapCodec {
     }
 }
 
-impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
+impl BytesDecode<'_> for BoRoaringBitmapCodec {
     type DItem = RoaringBitmap;
 
     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -28,6 +31,14 @@ impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
     }
 }
 
+impl BytesDecodeOwned for BoRoaringBitmapCodec {
+    type DItem = RoaringBitmap;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        Self::bytes_decode(bytes)
+    }
+}
+
 impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
     type EItem = RoaringBitmap;
 
diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
index 01ce523ba..bf76287d8 100644
--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@@ -5,6 +5,8 @@ use std::mem::size_of;
 use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
 use roaring::RoaringBitmap;
 
+use crate::heed_codec::BytesDecodeOwned;
+
 /// This is the limit where using a byteorder became less size efficient
 /// than using a direct roaring encoding, it is also the point where we are able
 /// to determine the encoding used only by using the array of bytes length.
@@ -103,6 +105,14 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
     }
 }
 
+impl BytesDecodeOwned for CboRoaringBitmapCodec {
+    type DItem = RoaringBitmap;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        Self::deserialize_from(bytes).ok()
+    }
+}
+
 impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
     type EItem = RoaringBitmap;
 
diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
index 6cec0eb44..f982cc105 100644
--- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
@@ -2,6 +2,8 @@ use std::borrow::Cow;
 
 use roaring::RoaringBitmap;
 
+use crate::heed_codec::BytesDecodeOwned;
+
 pub struct RoaringBitmapCodec;
 
 impl heed::BytesDecode<'_> for RoaringBitmapCodec {
@@ -12,6 +14,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
     }
 }
 
+impl BytesDecodeOwned for RoaringBitmapCodec {
+    type DItem = RoaringBitmap;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        RoaringBitmap::deserialize_from(bytes).ok()
+    }
+}
+
 impl heed::BytesEncode<'_> for RoaringBitmapCodec {
     type EItem = RoaringBitmap;
 
diff --git a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
index e749680a0..8fae60df7 100644
--- a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
@@ -1,11 +1,23 @@
 use std::mem;
 
+use heed::BytesDecode;
+
+use crate::heed_codec::BytesDecodeOwned;
+
 pub struct BoRoaringBitmapLenCodec;
 
-impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec {
+impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
     type DItem = u64;
 
     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
         Some((bytes.len() / mem::size_of::<u32>()) as u64)
     }
 }
+
+impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
+    type DItem = u64;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        Self::bytes_decode(bytes)
+    }
+}
diff --git a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
index 4f728f1cd..5719a538a 100644
--- a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
@@ -1,11 +1,14 @@
 use std::mem;
 
+use heed::BytesDecode;
+
 use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
 use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
+use crate::heed_codec::BytesDecodeOwned;
 
 pub struct CboRoaringBitmapLenCodec;
 
-impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
+impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
     type DItem = u64;
 
     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -20,3 +23,11 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
         }
     }
 }
+
+impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
+    type DItem = u64;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        Self::bytes_decode(bytes)
+    }
+}
diff --git a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs
index 4d266e413..a9b0506ff 100644
--- a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs
@@ -3,6 +3,8 @@ use std::mem;
 
 use byteorder::{LittleEndian, ReadBytesExt};
 
+use crate::heed_codec::BytesDecodeOwned;
+
 const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
 const SERIAL_COOKIE: u16 = 12347;
 
@@ -59,6 +61,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
     }
 }
 
+impl BytesDecodeOwned for RoaringBitmapLenCodec {
+    type DItem = u64;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
+        RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use heed::BytesEncode;