From 5a6a698e1db08d8ef6b24d95b087f4a54c28b22c Mon Sep 17 00:00:00 2001
From: Kerollmops
Date: Thu, 1 Oct 2020 11:10:56 +0200
Subject: [PATCH] Introduce the CboRoaringBitmapCodec

---
 src/heed_codec/cbo_roaring_bitmap_codec.rs | 40 ++++++++++++++++++++++
 src/heed_codec/mod.rs                      |  2 ++
 2 files changed, 42 insertions(+)
 create mode 100644 src/heed_codec/cbo_roaring_bitmap_codec.rs

diff --git a/src/heed_codec/cbo_roaring_bitmap_codec.rs b/src/heed_codec/cbo_roaring_bitmap_codec.rs
new file mode 100644
index 000000000..9d1a0486a
--- /dev/null
+++ b/src/heed_codec/cbo_roaring_bitmap_codec.rs
@@ -0,0 +1,40 @@
+use std::borrow::Cow;
+use std::mem::size_of;
+use roaring::RoaringBitmap;
+use super::{BoRoaringBitmapCodec, RoaringBitmapCodec};
+
+/// A conditional codec that uses either the RoaringBitmap codec
+/// or a lighter ByteOrder en/decoding method.
+pub struct CboRoaringBitmapCodec;
+
+impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
+    type DItem = RoaringBitmap;
+
+    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
+        if bytes.len() <= 4 * size_of::<u32>() {
+            // If four or fewer integers can fit into this array of bytes,
+            // it means that we used the ByteOrder codec serializer.
+            BoRoaringBitmapCodec::bytes_decode(bytes)
+        } else {
+            // Otherwise, it means we used the classic RoaringBitmapCodec and
+            // that the header takes 4 integers.
+            RoaringBitmapCodec::bytes_decode(bytes)
+        }
+    }
+}
+
+impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
+    type EItem = RoaringBitmap;
+
+    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
+        if item.len() <= 4 {
+            // If the number of items (u32s) to encode is less than or equal to 4
+            // it means that it would weigh the same or less than the RoaringBitmap
+            // header, so we directly encode them using ByteOrder instead.
+            BoRoaringBitmapCodec::bytes_encode(item)
+        } else {
+            // Otherwise, we use the classic RoaringBitmapCodec that writes a header.
+            RoaringBitmapCodec::bytes_encode(item)
+        }
+    }
+}
diff --git a/src/heed_codec/mod.rs b/src/heed_codec/mod.rs
index edae12351..e81e9416a 100644
--- a/src/heed_codec/mod.rs
+++ b/src/heed_codec/mod.rs
@@ -1,11 +1,13 @@
 mod beu32_str_codec;
 mod bo_roaring_bitmap_codec;
+mod cbo_roaring_bitmap_codec;
 mod csv_string_record_codec;
 mod roaring_bitmap_codec;
 mod str_str_u8_codec;
 
 pub use self::beu32_str_codec::BEU32StrCodec;
 pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
+pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
 pub use self::csv_string_record_codec::CsvStringRecordCodec;
 pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
 pub use self::str_str_u8_codec::StrStrU8Codec;