From 7b67ae6972f7e2c1fed47f9f884a9bdd7ed8decb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 22 Sep 2020 12:38:12 +0200 Subject: [PATCH] Introduce the StrStrU8 heed codec --- src/heed_codec/mod.rs | 2 ++ src/heed_codec/str_str_u8_codec.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 src/heed_codec/str_str_u8_codec.rs diff --git a/src/heed_codec/mod.rs b/src/heed_codec/mod.rs index 10b28fb50..3a367dc6f 100644 --- a/src/heed_codec/mod.rs +++ b/src/heed_codec/mod.rs @@ -2,8 +2,10 @@ mod beu32_str_codec; mod byteorder_x_roaring_bitmap_codec; mod csv_string_record_codec; mod roaring_bitmap_codec; +mod str_str_u8_codec; pub use self::beu32_str_codec::BEU32StrCodec; pub use self::byteorder_x_roaring_bitmap_codec::ByteorderXRoaringBitmapCodec; pub use self::csv_string_record_codec::CsvStringRecordCodec; pub use self::roaring_bitmap_codec::RoaringBitmapCodec; +pub use self::str_str_u8_codec::StrStrU8Codec; diff --git a/src/heed_codec/str_str_u8_codec.rs b/src/heed_codec/str_str_u8_codec.rs new file mode 100644 index 000000000..2454e7d56 --- /dev/null +++ b/src/heed_codec/str_str_u8_codec.rs @@ -0,0 +1,30 @@ +use std::borrow::Cow; +use std::str; + +pub struct StrStrU8Codec; + +impl<'a> heed::BytesDecode<'a> for StrStrU8Codec { + type DItem = (&'a str, &'a str, u8); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let (n, bytes) = bytes.split_last()?; + let s1_end = bytes.iter().position(|b| *b == 0)?; + let (s1_bytes, s2_bytes) = bytes.split_at(s1_end); + let s1 = str::from_utf8(s1_bytes).ok()?; + let s2 = str::from_utf8(&s2_bytes[1..]).ok()?; + Some((s1, s2, *n)) + } +} + +impl<'a> heed::BytesEncode<'a> for StrStrU8Codec { + type EItem = (&'a str, &'a str, u8); + + fn bytes_encode((s1, s2, n): &Self::EItem) -> Option> { + let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1 + 1); + bytes.extend_from_slice(s1.as_bytes()); + bytes.push(0); + bytes.extend_from_slice(s2.as_bytes()); + bytes.push(*n); + Some(Cow::Owned(bytes)) + } +}