From 5676b204dd388c8cca7458ac7a3dd7ab7e0be565 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Sun, 4 Jul 2021 18:09:53 +0200
Subject: [PATCH] Fix the facet string levels codecs

---
 .../facet_string_zero_bounds_value_codec.rs   | 46 ++++++++++++++++++-
 milli/src/update/index_documents/store.rs     |  4 ++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs
index 3c2ce4657..6161118b6 100644
--- a/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs
+++ b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs
@@ -16,7 +16,7 @@ where
     type DItem = (Option<(&'a str, &'a str)>, C::DItem);
 
     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (contains_bounds, tail_bytes) = bytes.split_first()?;
+        let (contains_bounds, bytes) = bytes.split_first()?;
 
         if *contains_bounds != 0 {
             let (left_len, bytes) = try_split_at(bytes, 2)?;
@@ -33,7 +33,7 @@ where
 
             C::bytes_decode(bytes).map(|item| (Some((left, right)), item))
         } else {
-            C::bytes_decode(tail_bytes).map(|item| (None, item))
+            C::bytes_decode(bytes).map(|item| (None, item))
         }
     }
 }
@@ -49,11 +49,21 @@ where
 
         match bounds {
             Some((left, right)) => {
+                bytes.push(u8::max_value());
+
+                if left.is_empty() || right.is_empty() {
+                    return None;
+                }
+
                 let left_len: u16 = left.len().try_into().ok()?;
                 let right_len: u16 = right.len().try_into().ok()?;
+
                 bytes.extend_from_slice(&left_len.to_be_bytes());
                 bytes.extend_from_slice(&right_len.to_be_bytes());
 
+                bytes.extend_from_slice(left.as_bytes());
+                bytes.extend_from_slice(right.as_bytes());
+
                 let value_bytes = C::bytes_encode(&value)?;
                 bytes.extend_from_slice(&value_bytes[..]);
 
@@ -78,3 +88,35 @@ fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
         None
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use heed::types::Unit;
+    use heed::{BytesDecode, BytesEncode};
+    use roaring::RoaringBitmap;
+
+    use super::*;
+    use crate::CboRoaringBitmapCodec;
+
+    #[test]
+    fn deserialize_roaring_bitmaps() {
+        let bounds = Some(("abc", "def"));
+        let docids: RoaringBitmap = (0..100).chain(3500..4398).collect();
+        let key = (bounds, docids.clone());
+        let bytes =
+            FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&key).unwrap();
+        let (out_bounds, out_docids) =
+            FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_bounds, out_docids), (bounds, docids));
+    }
+
+    #[test]
+    fn deserialize_unit() {
+        let bounds = Some(("abc", "def"));
+        let key = (bounds, ());
+        let bytes = FacetStringZeroBoundsValueCodec::<Unit>::bytes_encode(&key).unwrap();
+        let (out_bounds, out_unit) =
+            FacetStringZeroBoundsValueCodec::<Unit>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_bounds, out_unit), (bounds, ()));
+    }
+}
diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs
index f0225ff43..4c1071aab 100644
--- a/milli/src/update/index_documents/store.rs
+++ b/milli/src/update/index_documents/store.rs
@@ -286,6 +286,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
         value: String,
         id: DocumentId,
     ) -> Result<()> {
+        if value.is_empty() {
+            return Ok(());
+        }
+
         let sorter = &mut self.field_id_docid_facet_strings_sorter;
         Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;