mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 12:35:05 +08:00
Optimize cbo roaring bitmaps merge
This commit is contained in:
parent
8f702828ca
commit
9452fabfb2
@ -52,6 +52,46 @@ impl CboRoaringBitmapCodec {
|
|||||||
RoaringBitmap::deserialize_from(bytes)
|
RoaringBitmap::deserialize_from(bytes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||||
|
///
|
||||||
|
/// if the merged values len is under the threshold,
|
||||||
|
/// values are directly serialized in the buffer;
|
||||||
|
/// else a RoaringBitmap is created from the values and is serialized in the buffer.
|
||||||
|
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
|
||||||
|
let mut roaring = RoaringBitmap::new();
|
||||||
|
let mut vec = Vec::new();
|
||||||
|
|
||||||
|
for bytes in slices {
|
||||||
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
|
let mut reader = bytes.as_ref();
|
||||||
|
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
||||||
|
vec.push(integer);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if roaring.is_empty() {
|
||||||
|
vec.sort_unstable();
|
||||||
|
vec.dedup();
|
||||||
|
|
||||||
|
if vec.len() <= THRESHOLD {
|
||||||
|
for integer in vec {
|
||||||
|
buffer.extend_from_slice(&integer.to_ne_bytes());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter());
|
||||||
|
roaring.serialize_into(buffer)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
roaring.extend(vec);
|
||||||
|
roaring.serialize_into(buffer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||||
@ -106,4 +146,40 @@ mod tests {
|
|||||||
|
|
||||||
assert!(roaring_size > bo_size);
|
assert!(roaring_size > bo_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that `CboRoaringBitmapCodec::merge_into` produces the union of its
/// inputs, for a merge yielding 5 values and for one yielding 23 values.
#[test]
fn merge_cbo_roaring_bitmaps() {
    let mut buffer = Vec::new();

    // Each case: (ranges to encode and merge, range the merged bitmap must equal).
    let cases = vec![
        (vec![1..4, 2..5, 4..6, 1..3], 1..6),
        (vec![1..4, 2..5, 4..8, 0..3, 7..23], 0..23),
    ];

    for (ranges, expected_range) in cases {
        let bitmaps: Vec<_> =
            ranges.into_iter().map(|range| RoaringBitmap::from_sorted_iter(range)).collect();
        let encoded: Vec<_> =
            bitmaps.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();

        // `merge_into` appends, so start each case from an empty buffer.
        buffer.clear();
        CboRoaringBitmapCodec::merge_into(encoded.as_slice(), &mut buffer).unwrap();

        let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(expected_range);
        assert_eq!(bitmap, expected);
    }
}
|
||||||
}
|
}
|
||||||
|
@ -120,52 +120,11 @@ pub fn merge_cbo_roaring_bitmaps<'a>(
|
|||||||
_key: &[u8],
|
_key: &[u8],
|
||||||
values: &[Cow<'a, [u8]>],
|
values: &[Cow<'a, [u8]>],
|
||||||
) -> Result<Cow<'a, [u8]>> {
|
) -> Result<Cow<'a, [u8]>> {
|
||||||
match values.split_first().unwrap() {
|
if values.len() == 1 {
|
||||||
(head, []) => Ok(head.clone()),
|
Ok(values[0].clone())
|
||||||
(head, tail) => {
|
} else {
|
||||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
|
||||||
|
|
||||||
for value in tail {
|
|
||||||
head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
CboRoaringBitmapCodec::serialize_into(&head, &mut vec);
|
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
|
||||||
Ok(Cow::from(vec))
|
Ok(Cow::from(vec))
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// /// Uses the FacetStringLevelZeroValueCodec to merge the values.
|
|
||||||
// pub fn tuple_string_cbo_roaring_bitmap_merge<'a>(
|
|
||||||
// _key: &[u8],
|
|
||||||
// values: &[Cow<[u8]>],
|
|
||||||
// ) -> Result<Cow<'a, [u8]>> {
|
|
||||||
// let (head, tail) = values.split_first().unwrap();
|
|
||||||
// let (head_string, mut head_rb) = FacetStringLevelZeroValueCodec::bytes_decode(&head[..])
|
|
||||||
// .ok_or(SerializationError::Decoding { db_name: None })?;
|
|
||||||
|
|
||||||
// for value in tail {
|
|
||||||
// let (_string, rb) = FacetStringLevelZeroValueCodec::bytes_decode(&value[..])
|
|
||||||
// .ok_or(SerializationError::Decoding { db_name: None })?;
|
|
||||||
// head_rb |= rb;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// FacetStringLevelZeroValueCodec::bytes_encode(&(head_string, head_rb))
|
|
||||||
// .map(|cow| cow.into_owned())
|
|
||||||
// .ok_or(SerializationError::Encoding { db_name: None })
|
|
||||||
// .map_err(Into::into)
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn cbo_roaring_bitmap_merge<'a>(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Cow<'a, [u8]>> {
|
|
||||||
// let (head, tail) = values.split_first().unwrap();
|
|
||||||
// let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
|
||||||
|
|
||||||
// for value in tail {
|
|
||||||
// head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// let mut vec = Vec::new();
|
|
||||||
// CboRoaringBitmapCodec::serialize_into(&head, &mut vec);
|
|
||||||
// Ok(vec)
|
|
||||||
// }
|
|
||||||
|
@ -188,15 +188,22 @@ fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>
|
|||||||
Ok(serialize_roaring_bitmap(&value, buffer)?)
|
Ok(serialize_roaring_bitmap(&value, buffer)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
fn merge_cbo_roaring_bitmaps(
|
fn merge_cbo_roaring_bitmaps(
|
||||||
new_value: &[u8],
|
new_value: &[u8],
|
||||||
db_value: &[u8],
|
db_value: &[u8],
|
||||||
buffer: &mut Vec<u8>,
|
buffer: &mut Vec<u8>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let new_value = CboRoaringBitmapCodec::deserialize_from(new_value)?;
|
Ok(CboRoaringBitmapCodec::merge_into(
|
||||||
let db_value = CboRoaringBitmapCodec::deserialize_from(db_value)?;
|
&[Cow::Borrowed(db_value), Cow::Borrowed(new_value)],
|
||||||
let value = new_value | db_value;
|
buffer,
|
||||||
Ok(CboRoaringBitmapCodec::serialize_into(&value, buffer))
|
)?)
|
||||||
|
|
||||||
|
// let new_value = CboRoaringBitmapCodec::deserialize_from(new_value)?;
|
||||||
|
// let db_value = CboRoaringBitmapCodec::deserialize_from(db_value)?;
|
||||||
|
// let value = new_value | db_value;
|
||||||
|
// Ok(CboRoaringBitmapCodec::serialize_into(&value, buffer))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Write provided entries in database using serialize_value function.
|
/// Write provided entries in database using serialize_value function.
|
||||||
|
Loading…
Reference in New Issue
Block a user