From 54f2eb4507401b241b858e54f153f688c9102734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 30 Aug 2024 14:33:58 +0200 Subject: [PATCH] Remove duplication of grenad merger --- .../merge/del_add_roaring_bitmap_merger.rs | 61 ------------------- milli/src/update/new/merge/mod.rs | 3 - milli/src/update/new/mod.rs | 7 ++- 3 files changed, 4 insertions(+), 67 deletions(-) delete mode 100644 milli/src/update/new/merge/del_add_roaring_bitmap_merger.rs delete mode 100644 milli/src/update/new/merge/mod.rs diff --git a/milli/src/update/new/merge/del_add_roaring_bitmap_merger.rs b/milli/src/update/new/merge/del_add_roaring_bitmap_merger.rs deleted file mode 100644 index 5aa2c31f8..000000000 --- a/milli/src/update/new/merge/del_add_roaring_bitmap_merger.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::borrow::Cow; -use std::io; - -use grenad::MergeFunction; -use roaring::RoaringBitmap; - -use crate::update::del_add::DelAdd; -use crate::update::new::indexer::{KvReaderDelAdd, KvWriterDelAdd}; - -/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv -/// separately and outputs a new DelAdd with both unions. -pub struct DelAddRoaringBitmapMerger; - -impl MergeFunction for DelAddRoaringBitmapMerger { - type Error = io::Error; - - fn merge<'a>( - &self, - _key: &[u8], - values: &[Cow<'a, [u8]>], - ) -> std::result::Result, Self::Error> { - if values.len() == 1 { - Ok(values[0].clone()) - } else { - // Retrieve the bitmaps from both sides - let mut del_bitmaps_bytes = Vec::new(); - let mut add_bitmaps_bytes = Vec::new(); - for value in values { - let obkv: &KvReaderDelAdd = value.as_ref().into(); - if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) { - del_bitmaps_bytes.push(bitmap_bytes); - } - if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) { - add_bitmaps_bytes.push(bitmap_bytes); - } - } - - let mut output_deladd_obkv = KvWriterDelAdd::memory(); - - // Deletion - let mut buffer = Vec::new(); - let mut merged = RoaringBitmap::new(); - for bytes in del_bitmaps_bytes { - merged |= RoaringBitmap::deserialize_unchecked_from(bytes)?; - } - merged.serialize_into(&mut buffer)?; - output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?; - - // Addition - buffer.clear(); - merged.clear(); - for bytes in add_bitmaps_bytes { - merged |= RoaringBitmap::deserialize_unchecked_from(bytes)?; - } - merged.serialize_into(&mut buffer)?; - output_deladd_obkv.insert(DelAdd::Addition, &buffer)?; - - output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into) - } - } -} diff --git a/milli/src/update/new/merge/mod.rs b/milli/src/update/new/merge/mod.rs deleted file mode 100644 index 6057b8d89..000000000 --- a/milli/src/update/new/merge/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod del_add_roaring_bitmap_merger; - -pub use del_add_roaring_bitmap_merger::DelAddRoaringBitmapMerger; diff --git a/milli/src/update/new/mod.rs b/milli/src/update/new/mod.rs index fc587cb2a..6dc600545 100644 --- a/milli/src/update/new/mod.rs +++ b/milli/src/update/new/mod.rs @@ -2,7 +2,6 @@ mod document_change; // mod extract; mod channel; mod items_pool; -mod merge; /// TODO remove this // mod global_fields_ids_map; @@ -38,7 +37,9 @@ mod indexer { }; use crate::update::del_add::DelAdd; use crate::update::new::channel::MergerOperation; - use crate::update::{AvailableDocumentsIds, IndexDocumentsMethod}; + use crate::update::{ + AvailableDocumentsIds, IndexDocumentsMethod, MergeDeladdCboRoaringBitmaps, + }; use crate::{ CboRoaringBitmapCodec, DocumentId, Error, FieldId, FieldsIdsMap, Index, InternalError, Result, UserError, @@ -428,7 +429,7 @@ mod indexer { let sender = sender.word_docids(); let database = index.word_docids.remap_types::(); - let mut builder = grenad::MergerBuilder::new(merge::DelAddRoaringBitmapMerger); + let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); builder.extend(cursors); /// TODO manage the error correctly let mut merger_iter = builder.build().into_stream_merger_iter().unwrap();