diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index c65438928..e083f510a 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -9,9 +9,11 @@ use log::debug; use crate::error::SerializationError; use crate::heed_codec::StrBEU16Codec; use crate::index::main_key::WORDS_PREFIXES_FST_KEY; +use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd}; use crate::update::index_documents::{ - create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key, - CursorClonableMmap, MergeFn, + create_sorter, merge_deladd_cbo_roaring_bitmaps, + merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key, + write_sorter_into_database, CursorClonableMmap, MergeFn, }; use crate::{CboRoaringBitmapCodec, Result}; @@ -55,7 +57,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { let mut prefix_integer_docids_sorter = create_sorter( grenad::SortAlgorithm::Unstable, - merge_cbo_roaring_bitmaps, + merge_deladd_cbo_roaring_bitmaps, self.chunk_compression_type, self.chunk_compression_level, self.max_nb_chunks, @@ -108,6 +110,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { // We fetch the docids associated to the newly added word prefix fst only. let db = self.word_database.remap_data_type::(); + let mut buffer = Vec::new(); for prefix_bytes in new_prefix_fst_words { let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| { SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) } @@ -123,7 +126,11 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { if word.starts_with(prefix) { let key = (prefix, pos); let bytes = StrBEU16Codec::bytes_encode(&key).unwrap(); - prefix_integer_docids_sorter.insert(bytes, data)?; + + buffer.clear(); + let mut writer = KvWriterDelAdd::new(&mut buffer); + writer.insert(DelAdd::Addition, data)?; + prefix_integer_docids_sorter.insert(bytes, writer.into_inner()?)?; } } } @@ -143,12 +150,16 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { drop(iter); } + let database_is_empty = self.prefix_database.is_empty(self.wtxn)?; + // We finally write all the word prefix integer docids into the LMDB database. - sorter_into_lmdb_database( - self.wtxn, - *self.prefix_database.as_polymorph(), + write_sorter_into_database( prefix_integer_docids_sorter, - merge_cbo_roaring_bitmaps, + &self.prefix_database, + self.wtxn, + database_is_empty, + deladd_serialize_add_side, + merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, )?; Ok(()) @@ -159,6 +170,7 @@ fn write_prefixes_in_sorter( prefixes: &mut HashMap, Vec>>, sorter: &mut grenad::Sorter, ) -> Result<()> { + // TODO: Merge before insertion. for (key, data_slices) in prefixes.drain() { for data in data_slices { if valid_lmdb_key(&key) {