Fix bbbul merger

This commit is contained in:
Clément Renault 2024-11-06 18:59:49 +01:00 committed by Louis Dureuil
parent 39366a67c4
commit c9f478bc45
No known key found for this signature in database
3 changed files with 23 additions and 32 deletions

2
Cargo.lock generated
View File

@ -4473,7 +4473,7 @@ dependencies = [
[[package]] [[package]]
name = "raw-collections" name = "raw-collections"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/dureuill/raw-collections.git#e04a52424e1124ca63df66338a79c628e8f3bfd7" source = "git+https://github.com/dureuill/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"bitpacking", "bitpacking",

View File

@ -526,7 +526,7 @@ where
for (map_index, map) in maps.iter_mut().enumerate() { for (map_index, map) in maps.iter_mut().enumerate() {
if first_entry.source_index != map_index { if first_entry.source_index != map_index {
if let Some(new) = map.get_mut(first_key) { if let Some(new) = map.get_mut(first_key) {
output.append_and_clear_bbbul(new); output.union_and_clear_bbbul(new);
} }
} }
} }
@ -543,20 +543,22 @@ where
// Then manage the content on the HashMap entries that weren't taken (mem::take). // Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() { while let Some(mut map) = maps.pop() {
for (key, bbbul) in map.iter_mut() { for (key, bbbul) in map.iter_mut() {
let mut output = DelAddRoaringBitmap::empty();
output.append_and_clear_bbbul(bbbul);
// Make sure we don't try to work with entries already managed by the spilled // Make sure we don't try to work with entries already managed by the spilled
if !bbbul.is_empty() { if bbbul.is_empty() {
for rhs in maps.iter_mut() { continue;
if let Some(new) = rhs.get_mut(key) {
output.append_and_clear_bbbul(new);
}
}
// We send the merged entry outside.
(f)(key, output)?;
} }
let mut output = DelAddRoaringBitmap::empty();
output.union_and_clear_bbbul(bbbul);
for rhs in maps.iter_mut() {
if let Some(new) = rhs.get_mut(key) {
output.union_and_clear_bbbul(new);
}
}
// We send the merged entry outside.
(f)(key, output)?;
} }
} }
@ -596,14 +598,6 @@ pub struct DelAddBbbul<'bump, B> {
} }
impl<'bump, B: BitPacker> DelAddBbbul<'bump, B> { impl<'bump, B: BitPacker> DelAddBbbul<'bump, B> {
pub fn insert_del_u32_in(&mut self, n: u32, bump: &'bump Bump) {
self.del.get_or_insert_with(|| Bbbul::new_in(bump)).insert(n);
}
pub fn insert_add_u32_in(&mut self, n: u32, bump: &'bump Bump) {
self.add.get_or_insert_with(|| Bbbul::new_in(bump)).insert(n);
}
pub fn new_del_u32_in(n: u32, bump: &'bump Bump) -> Self { pub fn new_del_u32_in(n: u32, bump: &'bump Bump) -> Self {
let mut bbbul = Bbbul::new_in(bump); let mut bbbul = Bbbul::new_in(bump);
bbbul.insert(n); bbbul.insert(n);
@ -655,11 +649,6 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: None } DelAddRoaringBitmap { del: None, add: None }
} }
pub fn is_empty(&self) -> bool {
let DelAddRoaringBitmap { del, add } = self;
del.is_none() && add.is_none()
}
pub fn insert_del_u32(&mut self, n: u32) { pub fn insert_del_u32(&mut self, n: u32) {
self.del.get_or_insert_with(RoaringBitmap::new).insert(n); self.del.get_or_insert_with(RoaringBitmap::new).insert(n);
} }
@ -676,14 +665,16 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
} }
pub fn append_and_clear_bbbul<B: BitPacker>(&mut self, bbbul: &mut FrozenDelAddBbbul<'_, B>) { pub fn union_and_clear_bbbul<B: BitPacker>(&mut self, bbbul: &mut FrozenDelAddBbbul<'_, B>) {
let FrozenDelAddBbbul { del, add } = bbbul; let FrozenDelAddBbbul { del, add } = bbbul;
if let Some(ref mut bbbul) = del.take() { if let Some(ref mut bbbul) = del.take() {
let del = self.del.get_or_insert_with(RoaringBitmap::new); let del = self.del.get_or_insert_with(RoaringBitmap::new);
let mut iter = bbbul.iter_and_clear(); let mut iter = bbbul.iter_and_clear();
while let Some(block) = iter.next_block() { while let Some(block) = iter.next_block() {
del.append(block.iter().copied()); let iter = block.iter().copied();
let block = RoaringBitmap::from_sorted_iter(iter).unwrap();
*del |= block;
} }
} }
@ -691,7 +682,9 @@ impl DelAddRoaringBitmap {
let add = self.add.get_or_insert_with(RoaringBitmap::new); let add = self.add.get_or_insert_with(RoaringBitmap::new);
let mut iter = bbbul.iter_and_clear(); let mut iter = bbbul.iter_and_clear();
while let Some(block) = iter.next_block() { while let Some(block) = iter.next_block() {
add.append(block.iter().copied()); let iter = block.iter().copied();
let block = RoaringBitmap::from_sorted_iter(iter).unwrap();
*add |= block;
} }
} }
} }

View File

@ -589,10 +589,8 @@ fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelt
let _entered = span.enter(); let _entered = span.enter();
let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?; let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?;
// extractor_sender.main().write_words_fst(word_fst_mmap).unwrap();
index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data {
// extractor_sender.main().write_words_prefixes_fst(prefixes_fst_mmap).unwrap();
index.main.remap_types::<Str, Bytes>().put( index.main.remap_types::<Str, Bytes>().put(
wtxn, wtxn,
WORDS_PREFIXES_FST_KEY, WORDS_PREFIXES_FST_KEY,