Fix another iteration bug on hashmap entries

This commit is contained in:
Clément Renault 2024-09-25 22:42:41 +02:00
parent 97d2860998
commit 7d61697f19
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 36 additions and 32 deletions

View File

@ -132,17 +132,17 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
} }
pub fn merge_with(&mut self, other: &DelAddRoaringBitmap) { pub fn merge_with(&mut self, other: DelAddRoaringBitmap) {
self.del = match (&self.del, &other.del) { self.del = match (self.del.take(), other.del) {
(None, None) => None, (None, None) => None,
(None, Some(other)) => Some(other.clone()), (None, Some(other)) => Some(other),
(Some(this), None) => Some(this.clone()), (Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other), (Some(this), Some(other)) => Some(this | other),
}; };
self.add = match (&self.add, &other.add) { self.add = match (self.add.take(), other.add) {
(None, None) => None, (None, None) => None,
(None, Some(other)) => Some(other.clone()), (None, Some(other)) => Some(other),
(Some(this), None) => Some(this.clone()), (Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other), (Some(this), Some(other)) => Some(this | other),
}; };
} }

View File

@ -43,12 +43,16 @@ impl HashMapMerger {
{ {
self.maps.extend(iter); self.maps.extend(iter);
} }
}
pub fn iter(&self) -> Iter<'_> { impl IntoIterator for HashMapMerger {
let mut entries: Vec<_> = type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
self.maps.iter().flat_map(|m| m.iter()).map(|(k, v)| (k.as_slice(), v)).collect(); type IntoIter = IntoIter;
entries.par_sort_unstable_by_key(|(key, _)| *key);
Iter { fn into_iter(self) -> Self::IntoIter {
let mut entries: Vec<_> = self.maps.into_iter().flat_map(|m| m.into_iter()).collect();
entries.par_sort_unstable_by(|(ka, _), (kb, _)| ka.cmp(kb));
IntoIter {
sorted_entries: entries.into_iter(), sorted_entries: entries.into_iter(),
current_key: None, current_key: None,
current_deladd: cache::DelAddRoaringBitmap::default(), current_deladd: cache::DelAddRoaringBitmap::default(),
@ -56,24 +60,24 @@ impl HashMapMerger {
} }
} }
pub struct Iter<'h> { pub struct IntoIter {
sorted_entries: std::vec::IntoIter<(&'h [u8], &'h cache::DelAddRoaringBitmap)>, sorted_entries: std::vec::IntoIter<(SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap)>,
current_key: Option<&'h [u8]>, current_key: Option<SmallVec<[u8; 12]>>,
current_deladd: cache::DelAddRoaringBitmap, current_deladd: cache::DelAddRoaringBitmap,
} }
impl<'h> Iterator for Iter<'h> { impl Iterator for IntoIter {
type Item = (&'h [u8], cache::DelAddRoaringBitmap); type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { loop {
match self.sorted_entries.next() { match self.sorted_entries.next() {
Some((k, other)) => { Some((k, deladd)) => {
if self.current_key == Some(k) { if self.current_key.as_deref() == Some(k.as_slice()) {
self.current_deladd.merge_with(other); self.current_deladd.merge_with(deladd);
} else { } else {
let previous_key = self.current_key.replace(k); let previous_key = self.current_key.replace(k);
let previous_deladd = mem::replace(&mut self.current_deladd, other.clone()); let previous_deladd = mem::replace(&mut self.current_deladd, deladd);
if let Some(previous_key) = previous_key { if let Some(previous_key) = previous_key {
return Some((previous_key, previous_deladd)); return Some((previous_key, previous_deladd));
} }
@ -81,7 +85,7 @@ impl<'h> Iterator for Iter<'h> {
} }
None => { None => {
let current_deladd = mem::take(&mut self.current_deladd); let current_deladd = mem::take(&mut self.current_deladd);
return self.current_key.map(|ck| (ck, current_deladd)); return self.current_key.take().map(|ck| (ck, current_deladd));
} }
} }
} }

View File

@ -245,17 +245,17 @@ fn merge_and_send_docids(
docids_sender: impl DocidsSender, docids_sender: impl DocidsSender,
mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>, mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>,
) -> Result<()> { ) -> Result<()> {
for (key, deladd) in merger.iter() { for (key, deladd) in merger.into_iter() {
let current = database.get(rtxn, key)?; let current = database.get(rtxn, &key)?;
match merge_cbo_bitmaps(current, deladd.del, deladd.add)? { match merge_cbo_bitmaps(current, deladd.del, deladd.add)? {
Operation::Write(bitmap) => { Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
docids_sender.write(key, value).unwrap(); docids_sender.write(&key, value).unwrap();
register_key(DelAdd::Addition, key)?; register_key(DelAdd::Addition, &key)?;
} }
Operation::Delete => { Operation::Delete => {
docids_sender.delete(key).unwrap(); docids_sender.delete(&key).unwrap();
register_key(DelAdd::Deletion, key)?; register_key(DelAdd::Deletion, &key)?;
} }
Operation::Ignore => (), Operation::Ignore => (),
} }
@ -272,15 +272,15 @@ fn merge_and_send_facet_docids(
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
docids_sender: impl DocidsSender, docids_sender: impl DocidsSender,
) -> Result<()> { ) -> Result<()> {
for (key, deladd) in merger.iter() { for (key, deladd) in merger.into_iter() {
let current = database.get(rtxn, key)?; let current = database.get(rtxn, &key)?;
match merge_cbo_bitmaps(current, deladd.del, deladd.add)? { match merge_cbo_bitmaps(current, deladd.del, deladd.add)? {
Operation::Write(bitmap) => { Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
docids_sender.write(key, value).unwrap(); docids_sender.write(&key, value).unwrap();
} }
Operation::Delete => { Operation::Delete => {
docids_sender.delete(key).unwrap(); docids_sender.delete(&key).unwrap();
} }
Operation::Ignore => (), Operation::Ignore => (),
} }