Fix another iteration bug on hashmap entries

This commit is contained in:
Clément Renault 2024-09-25 22:42:41 +02:00
parent 97d2860998
commit 7d61697f19
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 36 additions and 32 deletions

View File

@ -132,17 +132,17 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
}
pub fn merge_with(&mut self, other: &DelAddRoaringBitmap) {
self.del = match (&self.del, &other.del) {
pub fn merge_with(&mut self, other: DelAddRoaringBitmap) {
self.del = match (self.del.take(), other.del) {
(None, None) => None,
(None, Some(other)) => Some(other.clone()),
(Some(this), None) => Some(this.clone()),
(None, Some(other)) => Some(other),
(Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other),
};
self.add = match (&self.add, &other.add) {
self.add = match (self.add.take(), other.add) {
(None, None) => None,
(None, Some(other)) => Some(other.clone()),
(Some(this), None) => Some(this.clone()),
(None, Some(other)) => Some(other),
(Some(this), None) => Some(this),
(Some(this), Some(other)) => Some(this | other),
};
}

View File

@ -43,12 +43,16 @@ impl HashMapMerger {
{
self.maps.extend(iter);
}
}
pub fn iter(&self) -> Iter<'_> {
let mut entries: Vec<_> =
self.maps.iter().flat_map(|m| m.iter()).map(|(k, v)| (k.as_slice(), v)).collect();
entries.par_sort_unstable_by_key(|(key, _)| *key);
Iter {
impl IntoIterator for HashMapMerger {
type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
let mut entries: Vec<_> = self.maps.into_iter().flat_map(|m| m.into_iter()).collect();
entries.par_sort_unstable_by(|(ka, _), (kb, _)| ka.cmp(kb));
IntoIter {
sorted_entries: entries.into_iter(),
current_key: None,
current_deladd: cache::DelAddRoaringBitmap::default(),
@ -56,24 +60,24 @@ impl HashMapMerger {
}
}
pub struct Iter<'h> {
sorted_entries: std::vec::IntoIter<(&'h [u8], &'h cache::DelAddRoaringBitmap)>,
current_key: Option<&'h [u8]>,
pub struct IntoIter {
sorted_entries: std::vec::IntoIter<(SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap)>,
current_key: Option<SmallVec<[u8; 12]>>,
current_deladd: cache::DelAddRoaringBitmap,
}
impl<'h> Iterator for Iter<'h> {
type Item = (&'h [u8], cache::DelAddRoaringBitmap);
impl Iterator for IntoIter {
type Item = (SmallVec<[u8; 12]>, cache::DelAddRoaringBitmap);
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.sorted_entries.next() {
Some((k, other)) => {
if self.current_key == Some(k) {
self.current_deladd.merge_with(other);
Some((k, deladd)) => {
if self.current_key.as_deref() == Some(k.as_slice()) {
self.current_deladd.merge_with(deladd);
} else {
let previous_key = self.current_key.replace(k);
let previous_deladd = mem::replace(&mut self.current_deladd, other.clone());
let previous_deladd = mem::replace(&mut self.current_deladd, deladd);
if let Some(previous_key) = previous_key {
return Some((previous_key, previous_deladd));
}
@ -81,7 +85,7 @@ impl<'h> Iterator for Iter<'h> {
}
None => {
let current_deladd = mem::take(&mut self.current_deladd);
return self.current_key.map(|ck| (ck, current_deladd));
return self.current_key.take().map(|ck| (ck, current_deladd));
}
}
}

View File

@ -245,17 +245,17 @@ fn merge_and_send_docids(
docids_sender: impl DocidsSender,
mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>,
) -> Result<()> {
for (key, deladd) in merger.iter() {
let current = database.get(rtxn, key)?;
for (key, deladd) in merger.into_iter() {
let current = database.get(rtxn, &key)?;
match merge_cbo_bitmaps(current, deladd.del, deladd.add)? {
Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
docids_sender.write(key, value).unwrap();
register_key(DelAdd::Addition, key)?;
docids_sender.write(&key, value).unwrap();
register_key(DelAdd::Addition, &key)?;
}
Operation::Delete => {
docids_sender.delete(key).unwrap();
register_key(DelAdd::Deletion, key)?;
docids_sender.delete(&key).unwrap();
register_key(DelAdd::Deletion, &key)?;
}
Operation::Ignore => (),
}
@ -272,15 +272,15 @@ fn merge_and_send_facet_docids(
buffer: &mut Vec<u8>,
docids_sender: impl DocidsSender,
) -> Result<()> {
for (key, deladd) in merger.iter() {
let current = database.get(rtxn, key)?;
for (key, deladd) in merger.into_iter() {
let current = database.get(rtxn, &key)?;
match merge_cbo_bitmaps(current, deladd.del, deladd.add)? {
Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
docids_sender.write(key, value).unwrap();
docids_sender.write(&key, value).unwrap();
}
Operation::Delete => {
docids_sender.delete(key).unwrap();
docids_sender.delete(&key).unwrap();
}
Operation::Ignore => (),
}