From 28782ff99d73e1f4632063971d8b03899d3f8e99 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 30 Jun 2021 11:22:57 +0200 Subject: [PATCH] Fix ExternalDocumentsIds struct when inserting previously deleted ids --- milli/src/external_documents_ids.rs | 48 +++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs index 419105bd5..3dce18b00 100644 --- a/milli/src/external_documents_ids.rs +++ b/milli/src/external_documents_ids.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use std::convert::TryInto; use std::{fmt, str}; +use fst::map::IndexedValue; use fst::{IntoStreamer, Streamer}; const DELETED_ID: u64 = u64::MAX; @@ -35,7 +36,6 @@ impl<'a> ExternalDocumentsIds<'a> { pub fn get>(&self, external_id: A) -> Option { let external_id = external_id.as_ref(); match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) { - // u64 MAX means deleted in the soft fst map Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()), _otherwise => None, } @@ -53,7 +53,8 @@ impl<'a> ExternalDocumentsIds<'a> { // that it must be marked as deleted. new_soft_builder.insert(external_id, DELETED_ID)?; } else { - new_soft_builder.insert(external_id, docids[0].value)?; + let value = docids.iter().find(|v| v.index == 0).unwrap().value; + new_soft_builder.insert(external_id, value)?; } } @@ -69,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> { let mut new_soft_builder = fst::MapBuilder::memory(); let mut iter = union_op.into_stream(); - while let Some((external_id, docids)) = iter.next() { - let id = docids.last().unwrap().value; + while let Some((external_id, marked_docids)) = iter.next() { + let id = indexed_last_value(marked_docids).unwrap(); new_soft_builder.insert(external_id, id)?; } @@ -89,7 +90,7 @@ impl<'a> ExternalDocumentsIds<'a> { let union_op = self.hard.op().add(&self.soft).r#union(); let mut iter = union_op.into_stream(); while let Some((external_id, marked_docids)) = iter.next() { - let id = marked_docids.last().unwrap().value; + let id = indexed_last_value(marked_docids).unwrap(); if id != DELETED_ID { let external_id = str::from_utf8(external_id).unwrap(); map.insert(external_id.to_owned(), id.try_into().unwrap()); @@ -105,13 +106,10 @@ impl<'a> ExternalDocumentsIds<'a> { let mut iter = union_op.into_stream(); let mut new_hard_builder = fst::MapBuilder::memory(); - while let Some((external_id, docids)) = iter.next() { - if docids.len() == 2 { - if docids[1].value != DELETED_ID { - new_hard_builder.insert(external_id, docids[1].value)?; - } - } else { - new_hard_builder.insert(external_id, docids[0].value)?; + while let Some((external_id, marked_docids)) = iter.next() { + let value = indexed_last_value(marked_docids).unwrap(); + if value != DELETED_ID { + new_hard_builder.insert(external_id, value)?; } } @@ -140,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> { } } +/// Returns the value of the `IndexedValue` with the highest _index_. +fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option { + indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value) +} + #[cfg(test)] mod tests { use super::*; @@ -190,4 +193,25 @@ mod tests { assert_eq!(external_documents_ids.get("g"), Some(7)); assert_eq!(external_documents_ids.get("h"), Some(8)); } + + #[test] + fn strange_delete_insert_ids() { + let mut external_documents_ids = ExternalDocumentsIds::default(); + + let new_ids = + fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap(); + external_documents_ids.insert_ids(&new_ids).unwrap(); + assert_eq!(external_documents_ids.get("1"), Some(0)); + assert_eq!(external_documents_ids.get("123"), Some(1)); + assert_eq!(external_documents_ids.get("30"), Some(2)); + assert_eq!(external_documents_ids.get("456"), Some(3)); + + let deleted_ids = fst::Set::from_iter(vec!["30"]).unwrap(); + external_documents_ids.delete_ids(deleted_ids).unwrap(); + assert_eq!(external_documents_ids.get("30"), None); + + let new_ids = fst::Map::from_iter(vec![("30", 2)]).unwrap(); + external_documents_ids.insert_ids(&new_ids).unwrap(); + assert_eq!(external_documents_ids.get("30"), Some(2)); + } }