From e3ee553dcca16e5ff5e688da6230acdab8eacc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 12 Dec 2022 12:42:55 +0100 Subject: [PATCH] Remove soft deleted ids from ExternalDocumentIds during document import If the document import replaces a document using hard deletion --- milli/src/index.rs | 12 ++++++++-- milli/src/update/delete_documents.rs | 31 +++++++++++++++++++++---- milli/src/update/index_documents/mod.rs | 9 ++++--- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 8855d51cd..2d489fbd1 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2166,17 +2166,25 @@ pub(crate) mod tests { db_snap!(index, external_documents_ids, 2, @r###" soft: hard: - 3 0 4 3 5 2 "###); db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); - // boom index .add_documents(documents!([ { "primary_key": "3" }, ])) .unwrap(); + + db_snap!(index, documents_ids, @"[0, 2, 3, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 3 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 88ec78420..0f77e2b13 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -34,6 +34,12 @@ pub struct DocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, } +#[derive(Debug)] +pub struct DetailedDocumentDeletionResult { + pub deleted_documents: u64, + pub remaining_documents: u64, + pub used_soft_deletion: bool, +} impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { pub fn new( @@ -68,8 +74,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } + pub fn execute(self) -> Result { + let DetailedDocumentDeletionResult { + deleted_documents, + remaining_documents, + used_soft_deletion: _, + } = self.execute_inner()?; - pub fn execute(mut self) -> Result { + Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) + } + pub(crate) fn execute_inner(mut self) -> Result { self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database. @@ -83,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if !soft_deleted_docids.is_empty() { ClearDocuments::new(self.wtxn, self.index).execute()?; } - return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 }); + return Ok(DetailedDocumentDeletionResult { + deleted_documents: 0, + remaining_documents: 0, + used_soft_deletion: false, + }); } // We remove the documents ids that we want to delete @@ -95,9 +113,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // to delete is exactly the number of documents in the database. if current_documents_ids_len == self.to_delete_docids.len() { let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, + used_soft_deletion: false, }); } @@ -159,9 +178,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { && percentage_used_by_soft_deleted_documents < 10 { self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: true, }); } @@ -488,9 +508,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { &self.to_delete_docids, )?; - Ok(DocumentDeletionResult { + Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: false, }) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index db6ffedc1..478a74065 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -210,7 +210,7 @@ where primary_key, fields_ids_map, field_distribution, - external_documents_ids, + mut external_documents_ids, new_documents_ids, replaced_documents_ids, documents_count, @@ -335,8 +335,11 @@ where deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); debug!("documents to delete {:?}", replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids); - let deleted_documents_count = deletion_builder.execute()?; - debug!("{} documents actually deleted", deleted_documents_count.deleted_documents); + let deleted_documents_result = deletion_builder.execute_inner()?; + debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); + if !deleted_documents_result.used_soft_deletion { + external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; + } } let index_documents_ids = self.index.documents_ids(self.wtxn)?;