From fc7618d49b1d1b3eb8004eb7ac7432cf2ea148c5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 19 Dec 2022 09:47:54 +0100 Subject: [PATCH] Add DeletionStrategy --- milli/src/update/delete_documents.rs | 38 ++++++++++++++++++++++--- milli/src/update/index_documents/mod.rs | 8 +++--- milli/src/update/mod.rs | 2 +- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 6c0f66685..25dc9fa12 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -26,7 +26,7 @@ pub struct DeleteDocuments<'t, 'u, 'i> { index: &'i Index, external_documents_ids: ExternalDocumentsIds<'static>, to_delete_docids: RoaringBitmap, - disable_soft_deletion: bool, + strategy: DeletionStrategy, } /// Result of a [`DeleteDocuments`] operation. @@ -36,6 +36,36 @@ pub struct DocumentDeletionResult { pub remaining_documents: u64, } +/// Strategy for deleting documents. +/// +/// - Soft-deleted documents are simply marked as deleted without actually being removed from the DB. +/// - Hard-deleted documents are permanently removed from the DB. +/// +/// Soft-deleted documents trade disk space for runtime performance. +/// +/// Note that any of these variants can be used at any given moment for any indexation in a database. +/// For instance, you can use a [`DeletionStrategy::AlwaysSoft`] option followed by a [`DeletionStrategy::AlwaysHard`] option without issue.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub enum DeletionStrategy { + #[default] + /// Permanently remove documents depending on the number or size of soft-deleted documents + Dynamic, + /// Never permanently remove documents + AlwaysSoft, + /// Always permanently remove documents + AlwaysHard, +} + +impl std::fmt::Display for DeletionStrategy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DeletionStrategy::Dynamic => write!(f, "dynamic"), + DeletionStrategy::AlwaysSoft => write!(f, "always_soft"), + DeletionStrategy::AlwaysHard => write!(f, "always_hard"), + } + } +} + /// Result of a [`DeleteDocuments`] operation, used for internal purposes. /// /// It is a superset of the [`DocumentDeletionResult`] structure, giving @@ -59,12 +89,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { index, external_documents_ids, to_delete_docids: RoaringBitmap::new(), - disable_soft_deletion: false, + strategy: Default::default(), }) } - pub fn disable_soft_deletion(&mut self, disable: bool) { - self.disable_soft_deletion = disable; + pub fn strategy(&mut self, strategy: DeletionStrategy) { + self.strategy = strategy; } pub fn delete_document(&mut self, docid: u32) { diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 74a8d2779..7b8408fe4 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError, UserError}; pub use crate::update::index_documents::helpers::CursorClonableMmap; use crate::update::{ - self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids, - WordPrefixPositionDocids, WordsPrefixesFst, + self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, + WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst, }; use
crate::{Index, Result, RoaringBitmapCodec}; @@ -88,7 +88,7 @@ pub struct IndexDocumentsConfig { pub words_positions_level_group_size: Option, pub words_positions_min_level_size: Option, pub update_method: IndexDocumentsMethod, - pub disable_soft_deletion: bool, + pub deletion_strategy: DeletionStrategy, pub autogenerate_docids: bool, } @@ -332,7 +332,7 @@ where // able to simply insert all the documents even if they already exist in the database. if !replaced_documents_ids.is_empty() { let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?; - deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); + deletion_builder.strategy(self.config.deletion_strategy); debug!("documents to delete {:?}", replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids); let deleted_documents_result = deletion_builder.execute_inner()?; diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 952720725..2dda24172 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -1,6 +1,6 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; -pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; +pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult}; pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{