Add DeletionStrategy

This commit is contained in:
Louis Dureuil 2022-12-19 09:47:54 +01:00
parent 5114686394
commit fc7618d49b
No known key found for this signature in database
3 changed files with 39 additions and 9 deletions

View File

@ -26,7 +26,7 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
index: &'i Index, index: &'i Index,
external_documents_ids: ExternalDocumentsIds<'static>, external_documents_ids: ExternalDocumentsIds<'static>,
to_delete_docids: RoaringBitmap, to_delete_docids: RoaringBitmap,
disable_soft_deletion: bool, strategy: DeletionStrategy,
} }
/// Result of a [`DeleteDocuments`] operation. /// Result of a [`DeleteDocuments`] operation.
@ -36,6 +36,36 @@ pub struct DocumentDeletionResult {
pub remaining_documents: u64, pub remaining_documents: u64,
} }
/// Strategy for deleting documents.
///
/// - Soft-deleted documents are simply marked as deleted without being actually removed
///   from the DB.
/// - Hard-deleted documents are definitively removed from the DB.
///
/// Soft-deleting documents trades disk space for runtime performance.
///
/// Note that any of these variants can be used at any given moment for any indexation in a
/// database. For instance, you can use [`DeletionStrategy::AlwaysSoft`] followed by
/// [`DeletionStrategy::AlwaysHard`] without issue.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DeletionStrategy {
    /// Hard-delete documents depending on the number or size of soft-deleted documents.
    ///
    /// This is the default strategy.
    #[default]
    Dynamic,
    /// Never hard-delete documents: they are only ever marked as deleted.
    AlwaysSoft,
    /// Always hard-delete documents.
    AlwaysHard,
}

impl std::fmt::Display for DeletionStrategy {
    /// Writes the snake_case name of the strategy: `dynamic`, `always_soft` or `always_hard`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The names are constant strings, so no formatting machinery is needed.
        let name = match self {
            DeletionStrategy::Dynamic => "dynamic",
            DeletionStrategy::AlwaysSoft => "always_soft",
            DeletionStrategy::AlwaysHard => "always_hard",
        };
        f.write_str(name)
    }
}
/// Result of a [`DeleteDocuments`] operation, used for internal purposes. /// Result of a [`DeleteDocuments`] operation, used for internal purposes.
/// ///
/// It is a superset of the [`DocumentDeletionResult`] structure, giving /// It is a superset of the [`DocumentDeletionResult`] structure, giving
@ -59,12 +89,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
index, index,
external_documents_ids, external_documents_ids,
to_delete_docids: RoaringBitmap::new(), to_delete_docids: RoaringBitmap::new(),
disable_soft_deletion: false, strategy: Default::default(),
}) })
} }
pub fn disable_soft_deletion(&mut self, disable: bool) { pub fn strategy(&mut self, strategy: DeletionStrategy) {
self.disable_soft_deletion = disable; self.strategy = strategy;
} }
pub fn delete_document(&mut self, docid: u32) { pub fn delete_document(&mut self, docid: u32) {

View File

@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError}; use crate::error::{Error, InternalError, UserError};
pub use crate::update::index_documents::helpers::CursorClonableMmap; pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{ use crate::update::{
self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids, self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixPositionDocids, WordsPrefixesFst, WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst,
}; };
use crate::{Index, Result, RoaringBitmapCodec}; use crate::{Index, Result, RoaringBitmapCodec};
@ -88,7 +88,7 @@ pub struct IndexDocumentsConfig {
pub words_positions_level_group_size: Option<NonZeroU32>, pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>, pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod, pub update_method: IndexDocumentsMethod,
pub disable_soft_deletion: bool, pub deletion_strategy: DeletionStrategy,
pub autogenerate_docids: bool, pub autogenerate_docids: bool,
} }
@ -332,7 +332,7 @@ where
// able to simply insert all the documents even if they already exist in the database. // able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() { if !replaced_documents_ids.is_empty() {
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?; let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); deletion_builder.strategy(self.config.deletion_strategy);
debug!("documents to delete {:?}", replaced_documents_ids); debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_result = deletion_builder.execute_inner()?; let deleted_documents_result = deletion_builder.execute_inner()?;

View File

@ -1,6 +1,6 @@
pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments; pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult};
pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{ pub use self::index_documents::{