Add DeletionStrategy

This commit is contained in:
Louis Dureuil 2022-12-19 09:47:54 +01:00
parent 5114686394
commit fc7618d49b
No known key found for this signature in database
3 changed files with 39 additions and 9 deletions

View File

@ -26,7 +26,7 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
index: &'i Index,
external_documents_ids: ExternalDocumentsIds<'static>,
to_delete_docids: RoaringBitmap,
disable_soft_deletion: bool,
strategy: DeletionStrategy,
}
/// Result of a [`DeleteDocuments`] operation.
@ -36,6 +36,36 @@ pub struct DocumentDeletionResult {
pub remaining_documents: u64,
}
/// Strategy for deleting documents.
///
/// - Soft-deleted documents are simply marked as deleted without being actually removed from DB.
/// - Hard-deleted documents are definitely suppressed from the DB.
///
/// Soft-deleted documents trade disk space for runtime performance.
///
/// Note that any of these variants can be used at any given moment for any indexation in a database.
/// For instance, you can use an [`AlwaysSoft`] followed by an [`AlwaysHard`] option without issue.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DeletionStrategy {
#[default]
/// Definitely suppress documents according to the number of size of soft-deleted documents
Dynamic,
/// Never definitely suppress documents
AlwaysSoft,
/// Always definitely suppress documents
AlwaysHard,
}
impl std::fmt::Display for DeletionStrategy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DeletionStrategy::Dynamic => write!(f, "dynamic"),
DeletionStrategy::AlwaysSoft => write!(f, "always_soft"),
DeletionStrategy::AlwaysHard => write!(f, "always_hard"),
}
}
}
/// Result of a [`DeleteDocuments`] operation, used for internal purposes.
///
/// It is a superset of the [`DocumentDeletionResult`] structure, giving
@ -59,12 +89,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
index,
external_documents_ids,
to_delete_docids: RoaringBitmap::new(),
disable_soft_deletion: false,
strategy: Default::default(),
})
}
pub fn disable_soft_deletion(&mut self, disable: bool) {
self.disable_soft_deletion = disable;
pub fn strategy(&mut self, strategy: DeletionStrategy) {
self.strategy = strategy;
}
pub fn delete_document(&mut self, docid: u32) {

View File

@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids,
WordPrefixPositionDocids, WordsPrefixesFst,
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst,
};
use crate::{Index, Result, RoaringBitmapCodec};
@ -88,7 +88,7 @@ pub struct IndexDocumentsConfig {
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
pub disable_soft_deletion: bool,
pub deletion_strategy: DeletionStrategy,
pub autogenerate_docids: bool,
}
@ -332,7 +332,7 @@ where
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
deletion_builder.strategy(self.config.deletion_strategy);
debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_result = deletion_builder.execute_inner()?;

View File

@ -1,6 +1,6 @@
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult};
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{