Soft-deletion computation no longer takes into account the mapsize

Implemented solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824
This commit is contained in:
Louis Dureuil 2022-12-15 12:04:46 +01:00
parent e2ae3b24aa
commit 171c942282
No known key found for this signature in database

View File

@ -189,29 +189,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// decide for a hard or soft deletion depending on the strategy // decide for a hard or soft deletion depending on the strategy
let soft_deletion = match self.strategy { let soft_deletion = match self.strategy {
DeletionStrategy::Dynamic => { DeletionStrategy::Dynamic => {
// if we have fewer documents to delete than the threshold we simply save them in // decide to keep the soft-deleted documents in the DB for now if they meet 2 criteria:
// the `soft_deleted_documents_ids` bitmap and early exit. // 1. Soft-deleted documents make up less than 50% of all documents (i.e. there are fewer soft-deleted than remaining documents), *and*
// 2. Soft-deleted documents occupy, as estimated from the average document size, less than a fixed size on disk
let size_used = self.index.used_size()?; let size_used = self.index.used_size()?;
let map_size = self.index.env.map_size()? as u64;
let nb_documents = self.index.number_of_documents(self.wtxn)?; let nb_documents = self.index.number_of_documents(self.wtxn)?;
let nb_soft_deleted = soft_deleted_docids.len(); let nb_soft_deleted = soft_deleted_docids.len();
let percentage_available = 100 - (size_used * 100 / map_size); (nb_soft_deleted < nb_documents) && {
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted); const SOFT_DELETED_SIZE_BYTE_THRESHOLD: u64 = 1_073_741_824; // 1GiB
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
let percentage_used_by_soft_deleted_documents =
estimated_size_used_by_soft_deleted * 100 / map_size;
// if we have more than 10% of disk space available and the soft deleted // nb_documents + nb_soft_deleted != 0 because if nb_documents is 0 we short-circuit earlier, and then we moved the documents to delete
// documents use less than 10% of the total space available, // from the documents_docids to the soft_deleted_docids.
// we skip the deletion. Eg. let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents let estimated_size_used_by_soft_deleted =
// We dont delete anything. estimated_document_size * nb_soft_deleted;
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
// We run the deletion. }
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
// We run the deletion.
percentage_available > 10 && percentage_used_by_soft_deleted_documents < 10
} }
DeletionStrategy::AlwaysSoft => true, DeletionStrategy::AlwaysSoft => true,
DeletionStrategy::AlwaysHard => false, DeletionStrategy::AlwaysHard => false,
@ -227,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}); });
} }
// There are more documents to delete than the threshold, so we need to delete them all // Erase soft-deleted documents from the DB
self.to_delete_docids = soft_deleted_docids; self.to_delete_docids = soft_deleted_docids;
// and we can reset the soft deleted bitmap // and we can reset the soft deleted bitmap
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;