mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Soft-deletion computation no longer takes into account the mapsize
Implemented solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824
This commit is contained in:
parent
e2ae3b24aa
commit
171c942282
@ -189,29 +189,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// decide for a hard or soft deletion depending on the strategy
|
// decide for a hard or soft deletion depending on the strategy
|
||||||
let soft_deletion = match self.strategy {
|
let soft_deletion = match self.strategy {
|
||||||
DeletionStrategy::Dynamic => {
|
DeletionStrategy::Dynamic => {
|
||||||
// if we have less documents to delete than the threshold we simply save them in
|
// decide to keep the soft deleted in the DB for now if they meet 2 criteria:
|
||||||
// the `soft_deleted_documents_ids` bitmap and early exit.
|
// 1. There are fewer soft-deleted documents than live documents (i.e. soft-deleted make up less than 50% of the total), *and*
|
||||||
|
// 2. The estimated size occupied on disk by the soft-deleted documents is below a fixed threshold (1GiB)
|
||||||
|
|
||||||
let size_used = self.index.used_size()?;
|
let size_used = self.index.used_size()?;
|
||||||
let map_size = self.index.env.map_size()? as u64;
|
|
||||||
let nb_documents = self.index.number_of_documents(self.wtxn)?;
|
let nb_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
let nb_soft_deleted = soft_deleted_docids.len();
|
let nb_soft_deleted = soft_deleted_docids.len();
|
||||||
|
|
||||||
let percentage_available = 100 - (size_used * 100 / map_size);
|
(nb_soft_deleted < nb_documents) && {
|
||||||
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
|
const SOFT_DELETED_SIZE_BYTE_THRESHOLD: u64 = 1_073_741_824; // 1GiB
|
||||||
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
|
|
||||||
let percentage_used_by_soft_deleted_documents =
|
|
||||||
estimated_size_used_by_soft_deleted * 100 / map_size;
|
|
||||||
|
|
||||||
// if we have more than 10% of disk space available and the soft deleted
|
// nb_documents + nb_soft_deleted != 0 because if nb_documents was 0 we would have short-circuited earlier, and since then we have moved the documents to delete
|
||||||
// documents uses less than 10% of the total space available,
|
// from the documents_docids to the soft_deleted_docids.
|
||||||
// we skip the deletion. Eg.
|
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
|
||||||
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
|
let estimated_size_used_by_soft_deleted =
|
||||||
// We don’t delete anything.
|
estimated_document_size * nb_soft_deleted;
|
||||||
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
|
estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
|
||||||
// We run the deletion.
|
}
|
||||||
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
|
|
||||||
// We run the deletion.
|
|
||||||
percentage_available > 10 && percentage_used_by_soft_deleted_documents < 10
|
|
||||||
}
|
}
|
||||||
DeletionStrategy::AlwaysSoft => true,
|
DeletionStrategy::AlwaysSoft => true,
|
||||||
DeletionStrategy::AlwaysHard => false,
|
DeletionStrategy::AlwaysHard => false,
|
||||||
@ -227,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// There are more documents to delete than the threshold, so we need to delete them all
|
// Erase soft-deleted from DB
|
||||||
self.to_delete_docids = soft_deleted_docids;
|
self.to_delete_docids = soft_deleted_docids;
|
||||||
// and we can reset the soft deleted bitmap
|
// and we can reset the soft deleted bitmap
|
||||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
||||||
|
Loading…
Reference in New Issue
Block a user