Remove soft deleted ids from ExternalDocumentIds during document import

If the document import replaces a document using hard deletion, the soft-deleted ids are also removed from the ExternalDocumentIds FSTs.
This commit is contained in:
Loïc Lecrenier 2022-12-12 12:42:55 +01:00
parent bebd050961
commit e3ee553dcc
3 changed files with 42 additions and 10 deletions

View File

@ -2166,17 +2166,25 @@ pub(crate) mod tests {
db_snap!(index, external_documents_ids, 2, @r###" db_snap!(index, external_documents_ids, 2, @r###"
soft: soft:
hard: hard:
3 0
4 3 4 3
5 2 5 2
"###); "###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
// Re-adding a document whose primary key ("3") is already indexed replaces the existing one.
index index
.add_documents(documents!([ .add_documents(documents!([
{ "primary_key": "3" }, { "primary_key": "3" },
])) ]))
.unwrap(); .unwrap();
db_snap!(index, documents_ids, @"[0, 2, 3, ]");
db_snap!(index, external_documents_ids, 2, @r###"
soft:
hard:
3 0
4 3
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
} }
} }

View File

@ -34,6 +34,12 @@ pub struct DocumentDeletionResult {
pub deleted_documents: u64, pub deleted_documents: u64,
pub remaining_documents: u64, pub remaining_documents: u64,
} }
/// Outcome of a document deletion, including how the deletion was carried out.
///
/// Returned by `DeleteDocuments::execute_inner`. `DeleteDocuments::execute` discards
/// `used_soft_deletion` and exposes the two counters as a `DocumentDeletionResult`.
#[derive(Debug)]
pub struct DetailedDocumentDeletionResult {
/// Number of documents deleted by the operation.
pub deleted_documents: u64,
/// Number of documents left in the index once the operation is done.
pub remaining_documents: u64,
/// `true` only when the deletion stopped after recording the ids in the soft-deleted
/// documents ids; `false` when nothing was deleted, when the index was cleared, or
/// when the deletion was carried through to the end.
pub used_soft_deletion: bool,
}
impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
pub fn new( pub fn new(
@ -68,8 +74,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
self.delete_document(docid); self.delete_document(docid);
Some(docid) Some(docid)
} }
pub fn execute(self) -> Result<DocumentDeletionResult> {
let DetailedDocumentDeletionResult {
deleted_documents,
remaining_documents,
used_soft_deletion: _,
} = self.execute_inner()?;
pub fn execute(mut self) -> Result<DocumentDeletionResult> { Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
}
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
// We retrieve the current documents ids that are in the database. // We retrieve the current documents ids that are in the database.
@ -83,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
if !soft_deleted_docids.is_empty() { if !soft_deleted_docids.is_empty() {
ClearDocuments::new(self.wtxn, self.index).execute()?; ClearDocuments::new(self.wtxn, self.index).execute()?;
} }
return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 }); return Ok(DetailedDocumentDeletionResult {
deleted_documents: 0,
remaining_documents: 0,
used_soft_deletion: false,
});
} }
// We remove the documents ids that we want to delete // We remove the documents ids that we want to delete
@ -95,9 +113,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// to delete is exactly the number of documents in the database. // to delete is exactly the number of documents in the database.
if current_documents_ids_len == self.to_delete_docids.len() { if current_documents_ids_len == self.to_delete_docids.len() {
let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?; let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?;
return Ok(DocumentDeletionResult { return Ok(DetailedDocumentDeletionResult {
deleted_documents: current_documents_ids_len, deleted_documents: current_documents_ids_len,
remaining_documents, remaining_documents,
used_soft_deletion: false,
}); });
} }
@ -159,9 +178,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
&& percentage_used_by_soft_deleted_documents < 10 && percentage_used_by_soft_deleted_documents < 10
{ {
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
return Ok(DocumentDeletionResult { return Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(), deleted_documents: self.to_delete_docids.len(),
remaining_documents: documents_ids.len(), remaining_documents: documents_ids.len(),
used_soft_deletion: true,
}); });
} }
@ -488,9 +508,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
&self.to_delete_docids, &self.to_delete_docids,
)?; )?;
Ok(DocumentDeletionResult { Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(), deleted_documents: self.to_delete_docids.len(),
remaining_documents: documents_ids.len(), remaining_documents: documents_ids.len(),
used_soft_deletion: false,
}) })
} }
} }

View File

@ -210,7 +210,7 @@ where
primary_key, primary_key,
fields_ids_map, fields_ids_map,
field_distribution, field_distribution,
external_documents_ids, mut external_documents_ids,
new_documents_ids, new_documents_ids,
replaced_documents_ids, replaced_documents_ids,
documents_count, documents_count,
@ -335,8 +335,11 @@ where
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
debug!("documents to delete {:?}", replaced_documents_ids); debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_count = deletion_builder.execute()?; let deleted_documents_result = deletion_builder.execute_inner()?;
debug!("{} documents actually deleted", deleted_documents_count.deleted_documents); debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
if !deleted_documents_result.used_soft_deletion {
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
}
} }
let index_documents_ids = self.index.documents_ids(self.wtxn)?; let index_documents_ids = self.index.documents_ids(self.wtxn)?;