mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Merge #734
734: Fix bug 2945/3021 (missing key in documents database) r=Kerollmops a=loiclec # Pull Request ## Related issue Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/2945 (until we integrate the new milli bump into meilisearch). **Note that a dump will not be sufficient to upgrade from meilisearch v0.30.2 to meilisearch v0.30.3 due to this fix** because the bug could have caused the `documents` database to be corrupted. Instead, a full manual reimport of the documents will be necessary. ## What does this PR do? There was a bug happening when: 1. A few documents are added to the index 2. Some of these documents are soft-deleted 3. New documents are added, replacing existing ones and triggering a hard-deletion The `IndexDocuments::execute` method would then perform the hard-deletion but forget to change the `external_document_ids` structure appropriately. As a result, the `external_document_ids` would contain keys corresponding to documents that do no exist anymore. To fix this bug, I split the `DeleteDocuments::execute` method into two: `execute_inner` and `execute`. - `execute_inner` returns a `DetailedDocumentDeletionResult` which says whether soft-deletion was used or not - `execute` keeps the exact same signature and behaviour Then, when deleting replaced documents inside `IndexDocuments::execute`, we call `DeleteDocuments::execute_inner` instead of `DeleteDocuments::execute`. If soft-deletion was used, nothing more is done. But if hard-deletion was used, we remove every reference to soft-deleted documents in the new `external_documents_ids` structure. ## Correctness - Every other test still passes - The reproduction test case now passes - In a different branch ([`update-fuzz-test`](https://github.com/meilisearch/milli/pull/735)), I created a fuzz-test that reproduces the past two bugs. This fuzz test cannot find this bug through any combination of some hand-selected `DocumentAddition / DocumentDeletion / DocumentClear / SettingsUpdate` operations. In that test, each relevant operations can be executed with or without soft-deletion, and document additions can be done in batches, replacing or updating existing documents. Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
e0a8f8cb5a
@ -2111,4 +2111,80 @@ pub(crate) mod tests {
|
|||||||
"###);
|
"###);
|
||||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bug_3021_third() {
|
||||||
|
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||||
|
let mut index = TempIndex::new();
|
||||||
|
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_primary_key("primary_key".to_owned());
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{ "primary_key": 3 },
|
||||||
|
{ "primary_key": 4 },
|
||||||
|
{ "primary_key": 5 }
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents_ids, @"[0, 1, 2, ]");
|
||||||
|
db_snap!(index, external_documents_ids, 1, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
3 0
|
||||||
|
4 1
|
||||||
|
5 2
|
||||||
|
"###);
|
||||||
|
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||||
|
delete.delete_external_id("3");
|
||||||
|
delete.execute().unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents_ids, @"[1, 2, ]");
|
||||||
|
db_snap!(index, external_documents_ids, 2, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
3 0
|
||||||
|
4 1
|
||||||
|
5 2
|
||||||
|
"###);
|
||||||
|
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
|
||||||
|
|
||||||
|
index.index_documents_config.disable_soft_deletion = true;
|
||||||
|
|
||||||
|
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents_ids, @"[2, 3, ]");
|
||||||
|
db_snap!(index, external_documents_ids, 2, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
4 3
|
||||||
|
5 2
|
||||||
|
"###);
|
||||||
|
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{ "primary_key": "3" },
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents_ids, @"[0, 2, 3, ]");
|
||||||
|
db_snap!(index, external_documents_ids, 2, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
3 0
|
||||||
|
4 3
|
||||||
|
5 2
|
||||||
|
"###);
|
||||||
|
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,12 +29,24 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
|
|||||||
disable_soft_deletion: bool,
|
disable_soft_deletion: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Result of a [`DeleteDocuments`] operation.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
pub struct DocumentDeletionResult {
|
pub struct DocumentDeletionResult {
|
||||||
pub deleted_documents: u64,
|
pub deleted_documents: u64,
|
||||||
pub remaining_documents: u64,
|
pub remaining_documents: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Result of a [`DeleteDocuments`] operation, used for internal purposes.
|
||||||
|
///
|
||||||
|
/// It is a superset of the [`DocumentDeletionResult`] structure, giving
|
||||||
|
/// additional information about the algorithm used to delete the documents.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct DetailedDocumentDeletionResult {
|
||||||
|
pub deleted_documents: u64,
|
||||||
|
pub remaining_documents: u64,
|
||||||
|
pub soft_deletion_used: bool,
|
||||||
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -68,8 +80,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
self.delete_document(docid);
|
self.delete_document(docid);
|
||||||
Some(docid)
|
Some(docid)
|
||||||
}
|
}
|
||||||
|
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||||
|
let DetailedDocumentDeletionResult {
|
||||||
|
deleted_documents,
|
||||||
|
remaining_documents,
|
||||||
|
soft_deletion_used: _,
|
||||||
|
} = self.execute_inner()?;
|
||||||
|
|
||||||
pub fn execute(mut self) -> Result<DocumentDeletionResult> {
|
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
|
||||||
|
}
|
||||||
|
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
|
||||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||||
|
|
||||||
// We retrieve the current documents ids that are in the database.
|
// We retrieve the current documents ids that are in the database.
|
||||||
@ -83,7 +103,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
if !soft_deleted_docids.is_empty() {
|
if !soft_deleted_docids.is_empty() {
|
||||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||||
}
|
}
|
||||||
return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 });
|
return Ok(DetailedDocumentDeletionResult {
|
||||||
|
deleted_documents: 0,
|
||||||
|
remaining_documents: 0,
|
||||||
|
soft_deletion_used: false,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// We remove the documents ids that we want to delete
|
// We remove the documents ids that we want to delete
|
||||||
@ -95,9 +119,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// to delete is exactly the number of documents in the database.
|
// to delete is exactly the number of documents in the database.
|
||||||
if current_documents_ids_len == self.to_delete_docids.len() {
|
if current_documents_ids_len == self.to_delete_docids.len() {
|
||||||
let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?;
|
let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||||
return Ok(DocumentDeletionResult {
|
return Ok(DetailedDocumentDeletionResult {
|
||||||
deleted_documents: current_documents_ids_len,
|
deleted_documents: current_documents_ids_len,
|
||||||
remaining_documents,
|
remaining_documents,
|
||||||
|
soft_deletion_used: false,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,9 +184,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
&& percentage_used_by_soft_deleted_documents < 10
|
&& percentage_used_by_soft_deleted_documents < 10
|
||||||
{
|
{
|
||||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
|
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
|
||||||
return Ok(DocumentDeletionResult {
|
return Ok(DetailedDocumentDeletionResult {
|
||||||
deleted_documents: self.to_delete_docids.len(),
|
deleted_documents: self.to_delete_docids.len(),
|
||||||
remaining_documents: documents_ids.len(),
|
remaining_documents: documents_ids.len(),
|
||||||
|
soft_deletion_used: true,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -488,9 +514,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(DocumentDeletionResult {
|
Ok(DetailedDocumentDeletionResult {
|
||||||
deleted_documents: self.to_delete_docids.len(),
|
deleted_documents: self.to_delete_docids.len(),
|
||||||
remaining_documents: documents_ids.len(),
|
remaining_documents: documents_ids.len(),
|
||||||
|
soft_deletion_used: false,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,7 +210,7 @@ where
|
|||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
external_documents_ids,
|
mut external_documents_ids,
|
||||||
new_documents_ids,
|
new_documents_ids,
|
||||||
replaced_documents_ids,
|
replaced_documents_ids,
|
||||||
documents_count,
|
documents_count,
|
||||||
@ -335,8 +335,11 @@ where
|
|||||||
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
|
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
|
||||||
debug!("documents to delete {:?}", replaced_documents_ids);
|
debug!("documents to delete {:?}", replaced_documents_ids);
|
||||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
deletion_builder.delete_documents(&replaced_documents_ids);
|
||||||
let deleted_documents_count = deletion_builder.execute()?;
|
let deleted_documents_result = deletion_builder.execute_inner()?;
|
||||||
debug!("{} documents actually deleted", deleted_documents_count.deleted_documents);
|
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
|
||||||
|
if !deleted_documents_result.soft_deletion_used {
|
||||||
|
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user