608: Fix soft deleted documents r=ManyTheFish a=ManyTheFish

When documents were replaced or updated, the indexing step skipped the replaced documents.

Related to https://github.com/meilisearch/meilisearch/issues/2672

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
bors[bot] 2022-08-17 13:38:10 +00:00 committed by GitHub
commit fb95e67a2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 8 deletions

View File

@ -407,7 +407,7 @@ where
// We write the external documents ids into the main database.
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
let all_documents_ids = index_documents_ids | new_documents_ids;
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
self.execute_prefix_databases(
@ -654,6 +654,9 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 3);
let count = index.all_documents(&rtxn).unwrap().count();
assert_eq!(count, 3);
drop(rtxn);
}
@ -888,12 +891,26 @@ mod tests {
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index
.add_documents(documents!([{
"id": 2,
"author": "J. Austen",
"date": "1813"
}]))
.add_documents(documents!([
{"id":4,"title":"Harry Potter and the Half-Blood Princess"},
{"id":456,"title":"The Little Prince"}
]))
.unwrap();
index
.add_documents(documents!([
{ "id": 2, "author": "J. Austen", "date": "1813" }
]))
.unwrap();
// Check that there are **always** 6 documents.
let rtxn = index.read_txn().unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 6);
let count = index.all_documents(&rtxn).unwrap().count();
assert_eq!(count, 6);
drop(rtxn);
}
#[test]

View File

@ -249,11 +249,10 @@ impl<'a, 'i> Transform<'a, 'i> {
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
}
}
} else {
self.new_documents_ids.insert(docid);
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(&docid.to_be_bytes(), obkv_buffer.clone())?;