Merge #608

608: Fix soft deleted documents r=ManyTheFish a=ManyTheFish — When documents were replaced or updated, indexing skipped the replaced documents. Related to https://github.com/meilisearch/meilisearch/issues/2672 — Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-20 17:45:54 +08:00 · 2022-08-17 13:38:10 +00:00 · 2022-08-17 13:38:10 +00:00 · fb95e67a2a
commit fb95e67a2a
parent e4a52e6e45 e9e2349ce6
2 changed files with 24 additions and 8 deletions
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -407,7 +407,7 @@ where
        // We write the external documents ids into the main database.
        self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;

-        let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
+        let all_documents_ids = index_documents_ids | new_documents_ids;
        self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;

        self.execute_prefix_databases(
@@ -654,6 +654,9 @@ mod tests {
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);
+        let count = index.all_documents(&rtxn).unwrap().count();
+        assert_eq!(count, 3);
+
        drop(rtxn);
    }

@@ -888,12 +891,26 @@ mod tests {
        index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;

        index
-            .add_documents(documents!([{
-                "id": 2,
-                "author": "J. Austen",
-                "date": "1813"
-            }]))
+            .add_documents(documents!([
+                {"id":4,"title":"Harry Potter and the Half-Blood Princess"},
+                {"id":456,"title":"The Little Prince"}
+            ]))
            .unwrap();
+
+        index
+            .add_documents(documents!([
+                { "id": 2, "author": "J. Austen", "date": "1813" }
+            ]))
+            .unwrap();
+
+        // Check that there is **always** 6 documents.
+        let rtxn = index.read_txn().unwrap();
+        let count = index.number_of_documents(&rtxn).unwrap();
+        assert_eq!(count, 6);
+        let count = index.all_documents(&rtxn).unwrap().count();
+        assert_eq!(count, 6);
+
+        drop(rtxn);
    }

    #[test]
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -249,11 +249,10 @@ impl<'a, 'i> Transform<'a, 'i> {
                        None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
                    }
                }
-            } else {
-                self.new_documents_ids.insert(docid);
            }

            if !skip_insertion {
+                self.new_documents_ids.insert(docid);
                // We use the extracted/generated user id as the key for this document.
                self.original_sorter.insert(&docid.to_be_bytes(), obkv_buffer.clone())?;