diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 4b6711601..afe9c5189 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -324,7 +324,6 @@ impl ErrorCode for milli::Error { UserError::SerdeJson(_) | UserError::InvalidLmdbOpenOptions | UserError::DocumentLimitReached - | UserError::AccessingSoftDeletedDocument { .. } | UserError::UnknownInternalDocumentId { .. } => Code::Internal, UserError::InvalidStoreFile => Code::InvalidStoreFile, UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, diff --git a/milli/src/error.rs b/milli/src/error.rs index e9e1fddd3..b249f2977 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -89,8 +89,6 @@ pub enum FieldIdMapMissingEntry { #[derive(Error, Debug)] pub enum UserError { - #[error("A soft deleted internal document id have been used: `{document_id}`.")] - AccessingSoftDeletedDocument { document_id: DocumentId }, #[error("A document cannot contain more than 65,535 fields.")] AttributeLimitReached, #[error(transparent)] diff --git a/milli/src/index.rs b/milli/src/index.rs index 3e48f5eb1..b20674d4c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -40,7 +40,6 @@ pub mod main_key { pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; - pub const SOFT_DELETED_DOCUMENTS_IDS_KEY: &str = "soft-deleted-documents-ids"; pub const HIDDEN_FACETED_FIELDS_KEY: &str = "hidden-faceted-fields"; pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields"; @@ -367,29 +366,6 @@ impl Index { Ok(count.unwrap_or_default()) } - /* deleted documents ids */ - - /// Writes the soft deleted documents ids. - pub(crate) fn put_soft_deleted_documents_ids( - &self, - wtxn: &mut RwTxn, - docids: &RoaringBitmap, - ) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>( - wtxn, - main_key::SOFT_DELETED_DOCUMENTS_IDS_KEY, - docids, - ) - } - - /// Returns the soft deleted documents ids. - pub(crate) fn soft_deleted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self - .main - .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::SOFT_DELETED_DOCUMENTS_IDS_KEY)? - .unwrap_or_default()) - } - /* primary key */ /// Writes the documents primary key, this is the field name that is used to store the id. @@ -1187,12 +1163,7 @@ impl Index { rtxn: &'t RoTxn, ids: impl IntoIterator + 'a, ) -> Result)>> + 'a> { - let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?; - Ok(ids.into_iter().map(move |id| { - if soft_deleted_documents.contains(id) { - return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?; - } let kv = self .documents .get(rtxn, &BEU32::new(id))? @@ -1418,14 +1389,10 @@ impl Index { rtxn: &RoTxn, key: &(Script, Language), ) -> heed::Result> { - let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?; - let doc_ids = self.script_language_docids.get(rtxn, key)?; - Ok(doc_ids.map(|ids| ids - soft_deleted_documents)) + Ok(self.script_language_docids.get(rtxn, key)?) } pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result>> { - let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?; - let mut script_language: HashMap> = HashMap::new(); let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new(); let mut total = 0; @@ -1433,7 +1400,7 @@ impl Index { let ((script, language), docids) = sl?; // keep only Languages that contains at least 1 document. - let remaining_documents_count = (docids - &soft_deleted_documents).len(); + let remaining_documents_count = docids.len(); total += remaining_documents_count; if remaining_documents_count > 0 { script_language_doc_count.push((script, language, remaining_documents_count)); @@ -1918,7 +1885,6 @@ pub(crate) mod tests { 2 2 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); db_snap!(index, facet_id_f64_docids, 1, @r###" 1 0 0 1 [0, ] 1 0 1 1 [1, ] @@ -1943,7 +1909,6 @@ pub(crate) mod tests { 2 6 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[0, 1, 2, ]"); db_snap!(index, facet_id_f64_docids, 2, @r###" 1 0 0 1 [0, ] 1 0 1 1 [1, 4, ] @@ -1965,7 +1930,6 @@ pub(crate) mod tests { 2 6 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 3, @"[0, 1, 2, 3, ]"); db_snap!(index, facet_id_f64_docids, 3, @r###" 1 0 0 1 [0, ] 1 0 1 1 [1, 4, ] @@ -1989,7 +1953,6 @@ pub(crate) mod tests { 2 6 3 7 "###); - db_snap!(index, soft_deleted_documents_ids, 3, @"[]"); db_snap!(index, facet_id_f64_docids, 3, @r###" 0 0 0 1 [4, ] 0 0 1 1 [5, ] @@ -2052,7 +2015,6 @@ pub(crate) mod tests { 2 2 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); db_snap!(index, facet_id_f64_docids, 1, @r###" 1 0 0 1 [0, ] 1 0 1 1 [1, ] @@ -2085,7 +2047,6 @@ pub(crate) mod tests { 2 6 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[0, 1, 2, ]"); db_snap!(index, facet_id_f64_docids, 1, @r###" 1 0 0 1 [0, 4, ] 1 0 1 1 [1, 5, ] @@ -2153,7 +2114,6 @@ pub(crate) mod tests { 2 9 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[0, 1, 2, 4, 5, 6, ]"); db_snap!(index, facet_id_f64_docids, 1, @r###" 1 0 0 1 [0, 4, 7, ] 1 0 1 1 [1, 5, 8, ] @@ -2221,7 +2181,7 @@ pub(crate) mod tests { 2 12 3 3 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); + db_snap!(index, facet_id_f64_docids, 1, @r###" 1 0 0 1 [10, ] 1 0 3 1 [3, 11, ] @@ -2291,7 +2251,6 @@ pub(crate) mod tests { 34 1 38 0 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); index.delete_document("34"); @@ -2302,7 +2261,6 @@ pub(crate) mod tests { 34 1 38 0 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]"); index .update_settings(|s| { @@ -2318,7 +2276,6 @@ pub(crate) mod tests { hard: 38 0 "###); - db_snap!(index, soft_deleted_documents_ids, 3, @"[]"); // So that this document addition works correctly now. // It would be wrongly interpreted as a replacement before @@ -2331,7 +2288,6 @@ pub(crate) mod tests { 34 1 38 0 "###); - db_snap!(index, soft_deleted_documents_ids, 4, @"[]"); // We do the test again, but deleting the document with id 0 instead of id 1 now index.delete_document("38"); @@ -2343,7 +2299,6 @@ pub(crate) mod tests { 34 1 38 0 "###); - db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]"); index .update_settings(|s| { @@ -2357,7 +2312,6 @@ pub(crate) mod tests { hard: 34 1 "###); - db_snap!(index, soft_deleted_documents_ids, 6, @"[]"); // And adding lots of documents afterwards instead of just one. // These extra subtests don't add much, but it's better than nothing. @@ -2374,7 +2328,6 @@ pub(crate) mod tests { 41 3 42 5 "###); - db_snap!(index, soft_deleted_documents_ids, 7, @"[]"); } #[test] @@ -2403,7 +2356,6 @@ pub(crate) mod tests { 30 0 34 1 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); index.delete_document("34"); @@ -2414,7 +2366,6 @@ pub(crate) mod tests { 30 0 34 1 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]"); index .update_settings(|s| { @@ -2430,7 +2381,6 @@ pub(crate) mod tests { hard: 30 0 "###); - db_snap!(index, soft_deleted_documents_ids, 3, @"[]"); // So that when we add a new document index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap(); @@ -2444,7 +2394,6 @@ pub(crate) mod tests { 30 0 35 1 "###); - db_snap!(index, soft_deleted_documents_ids, 4, @"[]"); // And when we add 34 again, we don't replace document 35 index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap(); @@ -2458,7 +2407,6 @@ pub(crate) mod tests { 34 2 35 1 "###); - db_snap!(index, soft_deleted_documents_ids, 5, @"[]"); let rtxn = index.read_txn().unwrap(); let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0]; @@ -2499,7 +2447,6 @@ pub(crate) mod tests { 38 4 39 5 "###); - db_snap!(index, soft_deleted_documents_ids, 6, @"[]"); } #[test] @@ -2530,7 +2477,6 @@ pub(crate) mod tests { 4 1 5 2 "###); - db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); index.delete_document("3"); @@ -2542,7 +2488,6 @@ pub(crate) mod tests { 4 1 5 2 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]"); index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap(); @@ -2553,7 +2498,6 @@ pub(crate) mod tests { 4 3 5 2 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); index .add_documents(documents!([ @@ -2569,7 +2513,6 @@ pub(crate) mod tests { 4 3 5 2 "###); - db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); } #[test] @@ -2598,7 +2541,6 @@ pub(crate) mod tests { 11 0 4 1 "###); - db_snap!(index, soft_deleted_documents_ids, @"[]"); index .add_documents(documents!([ @@ -2615,7 +2557,6 @@ pub(crate) mod tests { 11 0 4 2 "###); - db_snap!(index, soft_deleted_documents_ids, @"[1, ]"); let mut wtxn = index.write_txn().unwrap(); let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); @@ -2630,7 +2571,6 @@ pub(crate) mod tests { 11 0 4 2 "###); - db_snap!(index, soft_deleted_documents_ids, @"[]"); index .add_documents(documents!([ @@ -2647,7 +2587,6 @@ pub(crate) mod tests { 11 0 4 1 "###); - db_snap!(index, soft_deleted_documents_ids, @"[2, 3, ]"); let rtxn = index.read_txn().unwrap(); let search = Search::new(&rtxn, &index); diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index fac7b68ea..4d9bbc183 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -223,12 +223,9 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; - // and finally we delete all the soft_deleted_documents, again, only once at the very end self.inner_evaluate(rtxn, index, &filterable_fields) - .map(|result| result - soft_deleted_documents) } fn evaluate_operator( diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 77d9f41ec..c22038f81 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -98,7 +98,6 @@ Create a snapshot test of the given database. - `facet_id_string_docids` - `documents_ids` - `stop_words` - - `soft_deleted_documents_ids` - `field_distribution` - `fields_ids_map` - `geo_faceted_documents_ids` @@ -308,12 +307,6 @@ pub fn snap_stop_words(index: &Index) -> String { let snap = format!("{stop_words:?}"); snap } -pub fn snap_soft_deleted_documents_ids(index: &Index) -> String { - let rtxn = index.read_txn().unwrap(); - let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); - - display_bitmap(&soft_deleted_documents_ids) -} pub fn snap_field_distributions(index: &Index) -> String { let rtxn = index.read_txn().unwrap(); let mut snap = String::new(); @@ -484,9 +477,6 @@ macro_rules! full_snap_of_db { ($index:ident, stop_words) => {{ $crate::snapshot_tests::snap_stop_words(&$index) }}; - ($index:ident, soft_deleted_documents_ids) => {{ - $crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index) - }}; ($index:ident, field_distribution) => {{ $crate::snapshot_tests::snap_field_distributions(&$index) }}; diff --git a/milli/src/update/available_documents_ids.rs b/milli/src/update/available_documents_ids.rs index 784bee5a7..f460693ba 100644 --- a/milli/src/update/available_documents_ids.rs +++ b/milli/src/update/available_documents_ids.rs @@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds { } impl AvailableDocumentsIds { - pub fn from_documents_ids( - docids: &RoaringBitmap, - soft_deleted_docids: &RoaringBitmap, - ) -> AvailableDocumentsIds { - let used_docids = docids | soft_deleted_docids; - - match used_docids.max() { + pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds { + match docids.max() { Some(last_id) => { let mut available = RoaringBitmap::from_iter(0..last_id); - available -= used_docids; + available -= docids; let iter = match last_id.checked_add(1) { Some(id) => id..=u32::max_value(), @@ -50,7 +45,7 @@ mod tests { #[test] fn empty() { let base = RoaringBitmap::new(); - let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new()); + let left = AvailableDocumentsIds::from_documents_ids(&base); let right = 0..=u32::max_value(); left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); } @@ -63,28 +58,8 @@ mod tests { base.insert(100); base.insert(405); - let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new()); + let left = AvailableDocumentsIds::from_documents_ids(&base); let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405); left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); } - - #[test] - fn soft_deleted() { - let mut base = RoaringBitmap::new(); - base.insert(0); - base.insert(10); - base.insert(100); - base.insert(405); - - let mut soft_deleted = RoaringBitmap::new(); - soft_deleted.insert(1); - soft_deleted.insert(11); - soft_deleted.insert(101); - soft_deleted.insert(406); - - let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted); - let right = - (0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n)); - left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); - } } diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 3eb7e0910..ca5f69808 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -56,7 +56,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?; self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?; self.index.put_documents_ids(self.wtxn, &empty_roaring)?; - self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?; self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?; self.index.delete_geo_rtree(self.wtxn)?; self.index.delete_geo_faceted_documents_ids(self.wtxn)?; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 71e434599..70a5e24c8 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -594,7 +594,6 @@ mod tests { index.add_documents(documents).unwrap(); db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b"); - db_snap!(index, soft_deleted_documents_ids, "initial", @"[]"); let mut documents = vec![]; for i in 0..999 { @@ -616,7 +615,6 @@ mod tests { index.add_documents(documents).unwrap(); db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f"); - db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123"); // Then replace the last document while disabling soft_deletion let mut documents = vec![]; @@ -639,7 +637,6 @@ mod tests { index.add_documents(documents).unwrap(); db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6"); - db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]"); } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index c8481bd48..864e13d04 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -999,7 +999,6 @@ mod tests { assert_eq!(count, 6); db_snap!(index, word_docids, "updated"); - db_snap!(index, soft_deleted_documents_ids, "updated", @"[0, 1, 4, ]"); drop(rtxn); } @@ -2649,8 +2648,6 @@ mod tests { 0 1 "###); - db_snap!(index, soft_deleted_documents_ids, @"[]"); - // BATCH 3 println!("--- ENTERING BATCH 3"); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index e02da8cb5..872230d99 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -132,17 +132,13 @@ impl<'a, 'i> Transform<'a, 'i> { indexer_settings.max_memory.map(|mem| mem / 2), ); let documents_ids = index.documents_ids(wtxn)?; - let soft_deleted_documents_ids = index.soft_deleted_documents_ids(wtxn)?; Ok(Transform { index, fields_ids_map: index.fields_ids_map(wtxn)?, indexer_settings, autogenerate_docids, - available_documents_ids: AvailableDocumentsIds::from_documents_ids( - &documents_ids, - &soft_deleted_documents_ids, - ), + available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids), original_sorter, flattened_sorter, index_documents_method, diff --git a/milli/src/update/prefix_word_pairs/mod.rs b/milli/src/update/prefix_word_pairs/mod.rs index 7d77490bc..d6aa8e5a3 100644 --- a/milli/src/update/prefix_word_pairs/mod.rs +++ b/milli/src/update/prefix_word_pairs/mod.rs @@ -508,7 +508,6 @@ mod tests { db_snap!(index, word_docids, "replaced"); db_snap!(index, word_prefix_pair_proximity_docids, "replaced"); db_snap!(index, prefix_word_pair_proximity_docids, "replaced"); - db_snap!(index, soft_deleted_documents_ids, "replaced", @"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, ]"); } #[test] @@ -568,6 +567,5 @@ mod tests { db_snap!(index, word_docids, "replaced"); db_snap!(index, word_prefix_pair_proximity_docids, "replaced"); db_snap!(index, prefix_word_pair_proximity_docids, "replaced"); - db_snap!(index, soft_deleted_documents_ids, "replaced", @"[]"); } }