From 33fa17bf12e408c2f8e0fba22b8326f9a48d2b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 10 May 2024 23:26:55 +0200 Subject: [PATCH] Support deleting documents with functions --- index-scheduler/src/batch.rs | 10 ++++++---- index-scheduler/src/insta_snapshot.rs | 5 ++++- meilisearch-types/src/task_view.rs | 23 ++++++++++++++--------- meilisearch-types/src/tasks.rs | 3 +++ milli/src/update/index_documents/mod.rs | 25 ++++++++++++++++++++----- 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index e75d2c60d..d504ef915 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -1420,7 +1420,7 @@ impl IndexScheduler { } else { unreachable!() }; - let edited_documents = edit_documents_by_function( + let result_count = edit_documents_by_function( index_wtxn, filter, context.clone(), @@ -1442,13 +1442,14 @@ impl IndexScheduler { unreachable!(); }; - match edited_documents { - Ok(edited_documents) => { + match result_count { + Ok((deleted_documents, edited_documents)) => { task.status = Status::Succeeded; task.details = Some(Details::DocumentEdition { original_filter, context, function, + deleted_documents: Some(deleted_documents), edited_documents: Some(edited_documents), }); } @@ -1458,6 +1459,7 @@ impl IndexScheduler { original_filter, context, function, + deleted_documents: Some(0), edited_documents: Some(0), }); task.error = Some(e.into()); @@ -1763,7 +1765,7 @@ fn edit_documents_by_function<'a>( indexer_config: &IndexerConfig, must_stop_processing: MustStopProcessing, index: &'a Index, -) -> Result { +) -> Result<(u64, u64)> { let candidates = match filter.as_ref().map(Filter::from_json) { Some(Ok(Some(filter))) => filter.evaluate(wtxn, index).map_err(|err| match err { milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 5c117fe81..c65bb0716 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -178,12 +178,15 @@ fn snapshot_details(d: &Details) -> String { format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}") } Details::DocumentEdition { + deleted_documents, edited_documents, original_filter, context, function, } => { - format!("{{ edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}") + format!( + "{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}" + ) } Details::SettingsUpdate { settings } => { format!("{{ settings: {settings:?} }}") diff --git a/meilisearch-types/src/task_view.rs b/meilisearch-types/src/task_view.rs index 05b362816..3075fa899 100644 --- a/meilisearch-types/src/task_view.rs +++ b/meilisearch-types/src/task_view.rs @@ -93,15 +93,20 @@ impl From
for DetailsView { ..DetailsView::default() } } - Details::DocumentEdition { edited_documents, original_filter, context, function } => { - DetailsView { - edited_documents: Some(edited_documents), - original_filter: Some(original_filter), - context: Some(context), - function: Some(function), - ..DetailsView::default() - } - } + Details::DocumentEdition { + deleted_documents, + edited_documents, + original_filter, + context, + function, + } => DetailsView { + deleted_documents: Some(deleted_documents), + edited_documents: Some(edited_documents), + original_filter: Some(original_filter), + context: Some(context), + function: Some(function), + ..DetailsView::default() + }, Details::SettingsUpdate { mut settings } => { settings.hide_secrets(); DetailsView { settings: Some(settings), ..DetailsView::default() } diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index 37c9bc22b..a4aa67991 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -215,6 +215,7 @@ impl KindWithContent { } KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => { Some(Details::DocumentEdition { + deleted_documents: None, edited_documents: None, original_filter: filter_expr.as_ref().map(|v| v.to_string()), context: context.clone(), @@ -271,6 +272,7 @@ impl KindWithContent { } KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => { Some(Details::DocumentEdition { + deleted_documents: Some(0), edited_documents: Some(0), original_filter: filter_expr.as_ref().map(|v| v.to_string()), context: context.clone(), @@ -533,6 +535,7 @@ pub enum Details { indexed_documents: Option, }, DocumentEdition { + deleted_documents: Option, edited_documents: Option, original_filter: Option, context: Option, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index adbb424b8..0ec4b3739 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -179,10 +179,10 @@ where documents: &RoaringBitmap, context: Option, code: &str, - ) -> Result<(Self, StdResult)> { + ) -> Result<(Self, StdResult<(u64, u64), UserError>)> { // Early return when there is no document to add if documents.is_empty() { - return Ok((self, Ok(0))); + return Ok((self, Ok((0, 0)))); } /// Transform every field of a raw obkv store into a Rhai Map. @@ -228,6 +228,7 @@ where let primary_key = self.index.primary_key(self.wtxn)?.unwrap(); let primary_key_id = fields_ids_map.id(primary_key).unwrap(); let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?; + let mut documents_to_remove = RoaringBitmap::new(); let context: Dynamic = match context { Some(context) => serde_json::from_value(context.into()).unwrap(), @@ -252,8 +253,19 @@ where scope.push_constant_dynamic("context", context.clone()); scope.push("doc", document); let _ = engine.eval_ast_with_scope::(&mut scope, &ast).unwrap(); - let new_document = scope.remove("doc").unwrap(); - let new_document = rhaimap_to_object(new_document); + let new_document = match scope.remove::("doc") { + // If the "doc" variable has been removed from the scope + // or set to (), we effectively delete the document. + Some(doc) if doc.is_unit() => { + documents_to_remove.push(docid); + continue; + } + None => unreachable!(), + Some(document) => match document.try_cast() { + Some(document) => rhaimap_to_object(document), + None => panic!("Why is \"doc\" no longer a Map?"), + }, + }; if document_object != new_document { assert_eq!( @@ -268,7 +280,10 @@ where let file = documents_batch_builder.into_inner()?; let reader = DocumentsBatchReader::from_reader(file)?; - self.add_documents(reader) + let (this, removed) = self.remove_documents_from_db_no_batch(&documents_to_remove)?; + let (this, result) = this.add_documents(reader)?; + + Ok((this, result.map(|added| (removed, added)))) } pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self {