From e6dd66e4a090aa43b2d23daa5c3f45bc393ba23c Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 2 Sep 2024 10:34:28 +0200
Subject: [PATCH] Do not fail the whole batch when a single document deletion
 by filter fails

---
 index-scheduler/src/batch.rs |  37 +++++++++---
 index-scheduler/src/lib.rs   | 112 +++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 7 deletions(-)

diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index 9b5368826..903ec1217 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -1441,6 +1441,8 @@ impl IndexScheduler {
         for task in tasks.iter_mut() {
             let before = to_delete.len();
 
+            task.status = Status::Succeeded;
+
             match &task.kind {
                 KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
                     for id in documents_ids {
@@ -1456,18 +1458,40 @@ impl IndexScheduler {
                 }
                 KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
                     let before = to_delete.len();
-                    let filter = Filter::from_json(filter_expr)?;
+                    let filter = match Filter::from_json(filter_expr) {
+                        Ok(filter) => filter,
+                        Err(err) => {
+                            // theoretically, this should be caught by deserr before reaching the index-scheduler and cannot happen
+                            task.status = Status::Failed;
+                            task.error = match err {
+                                milli::Error::UserError(
+                                    milli::UserError::InvalidFilterExpression { .. },
+                                ) => Some(
+                                    Error::from(err)
+                                        .with_custom_error_code(Code::InvalidDocumentFilter)
+                                        .into(),
+                                ),
+                                e => Some(e.into()),
+                            };
+                            None
+                        }
+                    };
                     if let Some(filter) = filter {
-                        let candidates = filter.evaluate(index_wtxn, index).map_err(
-                            |err| match err {
+                        let candidates =
+                            filter.evaluate(index_wtxn, index).map_err(|err| match err {
                                 milli::Error::UserError(
                                     milli::UserError::InvalidFilter(_),
                                 ) => Error::from(err)
                                     .with_custom_error_code(Code::InvalidDocumentFilter),
                                 e => e.into(),
-                            },
-                        )?;
-                        to_delete |= candidates;
+                            });
+                        match candidates {
+                            Ok(candidates) => to_delete |= candidates,
+                            Err(err) => {
+                                task.status = Status::Failed;
+                                task.error = Some(err.into());
+                            }
+                        };
                     }
                     let will_be_removed = to_delete.len() - before;
                     if let Some(Details::DocumentDeletionByFilter {
@@ -1483,7 +1507,6 @@ impl IndexScheduler {
                 }
                 _ => unreachable!(),
             }
-            task.status = Status::Succeeded;
         }
 
         let config = IndexDocumentsConfig {
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 21e503567..a6ddfde46 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -1764,6 +1764,7 @@ mod tests {
     use crossbeam::channel::RecvTimeoutError;
     use file_store::File;
     use insta::assert_json_snapshot;
+    use maplit::btreeset;
     use meili_snap::{json_string, snapshot};
     use meilisearch_auth::AuthFilter;
     use meilisearch_types::document_formats::DocumentFormatError;
@@ -2553,6 +2554,117 @@ mod tests {
         snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
     }
 
+    #[test]
+    fn fail_in_process_batch_for_document_deletion() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        use meilisearch_types::settings::{Settings, Unchecked};
+        let mut new_settings: Box<Settings<Unchecked>> = Box::default();
+        new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto")));
+
+        index_scheduler
+            .register(
+                KindWithContent::SettingsUpdate {
+                    index_uid: S("doggos"),
+                    new_settings,
+                    is_deletion: false,
+                    allow_index_creation: true,
+                },
+                None,
+                false,
+            )
+            .unwrap();
+
+        let content = r#"[
+            { "id": 1, "doggo": "jean bob" },
+            { "id": 2, "catto": "jorts" },
+            { "id": 3, "doggo": "bork" }
+        ]"#;
+
+        let (uuid, mut file) =
+            index_scheduler.create_update_file_with_uuid(0).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
+        file.persist().unwrap();
+        index_scheduler
+            .register(
+                KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: S("doggos"),
+                    primary_key: Some(S("id")),
+                    method: ReplaceDocuments,
+                    content_file: uuid,
+                    documents_count,
+                    allow_index_creation: true,
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition");
+
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings");
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents");
+
+        index_scheduler
+            .register(
+                KindWithContent::DocumentDeletion {
+                    index_uid: S("doggos"),
+                    documents_ids: vec![S("1")],
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        // This one should not be caught by Meilisearch, but it's still nice to handle it because if we ever break the filters it could happen
+        index_scheduler
+            .register(
+                KindWithContent::DocumentDeletionByFilter {
+                    index_uid: S("doggos"),
+                    filter_expr: serde_json::json!(true),
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        // Should fail because `id` is not a filterable attribute
+        index_scheduler
+            .register(
+                KindWithContent::DocumentDeletionByFilter {
+                    index_uid: S("doggos"),
+                    filter_expr: serde_json::json!("id = 2"),
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        index_scheduler
+            .register(
+                KindWithContent::DocumentDeletionByFilter {
+                    index_uid: S("doggos"),
+                    filter_expr: serde_json::json!("catto EXISTS"),
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions");
+
+        // Everything should be batched together
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents");
+
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork");
+    }
+
     #[test]
     fn do_not_batch_task_of_different_indexes() {
         let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
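For reviewers, the essence of the change in `batch.rs` is the error-handling pattern: instead of `?`-propagating a filter error, which aborted the entire batch, each task now starts out `Succeeded` and is individually downgraded to `Failed` with its error recorded, so the remaining tasks in the batch still run. Below is a minimal, self-contained sketch of that pattern; the `Task`, `Status`, and `evaluate_filter` names here are simplified stand-ins, not Meilisearch's actual types.

```rust
// Hypothetical stand-ins for the scheduler's task types; the real code
// lives in index-scheduler/src/batch.rs and uses milli's Filter/Error.
#[derive(Debug)]
enum Status {
    Enqueued,
    Succeeded,
    Failed,
}

#[derive(Debug)]
struct Task {
    filter: String,
    status: Status,
    error: Option<String>,
}

// Stand-in for `Filter::evaluate`: accepts `key = value` expressions,
// rejects everything else, and returns how many documents matched.
fn evaluate_filter(filter: &str) -> Result<u64, String> {
    if filter.contains('=') {
        Ok(1)
    } else {
        Err(format!("invalid filter expression: `{filter}`"))
    }
}

// Process every task in the batch; a failing task is marked Failed and
// keeps its error, but never aborts the tasks that come after it.
fn process_batch(tasks: &mut [Task]) -> u64 {
    let mut deleted = 0;
    for task in tasks.iter_mut() {
        // Optimistically mark the task as succeeded, as the patch does at
        // the top of its loop, and downgrade only this task on error.
        task.status = Status::Succeeded;
        match evaluate_filter(&task.filter) {
            Ok(count) => deleted += count,
            Err(err) => {
                task.status = Status::Failed;
                task.error = Some(err);
                // No `?` here: the rest of the batch still runs.
            }
        }
    }
    deleted
}

fn main() {
    let mut tasks = vec![
        Task { filter: "id = 2".into(), status: Status::Enqueued, error: None },
        Task { filter: "true".into(), status: Status::Enqueued, error: None }, // fails alone
        Task { filter: "catto = jorts".into(), status: Status::Enqueued, error: None },
    ];
    let deleted = process_batch(&mut tasks);
    println!("deleted {deleted} documents, tasks: {tasks:#?}");
}
```

Moving `task.status = Status::Succeeded` from the end of the loop to the start makes success the default that every error path overrides, which is exactly what the first hunk of the patch does.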