From 9522b75454d7e751bc58043eb5e1c07360f0f89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 13 Oct 2022 11:09:00 +0200 Subject: [PATCH] Continue implementation of task deletion 1. Matched tasks are a roaring bitmap 2. Start implementation in meilisearch-http 3. Snapshots use meili-snap 4. Rename to TaskDeletion --- Cargo.lock | 124 ++------------- index-scheduler/Cargo.toml | 3 +- index-scheduler/src/autobatcher.rs | 150 +++++++++--------- index-scheduler/src/batch.rs | 89 ++++++----- index-scheduler/src/lib.rs | 72 ++++----- index-scheduler/src/snapshot.rs | 15 +- .../document_addition/1.snap} | 3 +- .../document_addition/2.snap} | 3 +- .../document_addition/3.snap} | 3 +- .../1.snap} | 1 - .../register/1.snap} | 3 +- .../initial_tasks_enqueued.snap} | 3 +- .../initial_tasks_processed.snap} | 3 +- .../task_deletion_processed.snap} | 7 +- .../initial_tasks_enqueued.snap} | 3 +- .../task_deletion_done.snap} | 7 +- .../task_deletion_enqueued.snap} | 7 +- .../task_deletion_processing.snap} | 7 +- meilisearch-auth/Cargo.toml | 1 - meilisearch-auth/src/action.rs | 135 ---------------- meilisearch-auth/src/error.rs | 2 +- meilisearch-auth/src/store.rs | 1 + meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/routes/tasks.rs | 68 +++++++- meilisearch-types/Cargo.toml | 3 +- meilisearch-types/src/keys.rs | 3 + meilisearch-types/src/tasks.rs | 29 ++-- 27 files changed, 290 insertions(+), 456 deletions(-) rename index-scheduler/src/snapshots/{index_scheduler__tests__document_addition.snap => lib.rs/document_addition/1.snap} (86%) rename index-scheduler/src/snapshots/{index_scheduler__tests__document_addition-2.snap => lib.rs/document_addition/2.snap} (86%) rename index-scheduler/src/snapshots/{index_scheduler__tests__document_addition-3.snap => lib.rs/document_addition/3.snap} (87%) rename index-scheduler/src/snapshots/{index_scheduler__tests__insert_task_while_another_task_is_processing.snap => lib.rs/insert_task_while_another_task_is_processing/1.snap} (93%) rename index-scheduler/src/snapshots/{index_scheduler__tests__register.snap => lib.rs/register/1.snap} (92%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_deleteable.snap => lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap} (89%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_deleteable-2.snap => lib.rs/task_deletion_deleteable/initial_tasks_processed.snap} (89%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_deleteable-3.snap => lib.rs/task_deletion_deleteable/task_deletion_processed.snap} (79%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_undeleteable.snap => lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap} (90%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_undeleteable-4.snap => lib.rs/task_deletion_undeleteable/task_deletion_done.snap} (84%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_undeleteable-2.snap => lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap} (84%) rename index-scheduler/src/snapshots/{index_scheduler__tests__task_deletion_undeleteable-3.snap => lib.rs/task_deletion_undeleteable/task_deletion_processing.snap} (84%) delete mode 100644 meilisearch-auth/src/action.rs diff --git a/Cargo.lock b/Cargo.lock index c9936c960..d8714827e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1331,16 +1331,7 @@ dependencies = [ [[package]] name = "filter-parser" version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" -dependencies = [ - "nom", - "nom_locate", -] - -[[package]] -name = "filter-parser" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +source = "git+https://github.com/meilisearch/milli.git?branch=indexation-abortion#fc03e536153d61da3224698f34fb8c6ee2312c2f" dependencies = [ "nom", "nom_locate", @@ -1359,15 +1350,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" -dependencies = [ - "serde_json", -] - -[[package]] -name = "flatten-serde-json" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +source = "git+https://github.com/meilisearch/milli.git?branch=indexation-abortion#fc03e536153d61da3224698f34fb8c6ee2312c2f" dependencies = [ "serde_json", ] @@ -1501,12 +1484,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "geoutils" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e" - [[package]] name = "geoutils" version = "0.5.1" @@ -1799,9 +1776,10 @@ dependencies = [ "file-store", "insta", "log", + "meili-snap", "meilisearch-types", "nelson", - "roaring 0.9.0", + "roaring", "serde", "serde_json", "synchronoise", @@ -1910,15 +1888,7 @@ dependencies = [ [[package]] name = "json-depth-checker" version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" -dependencies = [ - "serde_json", -] - -[[package]] -name = "json-depth-checker" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +source = "git+https://github.com/meilisearch/milli.git?branch=indexation-abortion#fc03e536153d61da3224698f34fb8c6ee2312c2f" dependencies = [ "serde_json", ] @@ -2285,7 +2255,6 @@ dependencies = [ "enum-iterator 1.1.3", "hmac", "meilisearch-types", - "milli 0.34.0", "rand", "serde", "serde_json", @@ -2386,9 +2355,10 @@ dependencies = [ "enum-iterator 0.7.0", "insta", "meili-snap", - "milli 0.33.4", + "milli", "proptest", "proptest-derive", + "roaring", "serde", "serde_json", "thiserror", @@ -2424,52 +2394,7 @@ dependencies = [ [[package]] name = "milli" version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" -dependencies = [ - "bimap", - "bincode", - "bstr 0.2.17", - "byteorder", - "charabia", - "concat-arrays", - "crossbeam-channel", - "csv", - "either", - "filter-parser 0.33.4", - "flatten-serde-json 0.33.4", - "fst", - "fxhash", - "geoutils 0.4.1", - "grenad", - "heed", - "itertools", - "json-depth-checker 0.33.4", - "levenshtein_automata", - "log", - "logging_timer", - "memmap2", - "obkv", - "once_cell", - "ordered-float 2.10.0", - "rayon", - "roaring 0.9.0", - "rstar", - "serde", - "serde_json", - "slice-group-by", - "smallstr", - "smallvec", - "smartstring", - "tempfile", - "thiserror", - "time", - "uuid 1.1.2", -] - -[[package]] -name = "milli" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +source = "git+https://github.com/meilisearch/milli.git?branch=indexation-abortion#fc03e536153d61da3224698f34fb8c6ee2312c2f" dependencies = [ "bimap", "bincode", @@ -2480,24 +2405,24 @@ dependencies = [ "crossbeam-channel", "csv", "either", - "filter-parser 0.34.0", - "flatten-serde-json 0.34.0", + "filter-parser", + "flatten-serde-json", "fst", "fxhash", - "geoutils 0.5.1", + "geoutils", "grenad", "heed", "itertools", - "json-depth-checker 0.34.0", + "json-depth-checker", "levenshtein_automata", "log", "logging_timer", "memmap2", "obkv", "once_cell", - "ordered-float 3.3.0", + "ordered-float", "rayon", - "roaring 0.10.1", + "roaring", "rstar", "serde", "serde_json", @@ -2690,15 +2615,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "3.3.0" @@ -3264,17 +3180,6 @@ dependencies = [ "regex", ] -[[package]] -name = "roaring" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd539cab4e32019956fe7e0cf160bb6d4802f4be2b52c4253d76d3bb0f85a5f7" -dependencies = [ - "bytemuck", - "byteorder", - "retain_mut", -] - [[package]] name = "roaring" version = "0.10.1" @@ -3284,6 +3189,7 @@ dependencies = [ "bytemuck", "byteorder", "retain_mut", + "serde", ] [[package]] diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 730f34b5c..3969e08ee 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -12,7 +12,7 @@ csv = "1.1.6" file-store = { path = "../file-store" } log = "0.4.14" meilisearch-types = { path = "../meilisearch-types" } -roaring = "0.9.0" +roaring = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } tempfile = "3.3.0" @@ -27,3 +27,4 @@ crossbeam = "0.8.2" nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} insta = { version = "1.19.1", features = ["json", "redactions"] } big_s = "1.0.2" +meili-snap = { path = "../meili-snap" } \ No newline at end of file diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index 26386b566..f67573b31 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -23,7 +23,7 @@ enum AutobatchKind { IndexUpdate, IndexSwap, CancelTask, - DeleteTasks, + TaskDeletion, DumpExport, Snapshot, } @@ -63,7 +63,7 @@ impl From for AutobatchKind { KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate, KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap, KindWithContent::CancelTask { .. } => AutobatchKind::CancelTask, - KindWithContent::DeleteTasks { .. } => AutobatchKind::DeleteTasks, + KindWithContent::TaskDeletion { .. } => AutobatchKind::TaskDeletion, KindWithContent::DumpExport { .. } => AutobatchKind::DumpExport, KindWithContent::Snapshot => AutobatchKind::Snapshot, } @@ -153,7 +153,7 @@ impl BatchKind { allow_index_creation, settings_ids: vec![task_id], }), - K::DumpExport | K::Snapshot | K::CancelTask | K::DeleteTasks => { + K::DumpExport | K::Snapshot | K::CancelTask | K::TaskDeletion => { unreachable!() } } @@ -378,7 +378,7 @@ impl BatchKind { import_ids, }) } - (_, K::CancelTask | K::DeleteTasks | K::DumpExport | K::Snapshot) => { + (_, K::CancelTask | K::TaskDeletion | K::DumpExport | K::Snapshot) => { unreachable!() } ( @@ -414,7 +414,7 @@ pub fn autobatch(enqueued: Vec<(TaskId, KindWithContent)>) -> Option #[cfg(test)] mod tests { - use crate::assert_smol_debug_snapshot; + use crate::debug_snapshot; use super::*; use uuid::Uuid; @@ -492,129 +492,129 @@ mod tests { #[test] fn autobatch_simple_operation_together() { // we can autobatch one or multiple DocumentAddition together - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] })"); // we can autobatch one or multiple DocumentUpdate together - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] })"); // we can autobatch one or multiple DocumentDeletion together - assert_smol_debug_snapshot!(autobatch_from([doc_del()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), doc_del(), doc_del()]), @"Some(DocumentDeletion { deletion_ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_del()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), doc_del(), doc_del()]), @"Some(DocumentDeletion { deletion_ids: [0, 1, 2] })"); // we can autobatch one or multiple Settings together - assert_smol_debug_snapshot!(autobatch_from([settings(true)]), @"Some(Settings { allow_index_creation: true, settings_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([settings(true), settings(true), settings(true)]), @"Some(Settings { allow_index_creation: true, settings_ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([settings(true)]), @"Some(Settings { allow_index_creation: true, settings_ids: [0] })"); + debug_snapshot!(autobatch_from([settings(true), settings(true), settings(true)]), @"Some(Settings { allow_index_creation: true, settings_ids: [0, 1, 2] })"); } #[test] fn simple_document_operation_dont_autobatch_with_other() { // addition, updates and deletion can't batch together - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_del()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_del()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentDeletion { deletion_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), doc_imp(UpdateDocuments, true)]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_del()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_del()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), doc_imp(UpdateDocuments, true)]), @"Some(DocumentDeletion { deletion_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_create()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_create()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), idx_create()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_create()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_create()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), idx_create()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_update()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_update()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), idx_update()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_update()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_update()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), idx_update()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_swap()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_swap()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), idx_swap()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_swap()]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_swap()]), @"Some(DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_del(), idx_swap()]), @"Some(DocumentDeletion { deletion_ids: [0] })"); } #[test] fn document_addition_batch_with_settings() { // simple case - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true)]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); // multiple settings and doc addition - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] })"); // addition and setting unordered - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0, 2] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_imp(UpdateDocuments, true), settings(true)]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_imp(UpdateDocuments, true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 2] })"); // We ensure this kind of batch doesn't batch with forbidden operations - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_imp(UpdateDocuments, true)]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_imp(ReplaceDocuments, true)]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_del()]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_del()]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_create()]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_create()]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_update()]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_update()]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_swap()]), @"Some(settingsAnddoc_im(Repl)eDocuments)allow_index_creation: true, import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_swap()]), @"Some(settingsAnddoc_im(Upda)Documents)allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_imp(UpdateDocuments, true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_imp(ReplaceDocuments, true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_del()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_del()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_create()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_create()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_update()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_update()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_swap()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_swap()]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] })"); } #[test] fn clear_and_additions() { // these two doesn't need to batch - assert_smol_debug_snapshot!(autobatch_from([doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some(doc_clr() { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some(doc_clr() { ids: [0] })"); + debug_snapshot!(autobatch_from([doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentClear { ids: [0] })"); + debug_snapshot!(autobatch_from([doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some(DocumentClear { ids: [0] })"); // Basic use case - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr()]), @"Some(doc_clr() { ids: [0, 1, 2] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr()]), @"Some(doc_clr() { ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr()]), @"Some(DocumentClear { ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr()]), @"Some(DocumentClear { ids: [0, 1, 2] })"); // This batch kind doesn't mix with other document addition - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some(doc_clr() { ids: [0, 1, 2] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some(doc_clr() { ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentClear { ids: [0, 1, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some(DocumentClear { ids: [0, 1, 2] })"); // But you can batch multiple clear together - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some(doc_clr() { ids: [0, 1, 2, 3, 4] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some(doc_clr() { ids: [0, 1, 2, 3, 4] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some(DocumentClear { ids: [0, 1, 2, 3, 4] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some(DocumentClear { ids: [0, 1, 2, 3, 4] })"); } #[test] fn clear_and_additions_and_settings() { // A clear don't need to autobatch the settings that happens AFTER there is no documents - assert_smol_debug_snapshot!(autobatch_from([doc_clr(), settings(true)]), @"Some(doc_clr() { ids: [0] })"); + debug_snapshot!(autobatch_from([doc_clr(), settings(true)]), @"Some(DocumentClear { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([settings(true), doc_clr(), settings(true)]), @"Some(clearAndSettings([1) allow_index_creation: true, settings_ids: [0, 2] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_clr()]), @"Some(clearAndSettings([0)2], allow_index_creation: true, settings_ids: [1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_clr()]), @"Some(clearAndSettings([0)2], allow_index_creation: true, settings_ids: [1] })"); + debug_snapshot!(autobatch_from([settings(true), doc_clr(), settings(true)]), @"Some(ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_clr()]), @"Some(ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_clr()]), @"Some(ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] })"); } #[test] fn anything_and_index_deletion() { // The indexdeletion doesn't batch with anything that happens AFTER - assert_smol_debug_snapshot!(autobatch_from([idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some(idx_del() { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([idx_del(), doc_imp(UpdateDocuments, true)]), @"Some(idx_del() { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([idx_del(), doc_del()]), @"Some(idx_del() { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([idx_del(), doc_clr()]), @"Some(idx_del() { ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([idx_del(), settings(true)]), @"Some(idx_del() { ids: [0] })"); + debug_snapshot!(autobatch_from([idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some(IndexDeletion { ids: [0] })"); + debug_snapshot!(autobatch_from([idx_del(), doc_imp(UpdateDocuments, true)]), @"Some(IndexDeletion { ids: [0] })"); + debug_snapshot!(autobatch_from([idx_del(), doc_del()]), @"Some(IndexDeletion { ids: [0] })"); + debug_snapshot!(autobatch_from([idx_del(), doc_clr()]), @"Some(IndexDeletion { ids: [0] })"); + debug_snapshot!(autobatch_from([idx_del(), settings(true)]), @"Some(IndexDeletion { ids: [0] })"); // The index deletion can accept almost any type of BatchKind and transform it to an idx_del() // First, the basic cases - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_del()]), @"Some(idx_del() { ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_del()]), @"Some(idx_del() { ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_del(), idx_del()]), @"Some(idx_del() { ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_clr(), idx_del()]), @"Some(idx_del() { ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([settings(true), idx_del()]), @"Some(idx_del() { ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), idx_del()]), @"Some(IndexDeletion { ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), idx_del()]), @"Some(IndexDeletion { ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_del(), idx_del()]), @"Some(IndexDeletion { ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_clr(), idx_del()]), @"Some(IndexDeletion { ids: [0, 1] })"); + debug_snapshot!(autobatch_from([settings(true), idx_del()]), @"Some(IndexDeletion { ids: [0, 1] })"); // Then the mixed cases - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some(idx_del() { ids: [0, 2, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some(idx_del() { ids: [0, 2, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some(idx_del() { ids: [1, 3, 0, 2] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some(idx_del() { ids: [1, 3, 0, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some(IndexDeletion { ids: [0, 2, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some(IndexDeletion { ids: [0, 2, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some(IndexDeletion { ids: [1, 3, 0, 2] })"); + debug_snapshot!(autobatch_from([doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some(IndexDeletion { ids: [1, 3, 0, 2] })"); } #[test] fn allowed_and_disallowed_index_creation() { // doc_imp(indexes canbe)ixed with those disallowed to do so - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some(doc_imp(ReplaceDocuments, false)import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some(doc_imp(ReplaceDocuments, true)import_ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some(doc_imp(ReplaceDocuments, false)import_ids: [0, 1] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(settingsAnddoc_imp(: ReplaceDocuments: true)import_ids: [0] })"); - assert_smol_debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), settings(true)]), @"Some(doc_imp(ReplaceDocuments, false)import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, true), settings(true)]), @"Some(SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] })"); + debug_snapshot!(autobatch_from([doc_imp(ReplaceDocuments, false), settings(true)]), @"Some(DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] })"); } } diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 6f7f9c82d..b1d727c2f 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use crate::{autobatcher::BatchKind, Error, IndexScheduler, Result, TaskId}; use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task}; @@ -20,7 +22,7 @@ use uuid::Uuid; pub(crate) enum Batch { Cancel(Task), - DeleteTasks(Task), + TaskDeletion(Task), Snapshot(Vec), Dump(Vec), IndexOperation(IndexOperation), @@ -95,7 +97,7 @@ impl Batch { pub fn ids(&self) -> Vec { match self { Batch::Cancel(task) - | Batch::DeleteTasks(task) + | Batch::TaskDeletion(task) | Batch::IndexCreation { task, .. } | Batch::IndexUpdate { task, .. } => vec![task.uid], Batch::Snapshot(tasks) | Batch::Dump(tasks) | Batch::IndexDeletion { tasks, .. } => { @@ -379,13 +381,13 @@ impl IndexScheduler { } // 2. we get the next task to delete - let to_delete = self.get_kind(rtxn, Kind::DeleteTasks)?; + let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)?; if let Some(task_id) = to_delete.min() { let task = self .get_task(rtxn, task_id)? .ok_or(Error::CorruptedTaskQueue)?; - return Ok(Some(Batch::DeleteTasks(task))); + return Ok(Some(Batch::TaskDeletion(task))); } // 3. we batch the snapshot. @@ -445,10 +447,10 @@ impl IndexScheduler { pub(crate) fn process_batch(&self, batch: Batch) -> Result> { match batch { Batch::Cancel(_) => todo!(), - Batch::DeleteTasks(mut task) => { + Batch::TaskDeletion(mut task) => { // 1. Retrieve the tasks that matched the query at enqueue-time. let matched_tasks = - if let KindWithContent::DeleteTasks { tasks, query: _ } = &task.kind { + if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { tasks } else { unreachable!() @@ -459,7 +461,7 @@ impl IndexScheduler { task.status = Status::Succeeded; match &mut task.details { - Some(Details::DeleteTasks { + Some(Details::TaskDeletion { matched_tasks: _, deleted_tasks, original_query: _, @@ -816,49 +818,52 @@ impl IndexScheduler { /// Delete each given task from all the databases (if it is deleteable). /// /// Return the number of tasks that were actually deleted - fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &[u32]) -> Result { + fn delete_matched_tasks( + &self, + wtxn: &mut RwTxn, + matched_tasks: &RoaringBitmap, + ) -> Result { // 1. Remove from this list the tasks that we are not allowed to delete let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; let processing_tasks = &self.processing_tasks.read().unwrap().1; - let to_delete_tasks = matched_tasks - .iter() - .filter(|&&task_id| { - !processing_tasks.contains(task_id) && !enqueued_tasks.contains(task_id) - }) - .copied(); - let to_delete_tasks = RoaringBitmap::from_iter(to_delete_tasks); + + let mut to_delete_tasks = matched_tasks - processing_tasks; + to_delete_tasks -= enqueued_tasks; + // 2. We now have a list of tasks to delete, delete them + let mut affected_indexes = HashSet::new(); + let mut affected_statuses = HashSet::new(); + let mut affected_kinds = HashSet::new(); + for task_id in to_delete_tasks.iter() { - let task = self.all_tasks.get(wtxn, &BEU32::new(task_id))?.unwrap(); - self.delete_task(wtxn, &task)?; - } - - Ok(to_delete_tasks.len() as usize) - } - - /// Delete the given task from all the databases - fn delete_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { - let task_id = BEU32::new(task.uid); - - if let Some(indexes) = task.indexes() { - for index in indexes { - self.update_index(wtxn, index, |bitmap| { - bitmap.remove(task.uid); - })?; + if let Some(task) = self.all_tasks.get(wtxn, &BEU32::new(task_id))? { + if let Some(task_indexes) = task.indexes() { + affected_indexes.extend(task_indexes.into_iter().map(|x| x.to_owned())); + } + affected_statuses.insert(task.status); + affected_kinds.insert(task.kind.as_kind()); } } - - self.update_status(wtxn, task.status, |bitmap| { - bitmap.remove(task.uid); - })?; - - self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { - (bitmap.remove(task.uid)); - })?; - - self.all_tasks.delete(wtxn, &task_id)?; - Ok(()) + for index in affected_indexes { + self.update_index(wtxn, &index, |bitmap| { + *bitmap -= &to_delete_tasks; + })?; + } + for status in affected_statuses { + self.update_status(wtxn, status, |bitmap| { + *bitmap -= &to_delete_tasks; + })?; + } + for kind in affected_kinds { + self.update_kind(wtxn, kind, |bitmap| { + *bitmap -= &to_delete_tasks; + })?; + } + for task in to_delete_tasks.iter() { + self.all_tasks.delete(wtxn, &BEU32::new(task))?; + } + Ok(to_delete_tasks.len() as usize) } } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index bf6181935..7c3d81b0e 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -242,44 +242,52 @@ impl IndexScheduler { self.index_mapper.indexes(&rtxn) } - /// Returns the tasks corresponding to the query. - pub fn get_tasks(&self, query: Query) -> Result> { + /// Return the task ids corresponding to the query + pub fn get_task_ids(&self, query: &Query) -> Result { let rtxn = self.env.read_txn()?; let last_task_id = match self.last_task_id(&rtxn)? { Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid), - None => return Ok(Vec::new()), + None => return Ok(RoaringBitmap::new()), }; // This is the list of all the tasks. let mut tasks = RoaringBitmap::from_sorted_iter(0..last_task_id).unwrap(); - if let Some(uids) = query.uid { + if let Some(uids) = &query.uid { tasks &= RoaringBitmap::from_iter(uids); } - if let Some(status) = query.status { + if let Some(status) = &query.status { let mut status_tasks = RoaringBitmap::new(); for status in status { - status_tasks |= self.get_status(&rtxn, status)?; + status_tasks |= self.get_status(&rtxn, *status)?; } tasks &= status_tasks; } - if let Some(kind) = query.kind { + if let Some(kind) = &query.kind { let mut kind_tasks = RoaringBitmap::new(); for kind in kind { - kind_tasks |= self.get_kind(&rtxn, kind)?; + kind_tasks |= self.get_kind(&rtxn, *kind)?; } tasks &= kind_tasks; } - if let Some(index) = query.index_uid { + if let Some(index) = &query.index_uid { let mut index_tasks = RoaringBitmap::new(); for index in index { index_tasks |= self.index_tasks(&rtxn, &index)?; } tasks &= index_tasks; } + rtxn.commit().unwrap(); + Ok(tasks) + } + + /// Returns the tasks corresponding to the query. + pub fn get_tasks(&self, query: Query) -> Result> { + let rtxn = self.env.read_txn()?; + let tasks = self.get_task_ids(&query)?; let tasks = self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))?; @@ -442,7 +450,7 @@ impl IndexScheduler { #[cfg(test)] mod tests { use big_s::S; - use insta::*; + use meili_snap::snapshot; use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; use tempfile::TempDir; use uuid::Uuid; @@ -580,7 +588,7 @@ mod tests { assert_eq!(task.kind.as_kind(), k); } - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler)); } #[test] @@ -597,7 +605,7 @@ mod tests { }) .unwrap(); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler)); } /// We send a lot of tasks but notify the tasks scheduler only once as @@ -692,23 +700,23 @@ mod tests { // here we have registered all the tasks, but the index scheduler // has not progressed at all - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); index_scheduler - .register(KindWithContent::DeleteTasks { + .register(KindWithContent::TaskDeletion { query: "test_query".to_owned(), - tasks: vec![0, 1], + tasks: RoaringBitmap::from_iter(&[0, 1]), }) .unwrap(); // again, no progress made at all, but one more task is registered - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); // now we create the first batch handle.wait_till(Breakpoint::BatchCreated); // the task deletion should now be "processing" - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); handle.wait_till(Breakpoint::AfterProcessing); @@ -716,7 +724,7 @@ mod tests { // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable // the "task deletion" task should be marked as "succeeded" and, in its details, the // number of deleted tasks should be 0 - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); } #[test] @@ -737,25 +745,25 @@ mod tests { file0.persist().unwrap(); file1.persist().unwrap(); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); handle.wait_till(Breakpoint::AfterProcessing); // first addition of documents should be successful // TODO: currently the result of this operation is incorrect! // only the first task should be successful, because it should not be batched with // the second task, that operates on a different index! - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); // Now we delete the first task index_scheduler - .register(KindWithContent::DeleteTasks { + .register(KindWithContent::TaskDeletion { query: "test_query".to_owned(), - tasks: vec![0], + tasks: RoaringBitmap::from_iter(&[0]), }) .unwrap(); handle.wait_till(Breakpoint::AfterProcessing); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); } #[test] @@ -784,15 +792,15 @@ mod tests { .unwrap(); file.persist().unwrap(); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler)); handle.wait_till(Breakpoint::BatchCreated); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler)); handle.wait_till(Breakpoint::AfterProcessing); - assert_snapshot!(snapshot_index_scheduler(&index_scheduler)); + snapshot!(snapshot_index_scheduler(&index_scheduler)); } #[test] @@ -833,18 +841,10 @@ mod tests { } #[macro_export] - macro_rules! assert_smol_debug_snapshot { + macro_rules! debug_snapshot { ($value:expr, @$snapshot:literal) => {{ let value = format!("{:?}", $value); - insta::assert_snapshot!(value, stringify!($value), @$snapshot); - }}; - ($name:expr, $value:expr) => {{ - let value = format!("{:?}", $value); - insta::assert_snapshot!(Some($name), value, stringify!($value)); - }}; - ($value:expr) => {{ - let value = format!("{:?}", $value); - insta::assert_snapshot!($crate::_macro_support::AutoName, value, stringify!($value)); + meili_snap::snapshot!(value, @$snapshot); }}; } diff --git a/index-scheduler/src/snapshot.rs b/index-scheduler/src/snapshot.rs index 03201baa5..bd89dd17a 100644 --- a/index-scheduler/src/snapshot.rs +++ b/index-scheduler/src/snapshot.rs @@ -1,9 +1,12 @@ -use meilisearch_types::heed::{ - types::{OwnedType, SerdeBincode, SerdeJson, Str}, - Database, RoTxn, -}; use meilisearch_types::milli::{RoaringBitmapCodec, BEU32}; -use meilisearch_types::tasks::{Details, Task}; +use meilisearch_types::tasks::Details; +use meilisearch_types::{ + heed::{ + types::{OwnedType, SerdeBincode, SerdeJson, Str}, + Database, RoTxn, + }, + tasks::Task, +}; use roaring::RoaringBitmap; use crate::{index_mapper::IndexMapper, IndexScheduler, Kind, Status}; @@ -127,7 +130,7 @@ fn snaphsot_details(d: &Details) -> String { Details::ClearAll { deleted_documents } => { format!("{{ deleted_documents: {deleted_documents:?} }}") }, - Details::DeleteTasks { + Details::TaskDeletion { matched_tasks, deleted_tasks, original_query, diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap similarity index 86% rename from index-scheduler/src/snapshots/index_scheduler__tests__document_addition.snap rename to index-scheduler/src/snapshots/lib.rs/document_addition/1.snap index 0cd5fed11..b04d49bc8 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -13,7 +12,7 @@ expression: snapshot_index_scheduler(&index_scheduler) enqueued [0,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [0,] +"documentImport" [0,] ---------------------------------------------------------------------- ### Index Tasks: doggos [0,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition-2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap similarity index 86% rename from index-scheduler/src/snapshots/index_scheduler__tests__document_addition-2.snap rename to index-scheduler/src/snapshots/lib.rs/document_addition/2.snap index 8e0ba2082..d57c56744 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition-2.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -13,7 +12,7 @@ expression: snapshot_index_scheduler(&index_scheduler) enqueued [0,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [0,] +"documentImport" [0,] ---------------------------------------------------------------------- ### Index Tasks: doggos [0,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition-3.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap similarity index 87% rename from index-scheduler/src/snapshots/index_scheduler__tests__document_addition-3.snap rename to index-scheduler/src/snapshots/lib.rs/document_addition/3.snap index d05f00a6a..d90a4c95a 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__document_addition-3.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -14,7 +13,7 @@ enqueued [] succeeded [0,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [0,] +"documentImport" [0,] ---------------------------------------------------------------------- ### Index Tasks: doggos [0,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__insert_task_while_another_task_is_processing.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap similarity index 93% rename from index-scheduler/src/snapshots/index_scheduler__tests__insert_task_while_another_task_is_processing.snap rename to index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap index 831c9d571..8504cc177 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__insert_task_while_another_task_is_processing.snap +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__register.snap b/index-scheduler/src/snapshots/lib.rs/register/1.snap similarity index 92% rename from index-scheduler/src/snapshots/index_scheduler__tests__register.snap rename to index-scheduler/src/snapshots/lib.rs/register/1.snap index 7fd457f82..1f4432c8a 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__register.snap +++ b/index-scheduler/src/snapshots/lib.rs/register/1.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -17,7 +16,7 @@ expression: snapshot_index_scheduler(&index_scheduler) enqueued [0,1,2,3,4,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,3,4,] +"documentImport" [1,3,4,] "indexCreation" [0,] "cancelTask" [2,] ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap similarity index 89% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap index eb829cc79..8415aacb9 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -14,7 +13,7 @@ expression: snapshot_index_scheduler(&index_scheduler) enqueued [0,1,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [0,1,] +"documentImport" [0,1,] ---------------------------------------------------------------------- ### Index Tasks: catto [0,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-2.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap similarity index 89% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-2.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap index 6da21f7f6..3a0bf7bfc 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-2.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -15,7 +14,7 @@ enqueued [] succeeded [0,1,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [0,1,] +"documentImport" [0,1,] ---------------------------------------------------------------------- ### Index Tasks: catto [0,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-3.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap similarity index 79% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-3.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap index f154ef96c..a2264bea5 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_deleteable-3.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -8,15 +7,15 @@ expression: snapshot_index_scheduler(&index_scheduler) ---------------------------------------------------------------------- ### All Tasks: 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: 1 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} -2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: DeleteTasks { query: "test_query", tasks: [0] }} +2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} ---------------------------------------------------------------------- ### Status: enqueued [] succeeded [1,2,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,] -"deleteTasks" [2,] +"documentImport" [1,] +"taskDeletion" [2,] ---------------------------------------------------------------------- ### Index Tasks: catto [] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap similarity index 90% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap index 8f30eecfb..4b1b50b6c 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -15,7 +14,7 @@ expression: snapshot_index_scheduler(&index_scheduler) enqueued [0,1,2,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,2,] +"documentImport" [1,2,] "indexCreation" [0,] ---------------------------------------------------------------------- ### Index Tasks: diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-4.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap similarity index 84% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-4.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap index a657eaa48..6859b3d06 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-4.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -10,16 +9,16 @@ expression: snapshot_index_scheduler(&index_scheduler) 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: 0 }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: 0 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }} -3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_query: "test_query" }, kind: DeleteTasks { query: "test_query", tasks: [0, 1] }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} ---------------------------------------------------------------------- ### Status: enqueued [0,1,2,] succeeded [3,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,2,] +"documentImport" [1,2,] "indexCreation" [0,] -"deleteTasks" [3,] +"taskDeletion" [3,] ---------------------------------------------------------------------- ### Index Tasks: catto [0,1,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-2.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap similarity index 84% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-2.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap index 442b38631..61784f12e 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-2.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -10,15 +9,15 @@ expression: snapshot_index_scheduler(&index_scheduler) 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: 0 }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: 0 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }} -3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: DeleteTasks { query: "test_query", tasks: [0, 1] }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} ---------------------------------------------------------------------- ### Status: enqueued [0,1,2,3,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,2,] +"documentImport" [1,2,] "indexCreation" [0,] -"deleteTasks" [3,] +"taskDeletion" [3,] ---------------------------------------------------------------------- ### Index Tasks: catto [0,1,] diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-3.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap similarity index 84% rename from index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-3.snap rename to index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap index 69c993b68..4487da22a 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__task_deletion_undeleteable-3.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -1,6 +1,5 @@ --- source: index-scheduler/src/lib.rs -expression: snapshot_index_scheduler(&index_scheduler) --- ### Autobatching Enabled = true ### Processing Tasks: @@ -10,15 +9,15 @@ expression: snapshot_index_scheduler(&index_scheduler) 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: 0 }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: 0 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }} -3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: DeleteTasks { query: "test_query", tasks: [0, 1] }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} ---------------------------------------------------------------------- ### Status: enqueued [0,1,2,3,] ---------------------------------------------------------------------- ### Kind: -{"documentImport":{"method":"ReplaceDocuments","allow_index_creation":true}} [1,2,] +"documentImport" [1,2,] "indexCreation" [0,] -"deleteTasks" [3,] +"taskDeletion" [3,] ---------------------------------------------------------------------- ### Index Tasks: catto [0,1,] diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index a872b4e9a..1b62fd949 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,6 @@ edition = "2021" enum-iterator = "1.1.2" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false } rand = "0.8.5" serde = { version = "1.0.145", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-auth/src/action.rs b/meilisearch-auth/src/action.rs deleted file mode 100644 index 19944d882..000000000 --- a/meilisearch-auth/src/action.rs +++ /dev/null @@ -1,135 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::hash::Hash; - -#[derive( - enum_iterator::Sequence, Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, -)] -#[repr(u8)] -pub enum Action { - #[serde(rename = "*")] - All = 0, - #[serde(rename = "search")] - Search, - #[serde(rename = "documents.*")] - DocumentsAll, - #[serde(rename = "documents.add")] - DocumentsAdd, - #[serde(rename = "documents.get")] - DocumentsGet, - #[serde(rename = "documents.delete")] - DocumentsDelete, - #[serde(rename = "indexes.*")] - IndexesAll, - #[serde(rename = "indexes.create")] - IndexesAdd, - #[serde(rename = "indexes.get")] - IndexesGet, - #[serde(rename = "indexes.update")] - IndexesUpdate, - #[serde(rename = "indexes.delete")] - IndexesDelete, - #[serde(rename = "tasks.*")] - TasksAll, - #[serde(rename = "tasks.get")] - TasksGet, - #[serde(rename = "settings.*")] - SettingsAll, - #[serde(rename = "settings.get")] - SettingsGet, - #[serde(rename = "settings.update")] - SettingsUpdate, - #[serde(rename = "stats.*")] - StatsAll, - #[serde(rename = "stats.get")] - StatsGet, - #[serde(rename = "metrics.*")] - MetricsAll, - #[serde(rename = "metrics.get")] - MetricsGet, - #[serde(rename = "dumps.*")] - DumpsAll, - #[serde(rename = "dumps.create")] - DumpsCreate, - #[serde(rename = "version")] - Version, - #[serde(rename = "keys.create")] - KeysAdd, - #[serde(rename = "keys.get")] - KeysGet, - #[serde(rename = "keys.update")] - KeysUpdate, - #[serde(rename = "keys.delete")] - KeysDelete, -} - -impl Action { - pub const fn from_repr(repr: u8) -> Option { - use actions::*; - match repr { - ALL => Some(Self::All), - SEARCH => Some(Self::Search), - DOCUMENTS_ALL => Some(Self::DocumentsAll), - DOCUMENTS_ADD => Some(Self::DocumentsAdd), - DOCUMENTS_GET => Some(Self::DocumentsGet), - DOCUMENTS_DELETE => Some(Self::DocumentsDelete), - INDEXES_ALL => Some(Self::IndexesAll), - INDEXES_CREATE => Some(Self::IndexesAdd), - INDEXES_GET => Some(Self::IndexesGet), - INDEXES_UPDATE => Some(Self::IndexesUpdate), - INDEXES_DELETE => Some(Self::IndexesDelete), - TASKS_ALL => Some(Self::TasksAll), - TASKS_GET => Some(Self::TasksGet), - SETTINGS_ALL => Some(Self::SettingsAll), - SETTINGS_GET => Some(Self::SettingsGet), - SETTINGS_UPDATE => Some(Self::SettingsUpdate), - STATS_ALL => Some(Self::StatsAll), - STATS_GET => Some(Self::StatsGet), - METRICS_ALL => Some(Self::MetricsAll), - METRICS_GET => Some(Self::MetricsGet), - DUMPS_ALL => Some(Self::DumpsAll), - DUMPS_CREATE => Some(Self::DumpsCreate), - VERSION => Some(Self::Version), - KEYS_CREATE => Some(Self::KeysAdd), - KEYS_GET => Some(Self::KeysGet), - KEYS_UPDATE => Some(Self::KeysUpdate), - KEYS_DELETE => Some(Self::KeysDelete), - _otherwise => None, - } - } - - pub const fn repr(&self) -> u8 { - *self as u8 - } -} - -pub mod actions { - use super::Action::*; - - pub(crate) const ALL: u8 = All.repr(); - pub const SEARCH: u8 = Search.repr(); - pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr(); - pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr(); - pub const DOCUMENTS_GET: u8 = DocumentsGet.repr(); - pub const DOCUMENTS_DELETE: u8 = DocumentsDelete.repr(); - pub const INDEXES_ALL: u8 = IndexesAll.repr(); - pub const INDEXES_CREATE: u8 = IndexesAdd.repr(); - pub const INDEXES_GET: u8 = IndexesGet.repr(); - pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); - pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); - pub const TASKS_ALL: u8 = TasksAll.repr(); - pub const TASKS_GET: u8 = TasksGet.repr(); - pub const SETTINGS_ALL: u8 = SettingsAll.repr(); - pub const SETTINGS_GET: u8 = SettingsGet.repr(); - pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr(); - pub const STATS_ALL: u8 = StatsAll.repr(); - pub const STATS_GET: u8 = StatsGet.repr(); - pub const METRICS_ALL: u8 = MetricsAll.repr(); - pub const METRICS_GET: u8 = MetricsGet.repr(); - pub const DUMPS_ALL: u8 = DumpsAll.repr(); - pub const DUMPS_CREATE: u8 = DumpsCreate.repr(); - pub const VERSION: u8 = Version.repr(); - pub const KEYS_CREATE: u8 = KeysAdd.repr(); - pub const KEYS_GET: u8 = KeysGet.repr(); - pub const KEYS_UPDATE: u8 = KeysUpdate.repr(); - pub const KEYS_DELETE: u8 = KeysDelete.repr(); -} diff --git a/meilisearch-auth/src/error.rs b/meilisearch-auth/src/error.rs index ecd4dbff8..41cb5619d 100644 --- a/meilisearch-auth/src/error.rs +++ b/meilisearch-auth/src/error.rs @@ -1,7 +1,7 @@ use std::error::Error; use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::{internal_error, keys}; +use meilisearch_types::{internal_error, keys, milli}; pub type Result = std::result::Result; diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index b1a2e9520..83a22ca3f 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -12,6 +12,7 @@ use std::sync::Arc; use hmac::{Hmac, Mac}; use meilisearch_types::keys::KeyId; use meilisearch_types::star_or::StarOr; +use meilisearch_types::milli; use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; use milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use sha2::Sha256; diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index c84cbd933..547bfa5c9 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -82,6 +82,7 @@ tokio-stream = "0.1.10" toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" +yaup = "0.2.0" prometheus = { version = "0.13.2", features = ["process"], optional = true } lazy_static = "1.4.0" diff --git a/meilisearch-http/src/routes/tasks.rs b/meilisearch-http/src/routes/tasks.rs index 878ed8383..021ba1e68 100644 --- a/meilisearch-http/src/routes/tasks.rs +++ b/meilisearch-http/src/routes/tasks.rs @@ -1,11 +1,12 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use index_scheduler::{IndexScheduler, TaskId}; +use env_logger::filter; +use index_scheduler::{IndexScheduler, Query, TaskId}; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::star_or::StarOr; -use meilisearch_types::tasks::{serialize_duration, Details, Kind, Status, Task}; +use meilisearch_types::tasks::{serialize_duration, Details, Kind, KindWithContent, Status, Task}; use serde::{Deserialize, Serialize}; use serde_cs::vec::CS; use serde_json::json; @@ -21,7 +22,8 @@ const DEFAULT_LIMIT: fn() -> u32 = || 20; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::get().to(SeqHandler(get_tasks)))) - .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); + .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))) + .service(web::resource("").route(web::delete().to(SeqHandler(delete_tasks)))); } #[derive(Debug, Clone, PartialEq, Serialize)] @@ -140,7 +142,7 @@ impl From
for DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }, - Details::DeleteTasks { + Details::TaskDeletion { matched_tasks, deleted_tasks, original_query, @@ -195,6 +197,29 @@ fn task_status_matches_events(status: &TaskStatus, events: &[TaskEvent]) -> bool }) } +async fn delete_tasks( + index_scheduler: GuardedData, Data>, + params: web::Query, + _req: HttpRequest, + _analytics: web::Data, +) -> Result { + let query = params.into_inner(); + let filtered_query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query); + + let tasks = index_scheduler.get_task_ids(&filtered_query)?; + let filtered_query_string = yaup::to_string(&filtered_query).unwrap(); + let task_deletion = KindWithContent::TaskDeletion { + query: filtered_query_string, + tasks, + }; + // TODO: Lo: analytics + let task = index_scheduler.register(task_deletion)?; + + let task_view: TaskView = task.into(); + + Ok(HttpResponse::Ok().json(task_view)) +} + async fn get_tasks( index_scheduler: GuardedData, Data>, params: web::Query, @@ -318,3 +343,38 @@ async fn get_task( Err(index_scheduler::Error::TaskNotFound(task_id).into()) } } + +fn filter_out_inaccessible_indexes_from_query( + index_scheduler: &GuardedData, Data>, + query: &Query, +) -> Query { + let mut query = query.clone(); + + // First remove all indexes from the query, we will add them back later + let indexes = query.index_uid.take(); + + let search_rules = &index_scheduler.filters().search_rules; + + let mut query = index_scheduler::Query::default(); + + // We filter on potential indexes and make sure that the search filter + // restrictions are also applied. + match indexes { + Some(indexes) => { + for name in indexes.iter() { + if search_rules.is_index_authorized(&name) { + query = query.with_index(name.to_string()); + } + } + } + None => { + if !search_rules.is_index_authorized("*") { + for (index, _policy) in search_rules.clone() { + query = query.with_index(index.to_string()); + } + } + } + }; + + query +} diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 42ee8e54b..468cbbee6 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -8,10 +8,11 @@ edition = "2021" actix-web = { version = "4.2.1", default-features = false } csv = "1.1.6" either = { version = "1.6.1", features = ["serde"] } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } +milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false } enum-iterator = "0.7.0" proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } +roaring = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0.85" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } diff --git a/meilisearch-types/src/keys.rs b/meilisearch-types/src/keys.rs index 1a9b2b19e..cbe888ce2 100644 --- a/meilisearch-types/src/keys.rs +++ b/meilisearch-types/src/keys.rs @@ -224,6 +224,8 @@ pub enum Action { IndexesDelete, #[serde(rename = "tasks.*")] TasksAll, + #[serde(rename = "tasks.*")] + TasksDelete, #[serde(rename = "tasks.get")] TasksGet, #[serde(rename = "settings.*")] @@ -311,6 +313,7 @@ pub mod actions { pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); pub const TASKS_ALL: u8 = TasksAll.repr(); + pub const TASKS_DELETE: u8 = TasksDelete.repr(); pub const TASKS_GET: u8 = TasksGet.repr(); pub const SETTINGS_ALL: u8 = SettingsAll.repr(); pub const SETTINGS_GET: u8 = SettingsGet.repr(); diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index 61e1f51d0..1828cdbcd 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -1,4 +1,5 @@ use milli::update::IndexDocumentsMethod; +use roaring::RoaringBitmap; use serde::{Deserialize, Serialize, Serializer}; use std::{ fmt::{Display, Write}, @@ -42,7 +43,7 @@ impl Task { DumpExport { .. } | Snapshot | CancelTask { .. } - | DeleteTasks { .. } + | TaskDeletion { .. } | IndexSwap { .. } => None, DocumentImport { index_uid, .. } | DocumentDeletion { index_uid, .. } @@ -59,7 +60,7 @@ impl Task { use KindWithContent::*; match &self.kind { - DumpExport { .. } | Snapshot | CancelTask { .. } | DeleteTasks { .. } => None, + DumpExport { .. } | Snapshot | CancelTask { .. } | TaskDeletion { .. } => None, DocumentImport { index_uid, .. } | DocumentDeletion { index_uid, .. } | DocumentClear { index_uid } @@ -114,9 +115,9 @@ pub enum KindWithContent { CancelTask { tasks: Vec, }, - DeleteTasks { + TaskDeletion { query: String, - tasks: Vec, + tasks: RoaringBitmap, }, DumpExport { output: PathBuf, @@ -136,7 +137,7 @@ impl KindWithContent { KindWithContent::IndexUpdate { .. } => Kind::IndexUpdate, KindWithContent::IndexSwap { .. } => Kind::IndexSwap, KindWithContent::CancelTask { .. } => Kind::CancelTask, - KindWithContent::DeleteTasks { .. } => Kind::DeleteTasks, + KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion, KindWithContent::DumpExport { .. } => Kind::DumpExport, KindWithContent::Snapshot => Kind::Snapshot, } @@ -146,7 +147,7 @@ impl KindWithContent { use KindWithContent::*; match self { - DumpExport { .. } | Snapshot | CancelTask { .. } | DeleteTasks { .. } => None, + DumpExport { .. } | Snapshot | CancelTask { .. } | TaskDeletion { .. } => None, DocumentImport { index_uid, .. } | DocumentDeletion { index_uid, .. } | DocumentClear { index_uid } @@ -192,8 +193,8 @@ impl KindWithContent { KindWithContent::CancelTask { .. } => { None // TODO: check correctness of this return value } - KindWithContent::DeleteTasks { query, tasks } => Some(Details::DeleteTasks { - matched_tasks: tasks.len(), + KindWithContent::TaskDeletion { query, tasks } => Some(Details::TaskDeletion { + matched_tasks: tasks.len() as usize, deleted_tasks: None, original_query: query.clone(), }), @@ -203,7 +204,7 @@ impl KindWithContent { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub enum Status { Enqueued, @@ -240,7 +241,7 @@ impl FromStr for Status { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub enum Kind { DocumentImport, @@ -252,7 +253,7 @@ pub enum Kind { IndexUpdate, IndexSwap, CancelTask, - DeleteTasks, + TaskDeletion, DumpExport, Snapshot, } @@ -272,7 +273,7 @@ impl FromStr for Kind { "index_update" => Ok(Kind::IndexUpdate), "index_swap" => Ok(Kind::IndexSwap), "cancel_task" => Ok(Kind::CancelTask), - "delete_tasks" => Ok(Kind::DeleteTasks), + "task_deletion" => Ok(Kind::TaskDeletion), "dump_export" => Ok(Kind::DumpExport), "snapshot" => Ok(Kind::Snapshot), s => Err(ResponseError::from_msg( @@ -304,7 +305,7 @@ pub enum Details { ClearAll { deleted_documents: Option, }, - DeleteTasks { + TaskDeletion { matched_tasks: usize, deleted_tasks: Option, original_query: String, @@ -373,7 +374,7 @@ mod tests { #[test] fn bad_deser() { - let details = Details::DeleteTasks { + let details = Details::TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_query: "hello".to_owned(),