From eb25b0739028b306d90a4b7af42670f7092c16b0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 7 Sep 2023 11:16:51 +0200 Subject: [PATCH 1/8] let you specify your task id --- index-scheduler/src/error.rs | 4 + index-scheduler/src/lib.rs | 815 ++++++++++++-------- meilisearch/src/lib.rs | 2 +- meilisearch/src/routes/dump.rs | 5 +- meilisearch/src/routes/indexes/documents.rs | 25 +- meilisearch/src/routes/indexes/mod.rs | 12 +- meilisearch/src/routes/indexes/settings.rs | 18 +- meilisearch/src/routes/mod.rs | 30 +- meilisearch/src/routes/snapshot.rs | 5 +- meilisearch/src/routes/swap_indexes.rs | 5 +- meilisearch/src/routes/tasks.rs | 9 +- meilisearch/tests/index/create_index.rs | 71 ++ 12 files changed, 655 insertions(+), 346 deletions(-) diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index bbe526460..223b84762 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -48,6 +48,8 @@ impl From for Code { pub enum Error { #[error("{1}")] WithCustomErrorCode(Code, Box), + #[error("Received bad task id: {received} should be >= to {expected}.")] + BadTaskId { received: TaskId, expected: TaskId }, #[error("Index `{0}` not found.")] IndexNotFound(String), #[error("Index `{0}` already exists.")] @@ -161,6 +163,7 @@ impl Error { match self { Error::IndexNotFound(_) | Error::WithCustomErrorCode(_, _) + | Error::BadTaskId { .. } | Error::IndexAlreadyExists(_) | Error::SwapDuplicateIndexFound(_) | Error::SwapDuplicateIndexesFound(_) @@ -205,6 +208,7 @@ impl ErrorCode for Error { fn error_code(&self) -> Code { match self { Error::WithCustomErrorCode(code, _) => *code, + Error::BadTaskId { .. } => Code::BadRequest, Error::IndexNotFound(_) => Code::IndexNotFound, Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 7514a2a68..b1edaabe5 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -993,7 +993,7 @@ impl IndexScheduler { /// Register a new task in the scheduler. /// /// If it fails and data was associated with the task, it tries to delete the associated data. - pub fn register(&self, kind: KindWithContent) -> Result { + pub fn register(&self, kind: KindWithContent, task_id: Option) -> Result { let mut wtxn = self.env.write_txn()?; // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task @@ -1003,8 +1003,16 @@ impl IndexScheduler { return Err(Error::NoSpaceLeftInTaskQueue); } + let next_task_id = self.next_task_id(&wtxn)?; + + if let Some(uid) = task_id { + if uid < next_task_id { + return Err(Error::BadTaskId { received: uid, expected: next_task_id }); + } + } + let mut task = Task { - uid: self.next_task_id(&wtxn)?, + uid: task_id.unwrap_or(next_task_id), enqueued_at: OffsetDateTime::now_utc(), started_at: None, finished_at: None, @@ -1386,13 +1394,16 @@ impl IndexScheduler { // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); - self.register(KindWithContent::TaskDeletion { - query: format!( - "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", - delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, - ), - tasks: to_delete, - })?; + self.register( + KindWithContent::TaskDeletion { + query: format!( + "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", + delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, + ), + tasks: to_delete, + }, + None, + )?; Ok(()) } @@ -2016,7 +2027,7 @@ mod tests { for (idx, kind) in kinds.into_iter().enumerate() { let k = kind.as_kind(); - let task = index_scheduler.register(kind).unwrap(); + let task = index_scheduler.register(kind, None).unwrap(); index_scheduler.assert_internally_consistent(); assert_eq!(task.uid, idx as u32); @@ -2031,18 +2042,18 @@ mod tests { fn insert_task_while_another_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); // while the task is processing can we register another task? - index_scheduler.register(index_creation_task("index_b", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_b", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }) + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); } @@ -2051,7 +2062,7 @@ mod tests { fn test_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); handle.advance_till([Start, BatchCreated]); @@ -2065,17 +2076,23 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2094,22 +2111,25 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); @@ -2142,7 +2162,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2151,10 +2171,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }, + None, + ) .unwrap(); // again, no progress made at all, but one more task is registered snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); @@ -2188,7 +2211,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2199,10 +2222,13 @@ mod tests { // Now we delete the first task index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); @@ -2225,7 +2251,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2237,10 +2263,13 @@ mod tests { // Now we delete the first task multiple times in a row for _ in 0..2 { index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2263,14 +2292,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); @@ -2292,7 +2324,10 @@ mod tests { }"#; index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2300,19 +2335,22 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2336,21 +2374,27 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2373,10 +2417,13 @@ mod tests { fn document_deletion_and_document_addition() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2390,14 +2437,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2428,17 +2478,20 @@ mod tests { for name in index_names { index_scheduler - .register(KindWithContent::IndexCreation { - index_uid: name.to_string(), - primary_key: None, - }) + .register( + KindWithContent::IndexCreation { + index_uid: name.to_string(), + primary_key: None, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } for name in index_names { index_scheduler - .register(KindWithContent::DocumentClear { index_uid: name.to_string() }) + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2463,7 +2516,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2477,18 +2530,24 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, + ], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); @@ -2498,7 +2557,7 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }).unwrap(); + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None).unwrap(); handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); } @@ -2515,7 +2574,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_n_successful_batches(4); @@ -2525,12 +2584,15 @@ mod tests { snapshot!(first_snap, name: "initial_tasks_processed"); let err = index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }, + None, + ) .unwrap_err(); snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); @@ -2539,13 +2601,16 @@ mod tests { // Index `e` does not exist, but we don't check its existence yet index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, - IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); // Now the first swap should have an error message saying `e` and `f` do not exist @@ -2566,17 +2631,20 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler)); @@ -2601,7 +2669,7 @@ mod tests { }, ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2618,7 +2686,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2626,10 +2694,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); @@ -2644,7 +2715,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2652,10 +2723,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); @@ -2685,7 +2759,7 @@ mod tests { replace_document_import_task("wolfo", None, 2, documents_count2), ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_one_successful_batch(); @@ -2693,10 +2767,13 @@ mod tests { handle.advance_till([Start, BatchCreated, InsideProcessBatch]); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1, 2]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); @@ -2724,14 +2801,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2771,14 +2851,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2820,14 +2903,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2870,14 +2956,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2921,14 +3010,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2973,13 +3065,13 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); handle.advance_n_successful_batches(3); @@ -3037,11 +3129,11 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3260,17 +3352,17 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3346,20 +3438,20 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); handle.advance_n_successful_batches(1); let kind = KindWithContent::TaskCancelation { query: "test_query".to_string(), tasks: [0, 1, 2, 3].into_iter().collect(), }; - let task_cancelation = index_scheduler.register(kind).unwrap(); + let task_cancelation = index_scheduler.register(kind, None).unwrap(); handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3394,7 +3486,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); handle.advance_one_failed_batch(); @@ -3419,14 +3511,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); @@ -3457,14 +3552,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3513,14 +3611,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3561,14 +3662,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3596,7 +3700,10 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3615,14 +3722,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3655,7 +3765,10 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3674,14 +3787,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3718,7 +3834,10 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3738,14 +3857,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3791,14 +3913,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3843,14 +3968,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3904,14 +4032,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3961,14 +4092,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4042,14 +4176,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4125,14 +4262,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4186,7 +4326,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); @@ -4206,15 +4346,18 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); // on average this task takes ~600 bytes loop { - let result = index_scheduler.register(KindWithContent::IndexCreation { - index_uid: S("doggo"), - primary_key: None, - }); + let result = index_scheduler.register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ); if result.is_err() { break; } @@ -4224,7 +4367,10 @@ mod tests { // at this point the task DB shoud have reached its limit and we should not be able to register new tasks let result = index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4232,10 +4378,10 @@ mod tests { // Even the task deletion that doesn't delete anything shouldn't be accepted let result = index_scheduler - .register(KindWithContent::TaskDeletion { - query: S("test"), - tasks: RoaringBitmap::new(), - }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, + None, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4243,13 +4389,19 @@ mod tests { // But a task deletion that delete something should works index_scheduler - .register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); // Now we should be able to enqueue a few tasks again index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); } @@ -4262,22 +4414,34 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); // at this point the max number of tasks is reached // we can still enqueue multiple tasks index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap(); @@ -4325,11 +4489,11 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" { @@ -4479,11 +4643,11 @@ mod tests { query: "cancel dump".to_owned(), tasks: RoaringBitmap::from_iter([0]), }; - let _ = index_scheduler.register(dump_creation).unwrap(); + let _ = index_scheduler.register(dump_creation, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - let _ = index_scheduler.register(dump_cancellation).unwrap(); + let _ = index_scheduler.register(dump_cancellation, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); @@ -4491,4 +4655,21 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); } + + #[test] + fn basic_set_taskid() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None).unwrap(); + snapshot!(task.uid, @"0"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12)).unwrap(); + snapshot!(task.uid, @"12"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let error = index_scheduler.register(kind, Some(5)).unwrap_err(); + snapshot!(error, @"Received bad task id: 5 should be >= to 13."); + } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 01ca63857..b91edaf01 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -265,7 +265,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< .name(String::from("register-snapshot-tasks")) .spawn(move || loop { thread::sleep(snapshot_delay); - if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) { + if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) { error!("Error while registering snapshot: {}", e); } }) diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 071ae60b8..8f44070d8 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -11,7 +11,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); @@ -29,8 +29,9 @@ pub async fn create_dump( keys: auth_controller.list_keys()?, instance_uid: analytics.instance_uid().cloned(), }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 1f41fa10c..492f039cf 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -7,7 +7,7 @@ use bstr::ByteSlice as _; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; -use index_scheduler::IndexScheduler; +use index_scheduler::{IndexScheduler, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -36,7 +36,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::search::parse_filter; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { @@ -130,9 +130,10 @@ pub async fn delete_document( index_uid: index_uid.to_string(), documents_ids: vec![document_id], }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!(returns = ?task, "Delete document"); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -277,6 +278,7 @@ pub async fn replace_documents( analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -285,6 +287,7 @@ pub async fn replace_documents( params.csv_delimiter, body, IndexDocumentsMethod::ReplaceDocuments, + uid, allow_index_creation, ) .await?; @@ -309,6 +312,7 @@ pub async fn update_documents( analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -317,6 +321,7 @@ pub async fn update_documents( params.csv_delimiter, body, IndexDocumentsMethod::UpdateDocuments, + uid, allow_index_creation, ) .await?; @@ -334,6 +339,7 @@ async fn document_addition( csv_delimiter: Option, mut body: Payload, method: IndexDocumentsMethod, + task_id: Option, allow_index_creation: bool, ) -> Result { let format = match ( @@ -450,7 +456,7 @@ async fn document_addition( }; let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? { + let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? { Ok(task) => task, Err(e) => { index_scheduler.delete_update_file(uuid)?; @@ -480,8 +486,9 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) @@ -516,8 +523,9 @@ pub async fn delete_documents_by_filter( .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) @@ -533,8 +541,9 @@ pub async fn clear_all_documents( analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index d80bd9c61..6451d930d 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -17,7 +17,7 @@ use serde_json::json; use time::OffsetDateTime; use tracing::debug; -use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -137,8 +137,9 @@ pub async fn create_index( ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) @@ -206,8 +207,9 @@ pub async fn update_index( primary_key: body.primary_key, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) @@ -216,11 +218,13 @@ pub async fn update_index( pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 23e8925c7..9fbd84161 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -15,7 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; #[macro_export] macro_rules! make_setting_route { @@ -34,7 +34,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::routes::SummarizedTaskView; + use $crate::routes::{get_task_id, SummarizedTaskView}; pub async fn delete( index_scheduler: GuardedData< @@ -42,6 +42,7 @@ macro_rules! make_setting_route { Data, >, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -56,8 +57,9 @@ macro_rules! make_setting_route { is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? .into(); @@ -105,8 +107,9 @@ macro_rules! make_setting_route { is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? .into(); @@ -767,8 +770,9 @@ pub async fn update_all( is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) @@ -790,6 +794,7 @@ pub async fn get_all( pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -803,8 +808,9 @@ pub async fn delete_all( is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 89cf63c50..61a9f3352 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -4,7 +4,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_types::error::ResponseError; +use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; @@ -45,6 +45,34 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/experimental-features").configure(features::configure)); } +pub fn get_task_id(req: &HttpRequest) -> Result, ResponseError> { + let task_id = req + .headers() + .get("TaskId") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("TaskId is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? + .map(|s| { + s.parse::().map_err(|e| { + ResponseError::from_msg( + format!( + "Could not parse the TaskId as a {}: {e}", + std::any::type_name::(), + ), + Code::BadRequest, + ) + }) + }) + .transpose()?; + Ok(task_id) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index c94529932..28dbac85f 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -10,7 +10,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); @@ -24,8 +24,9 @@ pub async fn create_snapshot( analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); let task = KindWithContent::SnapshotCreation; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 79e619705..64268dbfa 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde_json::json; -use super::SummarizedTaskView; +use super::{get_task_id, SummarizedTaskView}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -60,7 +60,8 @@ pub async fn swap_indexes( } let task = KindWithContent::IndexSwap { swaps }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 03b63001d..26e1c43f8 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -18,7 +18,7 @@ use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; -use super::SummarizedTaskView; +use super::{get_task_id, SummarizedTaskView}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; @@ -197,7 +197,9 @@ async fn cancel_tasks( let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??; + let uid = get_task_id(&req)?; + let task = + task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) @@ -242,7 +244,8 @@ async fn delete_tasks( let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; - let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??; + let uid = get_task_id(&req)?; + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index 7ce56d440..b9f755f35 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -199,3 +199,74 @@ async fn error_create_with_invalid_index_uid() { } "###); } + +#[actix_rt::test] +async fn send_task_id() { + let server = Server::new().await; + let app = server.init_web_app().await; + let index = server.index("catto"); + let (response, code) = index.create(None).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "catto", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "doggo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "25")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"202 Accepted"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 25, + "indexUid": "doggo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "girafo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "12")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"400 Bad Request"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response), @r###" + { + "message": "Received bad task id: 12 should be >= to 26.", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} From 507739bd9893ab30d8e8e6a63364c0aa1ccb4580 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 20 Feb 2024 11:24:44 +0100 Subject: [PATCH 2/8] add an experimental cli parameter to allow specifying your task id --- .../src/analytics/segment_analytics.rs | 11 +++++++++++ .../src/extractors/sequential_extractor.rs | 1 + meilisearch/src/lib.rs | 1 + meilisearch/src/option.rs | 17 +++++++++++++++++ meilisearch/src/routes/dump.rs | 4 +++- meilisearch/src/routes/indexes/documents.rs | 19 +++++++++++++------ meilisearch/src/routes/indexes/mod.rs | 10 +++++++--- meilisearch/src/routes/indexes/settings.rs | 14 ++++++++++---- meilisearch/src/routes/mod.rs | 6 +++++- meilisearch/src/routes/snapshot.rs | 4 +++- meilisearch/src/routes/swap_indexes.rs | 4 +++- meilisearch/src/routes/tasks.rs | 7 +++++-- meilisearch/tests/index/create_index.rs | 9 +++++++-- 13 files changed, 86 insertions(+), 21 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 7e9fff925..55dd02460 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -253,6 +253,7 @@ struct Infos { env: String, experimental_enable_metrics: bool, experimental_logs_mode: LogMode, + experimental_ha_parameters: bool, experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, @@ -291,7 +292,12 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, +<<<<<<< HEAD experimental_logs_mode, +||||||| parent of 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) +======= + experimental_ha_parameters, +>>>>>>> 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, @@ -339,7 +345,12 @@ impl From for Infos { Self { env, experimental_enable_metrics, +<<<<<<< HEAD experimental_logs_mode, +||||||| parent of 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) +======= + experimental_ha_parameters, +>>>>>>> 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), diff --git a/meilisearch/src/extractors/sequential_extractor.rs b/meilisearch/src/extractors/sequential_extractor.rs index c04210616..23d6cb997 100644 --- a/meilisearch/src/extractors/sequential_extractor.rs +++ b/meilisearch/src/extractors/sequential_extractor.rs @@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C } gen_seq! { SeqFromRequestFut4; A B C D } gen_seq! { SeqFromRequestFut5; A B C D E } gen_seq! { SeqFromRequestFut6; A B C D E F } +gen_seq! { SeqFromRequestFut7; A B C D E F G } pin_project! { #[project = ExtractProj] diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b91edaf01..a6a0f0d77 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -468,6 +468,7 @@ pub fn configure_data( .app_data(web::Data::from(analytics)) .app_data(web::Data::new(logs_route)) .app_data(web::Data::new(logs_stderr)) + .app_data(web::Data::new(opt.clone())) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index cd99bf452..58f3791e8 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -52,6 +52,7 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE"; +const MEILI_EXPERIMENTAL_HA_PARAMETERS: &str = "MEILI_EXPERIMENTAL_HA_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = @@ -358,6 +359,17 @@ pub struct Opt { #[serde(default)] pub experimental_enable_logs_route: bool, + /// Enable multiple features that helps you to run meilisearch in a high availability context. + /// TODO: TAMO: Update the discussion link + /// For more information, see: + /// + /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now + /// - Lets you specify a custom task ID upon registering a task + /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) + #[clap(long, env = MEILI_EXPERIMENTAL_HA_PARAMETERS)] + #[serde(default)] + pub experimental_ha_parameters: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] @@ -465,6 +477,7 @@ impl Opt { experimental_enable_metrics, experimental_logs_mode, experimental_enable_logs_route, + experimental_ha_parameters, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -525,6 +538,10 @@ impl Opt { MEILI_EXPERIMENTAL_LOGS_MODE, experimental_logs_mode.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_HA_PARAMETERS, + experimental_ha_parameters.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, experimental_enable_logs_route.to_string(), diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 8f44070d8..56231a759 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -12,6 +12,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); @@ -21,6 +22,7 @@ pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); @@ -29,7 +31,7 @@ pub async fn create_dump( keys: auth_controller.list_keys()?, instance_uid: analytics.instance_uid().cloned(), }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 492f039cf..5bf7eaa8d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -38,6 +38,7 @@ use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::search::parse_filter; +use crate::Opt; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -119,6 +120,7 @@ pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); @@ -130,7 +132,7 @@ pub async fn delete_document( index_uid: index_uid.to_string(), documents_ids: vec![document_id], }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!("returns: {:?}", task); @@ -268,6 +270,7 @@ pub async fn replace_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -278,7 +281,7 @@ pub async fn replace_documents( analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -302,6 +305,7 @@ pub async fn update_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -312,7 +316,7 @@ pub async fn update_documents( analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -472,6 +476,7 @@ pub async fn delete_documents_batch( index_uid: web::Path, body: web::Json>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); @@ -486,7 +491,7 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -506,6 +511,7 @@ pub async fn delete_documents_by_filter( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); @@ -523,7 +529,7 @@ pub async fn delete_documents_by_filter( .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -535,13 +541,14 @@ pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 6451d930d..59a1f0e64 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -22,6 +22,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; pub mod documents; pub mod facet_search; @@ -123,6 +124,7 @@ pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Create index"); @@ -137,7 +139,7 @@ pub async fn create_index( ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create index"); @@ -191,6 +193,7 @@ pub async fn update_index( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Update index"); @@ -207,7 +210,7 @@ pub async fn update_index( primary_key: body.primary_key, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -219,10 +222,11 @@ pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete index"); diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 9fbd84161..6e43bce41 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -16,6 +16,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; #[macro_export] macro_rules! make_setting_route { @@ -34,6 +35,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::Opt; use $crate::routes::{get_task_id, SummarizedTaskView}; pub async fn delete( @@ -43,6 +45,7 @@ macro_rules! make_setting_route { >, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -57,7 +60,7 @@ macro_rules! make_setting_route { is_deletion: true, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? @@ -75,6 +78,7 @@ macro_rules! make_setting_route { index_uid: actix_web::web::Path, body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, + opt: web::Data, $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -107,7 +111,7 @@ macro_rules! make_setting_route { is_deletion: false, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? @@ -655,6 +659,7 @@ pub async fn update_all( index_uid: web::Path, body: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -770,7 +775,7 @@ pub async fn update_all( is_deletion: false, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -795,6 +800,7 @@ pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -808,7 +814,7 @@ pub async fn delete_all( is_deletion: true, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 61a9f3352..2dc89b150 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -15,6 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; +use crate::Opt; const PAGINATION_DEFAULT_LIMIT: usize = 20; @@ -45,7 +46,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/experimental-features").configure(features::configure)); } -pub fn get_task_id(req: &HttpRequest) -> Result, ResponseError> { +pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, ResponseError> { + if !opt.experimental_ha_parameters { + return Ok(None); + } let task_id = req .headers() .get("TaskId") diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 28dbac85f..6b3178126 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -11,6 +11,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); @@ -19,12 +20,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); let task = KindWithContent::SnapshotCreation; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 64268dbfa..f8adeeb18 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -16,6 +16,7 @@ use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); @@ -32,6 +33,7 @@ pub async fn swap_indexes( index_scheduler: GuardedData, Data>, params: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -60,7 +62,7 @@ pub async fn swap_indexes( } let task = KindWithContent::IndexSwap { swaps }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 26e1c43f8..279b57e3d 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -23,6 +23,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; const DEFAULT_LIMIT: u32 = 20; @@ -161,6 +162,7 @@ async fn cancel_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -197,7 +199,7 @@ async fn cancel_tasks( let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; let task: SummarizedTaskView = task.into(); @@ -209,6 +211,7 @@ async fn delete_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -244,7 +247,7 @@ async fn delete_tasks( let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; let task: SummarizedTaskView = task.into(); diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index b9f755f35..7a678624c 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -2,9 +2,10 @@ use actix_web::http::header::ContentType; use actix_web::test; use http::header::ACCEPT_ENCODING; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use crate::common::encoder::Encoder; -use crate::common::{Server, Value}; +use crate::common::{default_settings, Server, Value}; use crate::json; #[actix_rt::test] @@ -202,7 +203,11 @@ async fn error_create_with_invalid_index_uid() { #[actix_rt::test] async fn send_task_id() { - let server = Server::new().await; + let temp = tempfile::tempdir().unwrap(); + + let options = Opt { experimental_ha_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + let app = server.init_web_app().await; let index = server.index("catto"); let (response, code) = index.create(None).await; From 1eb1c043b59dce623012a79cc2e2c6fcceb0cade Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 20 Feb 2024 12:16:50 +0100 Subject: [PATCH 3/8] disable the auto deletion of tasks when the ha mode is enabled --- index-scheduler/src/insta_snapshot.rs | 1 + index-scheduler/src/lib.rs | 68 +++++++++++++- .../task_deletion_have_not_been_enqueued.snap | 90 +++++++++++++++++++ .../task_queue_is_full.snap | 90 +++++++++++++++++++ meilisearch/src/lib.rs | 1 + 5 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 42f041578..988e75b81 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let IndexScheduler { autobatching_enabled, + cleanup_enabled: _, must_stop_processing: _, processing_tasks, file_store, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index b1edaabe5..9a1799469 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -264,6 +264,9 @@ pub struct IndexSchedulerOptions { /// Set to `true` iff the index scheduler is allowed to automatically /// batch tasks together, to process multiple tasks at once. pub autobatching_enabled: bool, + /// Set to `true` iff the index scheduler is allowed to automatically + /// delete the finished tasks when there are too many tasks. + pub cleanup_enabled: bool, /// The maximum number of tasks stored in the task queue before starting /// to auto schedule task deletions. pub max_number_of_tasks: usize, @@ -324,6 +327,9 @@ pub struct IndexScheduler { /// Whether auto-batching is enabled or not. pub(crate) autobatching_enabled: bool, + /// Whether we should automatically cleanup the task queue or not. + pub(crate) cleanup_enabled: bool, + /// The max number of tasks allowed before the scheduler starts to delete /// the finished tasks automatically. pub(crate) max_number_of_tasks: usize, @@ -390,6 +396,7 @@ impl IndexScheduler { index_mapper: self.index_mapper.clone(), wake_up: self.wake_up.clone(), autobatching_enabled: self.autobatching_enabled, + cleanup_enabled: self.cleanup_enabled, max_number_of_tasks: self.max_number_of_tasks, max_number_of_batched_tasks: self.max_number_of_batched_tasks, puffin_frame: self.puffin_frame.clone(), @@ -491,6 +498,7 @@ impl IndexScheduler { wake_up: Arc::new(SignalEvent::auto(true)), puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, + cleanup_enabled: options.cleanup_enabled, max_number_of_tasks: options.max_number_of_tasks, max_number_of_batched_tasks: options.max_number_of_batched_tasks, dumps_path: options.dumps_path, @@ -1134,7 +1142,9 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } - self.cleanup_task_queue()?; + if self.cleanup_enabled { + self.cleanup_task_queue()?; + } let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let batch = @@ -1781,6 +1791,7 @@ mod tests { index_count: 5, indexer_config, autobatching_enabled: true, + cleanup_enabled: true, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: usize::MAX, instance_features: Default::default(), @@ -4484,6 +4495,61 @@ mod tests { drop(rtxn); } + #[test] + fn test_disable_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = + IndexScheduler::test_with_custom_config(vec![], |config| { + config.cleanup_enabled = false; + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + + // now we're above the max number of tasks + // and if we try to advance in the tick function no new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + drop(rtxn); + } + #[test] fn basic_get_stats() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index a6a0f0d77..292a87259 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -300,6 +300,7 @@ fn open_or_create_database_unchecked( enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, + cleanup_enabled: !opt.experimental_ha_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, From 36c27a18a1c45f2069fc8d0fbec4d48a49dfa447 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 11:21:26 +0100 Subject: [PATCH 4/8] implement the dry run ha parameter --- file-store/src/lib.rs | 22 +- index-scheduler/src/lib.rs | 249 ++++++++++++++++---- meilisearch/src/lib.rs | 4 +- meilisearch/src/routes/dump.rs | 7 +- meilisearch/src/routes/indexes/documents.rs | 35 ++- meilisearch/src/routes/indexes/mod.rs | 16 +- meilisearch/src/routes/indexes/settings.rs | 20 +- meilisearch/src/routes/mod.rs | 19 ++ meilisearch/src/routes/snapshot.rs | 7 +- meilisearch/src/routes/swap_indexes.rs | 7 +- meilisearch/src/routes/tasks.rs | 10 +- 11 files changed, 317 insertions(+), 79 deletions(-) diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index 75db9bb5f..e3851a2df 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -56,7 +56,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { dry: false, file, path }; Ok((uuid, update_file)) } @@ -67,7 +67,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::from_u128(uuid); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { dry: false, file, path }; Ok((uuid, update_file)) } @@ -135,13 +135,29 @@ impl FileStore { } pub struct File { + dry: bool, path: PathBuf, file: NamedTempFile, } impl File { + pub fn dry_file() -> Result { + #[cfg(target_family = "unix")] + let path = PathBuf::from_str("/dev/null").unwrap(); + #[cfg(target_family = "windows")] + let path = PathBuf::from_str("\\Device\\Null").unwrap(); + + Ok(Self { + dry: true, + path: path.clone(), + file: tempfile::Builder::new().make(|_| std::fs::File::create(path.clone()))?, + }) + } + pub fn persist(self) -> Result<()> { - self.file.persist(&self.path)?; + if !self.dry { + self.file.persist(&self.path)?; + } Ok(()) } } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 9a1799469..5d0ce9eb9 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1001,7 +1001,12 @@ impl IndexScheduler { /// Register a new task in the scheduler. /// /// If it fails and data was associated with the task, it tries to delete the associated data. - pub fn register(&self, kind: KindWithContent, task_id: Option) -> Result { + pub fn register( + &self, + kind: KindWithContent, + task_id: Option, + dry_run: bool, + ) -> Result { let mut wtxn = self.env.write_txn()?; // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task @@ -1037,6 +1042,11 @@ impl IndexScheduler { // (that it does not contain duplicate indexes). check_index_swap_validity(&task)?; + // At this point the task is going to be registered and no further checks will be done + if dry_run { + return Ok(task); + } + // Get rid of the mutability. let task = task; @@ -1101,8 +1111,12 @@ impl IndexScheduler { /// The returned file and uuid can be used to associate /// some data to a task. The file will be kept until /// the task has been fully processed. - pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> { - Ok(self.file_store.new_update()?) + pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { + if dry_run { + Ok((Uuid::nil(), file_store::File::dry_file()?)) + } else { + Ok(self.file_store.new_update()?) + } } #[cfg(test)] @@ -1413,6 +1427,7 @@ impl IndexScheduler { tasks: to_delete, }, None, + false, )?; Ok(()) @@ -1534,7 +1549,7 @@ impl<'a> Dump<'a> { ) -> Result { let content_uuid = match content_file { Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.index_scheduler.create_update_file()?; + let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); for doc in content_file { builder.append_json_object(&doc?)?; @@ -2038,7 +2053,7 @@ mod tests { for (idx, kind) in kinds.into_iter().enumerate() { let k = kind.as_kind(); - let task = index_scheduler.register(kind, None).unwrap(); + let task = index_scheduler.register(kind, None, false).unwrap(); index_scheduler.assert_internally_consistent(); assert_eq!(task.uid, idx as u32); @@ -2053,18 +2068,18 @@ mod tests { fn insert_task_while_another_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); // while the task is processing can we register another task? - index_scheduler.register(index_creation_task("index_b", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); } @@ -2073,7 +2088,7 @@ mod tests { fn test_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); handle.advance_till([Start, BatchCreated]); @@ -2090,6 +2105,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2098,12 +2114,13 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2125,22 +2142,23 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); @@ -2173,7 +2191,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2188,6 +2206,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0, 1]), }, None, + false, ) .unwrap(); // again, no progress made at all, but one more task is registered @@ -2222,7 +2241,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2239,6 +2258,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); @@ -2262,7 +2282,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2280,6 +2300,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2313,6 +2334,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); @@ -2338,6 +2360,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2356,12 +2379,13 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2395,6 +2419,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2405,6 +2430,7 @@ mod tests { documents_ids: vec![S("1"), S("2")], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2434,6 +2460,7 @@ mod tests { documents_ids: vec![S("1"), S("2")], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2458,6 +2485,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2495,6 +2523,7 @@ mod tests { primary_key: None, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2502,7 +2531,11 @@ mod tests { for name in index_names { index_scheduler - .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None) + .register( + KindWithContent::DocumentClear { index_uid: name.to_string() }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2527,7 +2560,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2549,6 +2582,7 @@ mod tests { ], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); @@ -2558,6 +2592,7 @@ mod tests { swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); @@ -2568,7 +2603,9 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None).unwrap(); + index_scheduler + .register(KindWithContent::IndexSwap { swaps: vec![] }, None, false) + .unwrap(); handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); } @@ -2585,7 +2622,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_n_successful_batches(4); @@ -2603,6 +2640,7 @@ mod tests { ], }, None, + false, ) .unwrap_err(); snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); @@ -2621,6 +2659,7 @@ mod tests { ], }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -2652,10 +2691,11 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler)); @@ -2680,7 +2720,7 @@ mod tests { }, ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2697,7 +2737,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2711,6 +2751,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); @@ -2726,7 +2767,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2740,6 +2781,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); @@ -2770,7 +2812,7 @@ mod tests { replace_document_import_task("wolfo", None, 2, documents_count2), ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_one_successful_batch(); @@ -2784,6 +2826,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0, 1, 2]), }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); @@ -2822,6 +2865,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2872,6 +2916,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2924,6 +2969,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2977,6 +3023,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3031,6 +3078,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3076,13 +3124,13 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); handle.advance_n_successful_batches(3); @@ -3140,11 +3188,11 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3363,17 +3411,17 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3449,20 +3497,20 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); handle.advance_n_successful_batches(1); let kind = KindWithContent::TaskCancelation { query: "test_query".to_string(), tasks: [0, 1, 2, 3].into_iter().collect(), }; - let task_cancelation = index_scheduler.register(kind, None).unwrap(); + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3497,7 +3545,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); handle.advance_one_failed_batch(); @@ -3532,6 +3580,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3573,6 +3622,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3632,6 +3682,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3683,6 +3734,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3714,6 +3766,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3743,6 +3796,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3779,6 +3833,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3808,6 +3863,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3848,6 +3904,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3878,6 +3935,7 @@ mod tests { allow_index_creation, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3934,6 +3992,7 @@ mod tests { allow_index_creation, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3989,6 +4048,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4053,6 +4113,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4113,6 +4174,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4197,6 +4259,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4283,6 +4346,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4337,7 +4401,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); @@ -4360,6 +4424,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4368,6 +4433,7 @@ mod tests { let result = index_scheduler.register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ); if result.is_err() { break; @@ -4381,6 +4447,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); @@ -4392,6 +4459,7 @@ mod tests { .register( KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, None, + false, ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); @@ -4403,6 +4471,7 @@ mod tests { .register( KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4412,6 +4481,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4428,6 +4498,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4436,6 +4507,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4446,12 +4518,14 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); index_scheduler .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); @@ -4507,6 +4581,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4515,6 +4590,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4525,12 +4601,14 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); index_scheduler .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); @@ -4555,11 +4633,11 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" { @@ -4709,11 +4787,11 @@ mod tests { query: "cancel dump".to_owned(), tasks: RoaringBitmap::from_iter([0]), }; - let _ = index_scheduler.register(dump_creation, None).unwrap(); + let _ = index_scheduler.register(dump_creation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - let _ = index_scheduler.register(dump_cancellation, None).unwrap(); + let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); @@ -4727,15 +4805,86 @@ mod tests { let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None).unwrap(); + let task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(task.uid, @"0"); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12)).unwrap(); + let task = index_scheduler.register(kind, Some(12), false).unwrap(); snapshot!(task.uid, @"12"); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let error = index_scheduler.register(kind, Some(5)).unwrap_err(); + let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); snapshot!(error, @"Received bad task id: 5 should be >= to 13."); } + + #[test] + fn dry_run() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, true).unwrap(); + snapshot!(task.uid, @"0"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), true).unwrap(); + snapshot!(task.uid, @"12"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 292a87259..7c40059d7 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -265,7 +265,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< .name(String::from("register-snapshot-tasks")) .spawn(move || loop { thread::sleep(snapshot_delay); - if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) { + if let Err(e) = + index_scheduler.register(KindWithContent::SnapshotCreation, None, false) + { error!("Error while registering snapshot: {}", e); } }) diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 56231a759..7f3cd06a5 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -11,7 +11,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -32,8 +32,11 @@ pub async fn create_dump( instance_uid: analytics.instance_uid().cloned(), }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 5bf7eaa8d..a74bbff49 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -36,7 +36,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use crate::routes::{ + get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, +}; use crate::search::parse_filter; use crate::Opt; @@ -133,8 +135,11 @@ pub async fn delete_document( documents_ids: vec![document_id], }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -282,6 +287,7 @@ pub async fn replace_documents( let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -291,6 +297,7 @@ pub async fn replace_documents( body, IndexDocumentsMethod::ReplaceDocuments, uid, + dry_run, allow_index_creation, ) .await?; @@ -317,6 +324,7 @@ pub async fn update_documents( let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -326,6 +334,7 @@ pub async fn update_documents( body, IndexDocumentsMethod::UpdateDocuments, uid, + dry_run, allow_index_creation, ) .await?; @@ -344,6 +353,7 @@ async fn document_addition( mut body: Payload, method: IndexDocumentsMethod, task_id: Option, + dry_run: bool, allow_index_creation: bool, ) -> Result { let format = match ( @@ -376,7 +386,7 @@ async fn document_addition( } }; - let (uuid, mut update_file) = index_scheduler.create_update_file()?; + let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; let temp_file = match tempfile() { Ok(file) => file, @@ -460,7 +470,9 @@ async fn document_addition( }; let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? { + let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run)) + .await? + { Ok(task) => task, Err(e) => { index_scheduler.delete_update_file(uuid)?; @@ -492,8 +504,11 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) @@ -530,8 +545,11 @@ pub async fn delete_documents_by_filter( let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) @@ -549,8 +567,11 @@ pub async fn clear_all_documents( let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 59a1f0e64..59fa02dff 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -22,6 +22,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::is_dry_run; use crate::Opt; pub mod documents; @@ -140,8 +141,11 @@ pub async fn create_index( let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) @@ -211,8 +215,11 @@ pub async fn update_index( }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) @@ -227,8 +234,11 @@ pub async fn delete_index( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 6e43bce41..c71d83279 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -15,7 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; #[macro_export] @@ -36,7 +36,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; use $crate::Opt; - use $crate::routes::{get_task_id, SummarizedTaskView}; + use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; pub async fn delete( index_scheduler: GuardedData< @@ -61,8 +61,9 @@ macro_rules! make_setting_route { allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); @@ -112,8 +113,9 @@ macro_rules! make_setting_route { allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); @@ -776,8 +778,11 @@ pub async fn update_all( allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) @@ -815,8 +820,11 @@ pub async fn delete_all( allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 2dc89b150..f98d4b4de 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -77,6 +77,25 @@ pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, Respo Ok(task_id) } +pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { + if !opt.experimental_ha_parameters { + return Ok(false); + } + Ok(req + .headers() + .get("DryRun") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("DryRun is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? + .map_or(false, |s| s.to_lowercase() == "true")) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 6b3178126..84673729f 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -10,7 +10,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -27,8 +27,11 @@ pub async fn create_snapshot( let task = KindWithContent::SnapshotCreation; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index f8adeeb18..51a7b0707 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde_json::json; -use super::{get_task_id, SummarizedTaskView}; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -63,7 +63,10 @@ pub async fn swap_indexes( let task = KindWithContent::IndexSwap { swaps }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 279b57e3d..f35d97fe6 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -18,7 +18,7 @@ use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; -use super::{get_task_id, SummarizedTaskView}; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; @@ -200,8 +200,10 @@ async fn cancel_tasks( KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = - task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; + task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) @@ -248,7 +250,9 @@ async fn delete_tasks( KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; let uid = get_task_id(&req, &opt)?; - let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; + let dry_run = is_dry_run(&req, &opt)?; + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) From 60510e037bab23dde027c694bed10a5380a57f65 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 12:30:28 +0100 Subject: [PATCH 5/8] update the discussion link --- meilisearch/src/option.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 58f3791e8..377507374 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -360,8 +360,7 @@ pub struct Opt { pub experimental_enable_logs_route: bool, /// Enable multiple features that helps you to run meilisearch in a high availability context. - /// TODO: TAMO: Update the discussion link - /// For more information, see: + /// For more information, see: /// /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now /// - Lets you specify a custom task ID upon registering a task From bbf3fb88ca3a9db178403f758201657b5c1d02cb Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 14:33:40 +0100 Subject: [PATCH 6/8] rename the cli parameter --- meilisearch/src/analytics/segment_analytics.rs | 14 +++----------- meilisearch/src/lib.rs | 2 +- meilisearch/src/option.rs | 14 +++++++------- meilisearch/src/routes/mod.rs | 4 ++-- meilisearch/tests/index/create_index.rs | 3 ++- 5 files changed, 15 insertions(+), 22 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 55dd02460..262a4751a 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -253,7 +253,7 @@ struct Infos { env: String, experimental_enable_metrics: bool, experimental_logs_mode: LogMode, - experimental_ha_parameters: bool, + experimental_replication_parameters: bool, experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, @@ -292,12 +292,8 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, -<<<<<<< HEAD experimental_logs_mode, -||||||| parent of 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) -======= - experimental_ha_parameters, ->>>>>>> 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) + experimental_replication_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, @@ -345,12 +341,8 @@ impl From for Infos { Self { env, experimental_enable_metrics, -<<<<<<< HEAD experimental_logs_mode, -||||||| parent of 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) -======= - experimental_ha_parameters, ->>>>>>> 01ae46dd8 (add an experimental cli parameter to allow specifying your task id) + experimental_replication_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 7c40059d7..0d892e7e8 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -302,7 +302,7 @@ fn open_or_create_database_unchecked( enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, - cleanup_enabled: !opt.experimental_ha_parameters, + cleanup_enabled: !opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 377507374..27f2d9c41 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -52,7 +52,7 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE"; -const MEILI_EXPERIMENTAL_HA_PARAMETERS: &str = "MEILI_EXPERIMENTAL_HA_PARAMETERS"; +const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = @@ -359,15 +359,15 @@ pub struct Opt { #[serde(default)] pub experimental_enable_logs_route: bool, - /// Enable multiple features that helps you to run meilisearch in a high availability context. + /// Enable multiple features that helps you to run meilisearch in a replicated context. /// For more information, see: /// /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now /// - Lets you specify a custom task ID upon registering a task /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) - #[clap(long, env = MEILI_EXPERIMENTAL_HA_PARAMETERS)] + #[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)] #[serde(default)] - pub experimental_ha_parameters: bool, + pub experimental_replication_parameters: bool, /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] @@ -476,7 +476,7 @@ impl Opt { experimental_enable_metrics, experimental_logs_mode, experimental_enable_logs_route, - experimental_ha_parameters, + experimental_replication_parameters, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -538,8 +538,8 @@ impl Opt { experimental_logs_mode.to_string(), ); export_to_env_if_not_present( - MEILI_EXPERIMENTAL_HA_PARAMETERS, - experimental_ha_parameters.to_string(), + MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS, + experimental_replication_parameters.to_string(), ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index f98d4b4de..249103e12 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -47,7 +47,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, ResponseError> { - if !opt.experimental_ha_parameters { + if !opt.experimental_replication_parameters { return Ok(None); } let task_id = req @@ -78,7 +78,7 @@ pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, Respo } pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { - if !opt.experimental_ha_parameters { + if !opt.experimental_replication_parameters { return Ok(false); } Ok(req diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index 7a678624c..b309b83c6 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -205,7 +205,8 @@ async fn error_create_with_invalid_index_uid() { async fn send_task_id() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { experimental_ha_parameters: true, ..default_settings(temp.path()) }; + let options = + Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let app = server.init_web_app().await; From a478392b7a5b200d2ce6847a8173b1984e6bc955 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 22 Feb 2024 15:51:47 +0100 Subject: [PATCH 7/8] create a test with the dry-run parameter enabled --- meilisearch/tests/common/index.rs | 9 +--- meilisearch/tests/documents/add_documents.rs | 49 +++++++++++++++++++- meilisearch/tests/documents/errors.rs | 41 ++++++++++------ 3 files changed, 76 insertions(+), 23 deletions(-) diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 4992eeb13..16fc10e98 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -100,16 +100,11 @@ impl Index<'_> { pub async fn raw_add_documents( &self, payload: &str, - content_type: Option<&str>, + headers: Vec<(&str, &str)>, query_parameter: &str, ) -> (Value, StatusCode) { let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter); - - if let Some(content_type) = content_type { - self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await - } else { - self.service.post_str(url, payload, Vec::new()).await - } + self.service.post_str(url, payload, headers).await } pub async fn update_documents( diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 9733f7741..e6af85229 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1,10 +1,11 @@ use actix_web::test; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; use crate::common::encoder::Encoder; -use crate::common::{GetAllDocumentsOptions, Server, Value}; +use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value}; use crate::json; /// This is the basic usage of our API and every other tests uses the content-type application/json @@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() { assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 120); } + +#[actix_rt::test] +async fn dry_register_file() { + let temp = tempfile::tempdir().unwrap(); + + let options = + Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + let index = server.index("tamo"); + + let documents = r#" + { + "id": "12", + "doggo": "kefir" + } + "#; + + let (response, code) = index + .raw_add_documents( + documents, + vec![("Content-Type", "application/json"), ("DryRun", "true")], + "", + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + snapshot!(code, @"202 Accepted"); + + let (response, code) = index.get_task(response.uid()).await; + snapshot!(response, @r###" + { + "message": "Task `0` not found.", + "code": "task_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#task_not_found" + } + "###); + snapshot!(code, @"404 Not Found"); +} diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index bd06aabce..cd2d89813 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("application/json"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", None, "").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() { "###); // even with a csv delimiter specified this error is triggered first - let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("doggo"), "").await; + let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() { "###); let (response, code) = index - .raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))) + .raw_add_documents( + "", + vec![("Content-Type", "application/json")], + &format!("?csvDelimiter={}", encode("🍰")), + ) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" @@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { From 0562818c2a1380d9d87cd5ca1a37bdc7b1bb8748 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 22 Feb 2024 18:42:12 +0100 Subject: [PATCH 8/8] fix and remove the file-store hack of /dev/null --- file-store/src/lib.rs | 56 +++++++++------------ index-scheduler/src/lib.rs | 54 ++++++++++---------- meilisearch-types/src/document_formats.rs | 16 +++--- meilisearch/src/routes/indexes/documents.rs | 8 ++- 4 files changed, 63 insertions(+), 71 deletions(-) diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index e3851a2df..15c4168bc 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -1,5 +1,5 @@ use std::fs::File as StdFile; -use std::ops::{Deref, DerefMut}; +use std::io::Write; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -22,20 +22,6 @@ pub enum Error { pub type Result = std::result::Result; -impl Deref for File { - type Target = NamedTempFile; - - fn deref(&self) -> &Self::Target { - &self.file - } -} - -impl DerefMut for File { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.file - } -} - #[derive(Clone, Debug)] pub struct FileStore { path: PathBuf, @@ -56,7 +42,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); - let update_file = File { dry: false, file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -67,7 +53,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::from_u128(uuid); let path = self.path.join(uuid.to_string()); - let update_file = File { dry: false, file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -135,33 +121,41 @@ impl FileStore { } pub struct File { - dry: bool, path: PathBuf, - file: NamedTempFile, + file: Option, } impl File { pub fn dry_file() -> Result { - #[cfg(target_family = "unix")] - let path = PathBuf::from_str("/dev/null").unwrap(); - #[cfg(target_family = "windows")] - let path = PathBuf::from_str("\\Device\\Null").unwrap(); - - Ok(Self { - dry: true, - path: path.clone(), - file: tempfile::Builder::new().make(|_| std::fs::File::create(path.clone()))?, - }) + Ok(Self { path: PathBuf::new(), file: None }) } pub fn persist(self) -> Result<()> { - if !self.dry { - self.file.persist(&self.path)?; + if let Some(file) = self.file { + file.persist(&self.path)?; } Ok(()) } } +impl Write for File { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if let Some(file) = self.file.as_mut() { + file.write(buf) + } else { + Ok(buf.len()) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + if let Some(file) = self.file.as_mut() { + file.flush() + } else { + Ok(()) + } + } +} + #[cfg(test)] mod test { use std::io::Write; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 5d0ce9eb9..1c3b93bce 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1550,7 +1550,7 @@ impl<'a> Dump<'a> { let content_uuid = match content_file { Some(content_file) if task.status == Status::Enqueued => { let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; - let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); + let mut builder = DocumentsBatchBuilder::new(&mut file); for doc in content_file { builder.append_json_object(&doc?)?; } @@ -1734,7 +1734,7 @@ pub struct IndexStats { #[cfg(test)] mod tests { - use std::io::{BufWriter, Seek, Write}; + use std::io::{BufWriter, Write}; use std::time::Instant; use big_s::S; @@ -1882,7 +1882,7 @@ mod tests { /// Adapting to the new json reading interface pub fn read_json( bytes: &[u8], - write: impl Write + Seek, + write: impl Write, ) -> std::result::Result { let temp_file = NamedTempFile::new().unwrap(); let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); @@ -1909,7 +1909,7 @@ mod tests { ); let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); (file, documents_count) } @@ -2321,7 +2321,7 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2366,7 +2366,7 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2406,7 +2406,7 @@ mod tests { ]"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2472,7 +2472,7 @@ mod tests { ]"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2678,7 +2678,7 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2852,7 +2852,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2903,7 +2903,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -2956,7 +2956,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3010,7 +3010,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3065,7 +3065,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3567,7 +3567,7 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3609,7 +3609,7 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3669,7 +3669,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3721,7 +3721,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3783,7 +3783,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3850,7 +3850,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3922,7 +3922,7 @@ mod tests { let allow_index_creation = i % 2 != 0; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -3979,7 +3979,7 @@ mod tests { let allow_index_creation = i % 2 != 0; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler .register( @@ -4033,7 +4033,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); @@ -4098,7 +4098,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); @@ -4159,7 +4159,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); @@ -4244,7 +4244,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); @@ -4331,7 +4331,7 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs index 0f1d995f9..50dc5bad4 100644 --- a/meilisearch-types/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -1,6 +1,6 @@ use std::fmt::{self, Debug, Display}; use std::fs::File; -use std::io::{self, Seek, Write}; +use std::io::{self, BufWriter, Write}; use std::marker::PhantomData; use memmap2::MmapOptions; @@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError { } /// Reads CSV from input and write an obkv batch to writer. -pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref()); builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?; @@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result Ok(count as u64) } -/// Reads JSON from temporary file and write an obkv batch to writer. -pub fn read_json(file: &File, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +/// Reads JSON from temporary file and write an obkv batch to writer. +pub fn read_json(file: &File, writer: impl Write) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; let mut deserializer = serde_json::Deserializer::from_slice(&mmap); @@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result { } /// Reads JSON from temporary file and write an obkv batch to writer. -pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +pub fn read_ndjson(file: &File, writer: impl Write) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; for result in serde_json::Deserializer::from_slice(&mmap).into_iter() { diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index a74bbff49..43fab1dae 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -425,11 +425,9 @@ async fn document_addition( let read_file = buffer.into_inner().into_std().await; let documents_count = tokio::task::spawn_blocking(move || { let documents_count = match format { - PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?, - PayloadType::Csv { delimiter } => { - read_csv(&read_file, update_file.as_file_mut(), delimiter)? - } - PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?, + PayloadType::Json => read_json(&read_file, &mut update_file)?, + PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?, + PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?, }; // we NEED to persist the file here because we moved the `udpate_file` in another task. update_file.persist()?;