From ea60d35c713eb591e8be8bab0448e3bfb572a052 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 18 Oct 2022 15:04:14 +0200 Subject: [PATCH] Delete a task's persisted data when appropriate --- file-store/src/lib.rs | 16 +++++++++++++ index-scheduler/src/batch.rs | 19 ++++++++------- index-scheduler/src/lib.rs | 23 +++++++++++++++++-- index-scheduler/src/snapshot.rs | 14 ++++++++++- .../snapshots/lib.rs/document_addition/1.snap | 4 ++++ .../snapshots/lib.rs/document_addition/2.snap | 4 ++++ .../snapshots/lib.rs/document_addition/3.snap | 3 +++ .../1.snap | 3 +++ .../src/snapshots/lib.rs/register/1.snap | 3 +++ .../initial_tasks_enqueued.snap | 5 ++++ .../initial_tasks_processed.snap | 4 ++++ .../task_deletion_processed.snap | 4 ++++ .../initial_tasks_enqueued.snap | 5 ++++ .../initial_tasks_processed.snap | 4 ++++ .../task_deletion_processed.snap | 4 ++++ .../initial_tasks_enqueued.snap | 3 +++ .../task_deletion_done.snap | 3 +++ .../task_deletion_enqueued.snap | 3 +++ .../task_deletion_processing.snap | 3 +++ 19 files changed, 116 insertions(+), 11 deletions(-) diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index a3446ae92..0e30661ec 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -1,6 +1,8 @@ +use std::collections::BTreeSet; use std::fs::File as StdFile; use std::ops::{Deref, DerefMut}; use std::path::{Path, PathBuf}; +use std::str::FromStr; use tempfile::NamedTempFile; use uuid::Uuid; @@ -96,6 +98,20 @@ impl FileStore { std::fs::remove_file(path)?; Ok(()) } + + /// List the Uuids of the files in the FileStore + /// + /// This function is meant to be used by tests only. + #[doc(hidden)] + pub fn __all_uuids(&self) -> BTreeSet { + let mut uuids = BTreeSet::new(); + for entry in self.path.read_dir().unwrap() { + let entry = entry.unwrap(); + let uuid = Uuid::from_str(entry.file_name().to_str().unwrap()).unwrap(); + uuids.insert(uuid); + } + uuids + } } pub struct File { diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index b855f2d92..7a0458d57 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -953,15 +953,18 @@ impl IndexScheduler { let mut affected_kinds = HashSet::new(); for task_id in to_delete_tasks.iter() { - // This should never fail, but there is no harm done if it does. The function - // will still be 99% correct (the number of deleted tasks will be slightly incorrect). - if let Some(task) = self.get_task(wtxn, task_id)? { - if let Some(task_indexes) = task.indexes() { - affected_indexes.extend(task_indexes.into_iter().map(|x| x.to_owned())); - } - affected_statuses.insert(task.status); - affected_kinds.insert(task.kind.as_kind()); + let task = self + .get_task(wtxn, task_id)? + .ok_or(Error::CorruptedTaskQueue)?; + if let Some(task_indexes) = task.indexes() { + affected_indexes.extend(task_indexes.into_iter().map(|x| x.to_owned())); } + affected_statuses.insert(task.status); + affected_kinds.insert(task.kind.as_kind()); + // Note: don't delete the persisted task data since + // we can only delete succeeded, failed, and canceled tasks. + // In each of those cases, the persisted data is supposed to + // have been deleted already. } for index in affected_indexes { self.update_index(wtxn, &index, |bitmap| { diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 988ecbea7..043e4c0c3 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -381,7 +381,7 @@ impl IndexScheduler { match wtxn.commit() { Ok(()) => (), _e @ Err(_) => { - todo!("remove the data associated with the task"); + self.delete_persisted_task_data(&task)?; // _e?; } } @@ -577,8 +577,8 @@ impl IndexScheduler { for mut task in tasks { task.started_at = Some(started_at); task.finished_at = Some(finished_at); - // TODO the info field should've been set by the process_batch function self.update_task(&mut wtxn, &task)?; + self.delete_persisted_task_data(&task)?; } log::info!("A batch of tasks was successfully completed."); } @@ -606,6 +606,25 @@ impl IndexScheduler { Ok(processed_tasks) } + + pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { + match &task.kind { + KindWithContent::DocumentImport { content_file, .. } => { + self.delete_update_file(*content_file) + } + KindWithContent::DocumentDeletion { .. } + | KindWithContent::DocumentClear { .. } + | KindWithContent::Settings { .. } + | KindWithContent::IndexDeletion { .. } + | KindWithContent::IndexCreation { .. } + | KindWithContent::IndexUpdate { .. } + | KindWithContent::IndexSwap { .. } + | KindWithContent::CancelTask { .. } + | KindWithContent::TaskDeletion { .. } + | KindWithContent::DumpExport { .. } + | KindWithContent::Snapshot => Ok(()), + } + } } #[cfg(test)] diff --git a/index-scheduler/src/snapshot.rs b/index-scheduler/src/snapshot.rs index 13d91cda3..52c8b30ea 100644 --- a/index-scheduler/src/snapshot.rs +++ b/index-scheduler/src/snapshot.rs @@ -15,7 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let IndexScheduler { autobatching_enabled, processing_tasks, - file_store: _, + file_store, env, all_tasks, status, @@ -59,11 +59,23 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { snap.push_str(&snapshot_index_mapper(&rtxn, index_mapper)); snap.push_str("\n----------------------------------------------------------------------\n"); + snap.push_str("### File Store:\n"); + snap.push_str(&snapshot_file_store(file_store)); + snap.push_str("\n----------------------------------------------------------------------\n"); + rtxn.commit().unwrap(); snap } +fn snapshot_file_store(file_store: &file_store::FileStore) -> String { + let mut snap = String::new(); + for uuid in file_store.__all_uuids() { + snap.push_str(&format!("{uuid}\n")); + } + snap +} + fn snapshot_bitmap(r: &RoaringBitmap) -> String { let mut snap = String::new(); snap.push('['); diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap index 0c8834643..29fda8278 100644 --- a/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap @@ -20,4 +20,8 @@ doggos [0,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap index bdf20e9d8..ff9798905 100644 --- a/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap @@ -20,4 +20,8 @@ doggos [0,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap index ad94b0962..4c7739942 100644 --- a/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap @@ -21,4 +21,7 @@ doggos [0,] ### Index Mapper: ["doggos"] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap index 8504cc177..572cb0596 100644 --- a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap @@ -23,4 +23,7 @@ doggos [2,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/register/1.snap b/index-scheduler/src/snapshots/lib.rs/register/1.snap index b4740a8d7..84c574103 100644 --- a/index-scheduler/src/snapshots/lib.rs/register/1.snap +++ b/index-scheduler/src/snapshots/lib.rs/register/1.snap @@ -27,4 +27,7 @@ doggo [4,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap index cd6149369..fd2bc806d 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap @@ -22,4 +22,9 @@ doggo [1,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap index 84f02f4ad..167d387da 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap @@ -23,4 +23,8 @@ doggo [1,] ### Index Mapper: ["catto"] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap index 1a103c1f7..b7b4c4b97 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -25,4 +25,8 @@ doggo [1,] ### Index Mapper: ["catto"] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap index cd6149369..fd2bc806d 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -22,4 +22,9 @@ doggo [1,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap index 84f02f4ad..167d387da 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -23,4 +23,8 @@ doggo [1,] ### Index Mapper: ["catto"] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap index 6c16585d1..6d3e2f9ce 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -24,4 +24,8 @@ doggo [1,] ### Index Mapper: ["catto"] ---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap index 78926d4ae..a6ae53c78 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -24,4 +24,7 @@ doggo [2,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap index f3c79f205..4e833c022 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -27,4 +27,7 @@ doggo [2,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap index 320d5f67a..c0e353710 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -26,4 +26,7 @@ doggo [2,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap index 5d61c13a3..e4f0be8c4 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -26,4 +26,7 @@ doggo [2,] ### Index Mapper: [] ---------------------------------------------------------------------- +### File Store: + +----------------------------------------------------------------------