From ab3056cc662668ab8639aa3ae560781fdde926d5 Mon Sep 17 00:00:00 2001 From: funilrys Date: Mon, 31 Oct 2022 18:57:40 +0100 Subject: [PATCH 01/10] Extract the dates out of the dumpv4. This patch possibly fixes #2987. This patch introduces a way to fill the IndexMetadata.created_at and IndexMetadata.updated_at keys from the tasks events. This is done by reading the creation date of the first event (created_at) and the creation date of the last event (updated_at). --- dump/src/reader/v4/mod.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 3aad71ddb..c335d4289 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -23,6 +23,7 @@ pub type Checked = settings::Checked; pub type Unchecked = settings::Unchecked; pub type Task = tasks::Task; +pub type TaskEvent = tasks::TaskEvent; pub type Key = keys::Key; // everything related to the settings @@ -100,6 +101,7 @@ impl V4Reader { V4IndexReader::new( index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + BufReader::new(self.tasks.get_ref().try_clone().unwrap()), ) })) } @@ -147,16 +149,31 @@ pub struct V4IndexReader { } impl V4IndexReader { - pub fn new(name: String, path: &Path) -> Result { + pub fn new(name: String, path: &Path, tasks: BufReader) -> Result { let meta = File::open(path.join("meta.json"))?; let meta: DumpMeta = serde_json::from_reader(meta)?; + let mut index_tasks: Vec = vec![]; + + for line in tasks.lines() { + let task: Task = serde_json::from_str(&line?)?; + + if task.index_uid.to_string() == name { + index_tasks.push(task) + } + } + let metadata = IndexMetadata { uid: name, primary_key: meta.primary_key, - // FIXME: Iterate over the whole task queue to find the creation and last update date. 
- created_at: OffsetDateTime::now_utc(), - updated_at: OffsetDateTime::now_utc(), + created_at: match index_tasks.first().unwrap().events.first() { + Some(TaskEvent::Created(ts)) => *ts, + _ => OffsetDateTime::now_utc(), + }, + updated_at: match index_tasks.last().unwrap().events.last() { + Some(TaskEvent::Created(ts)) => *ts, + _ => OffsetDateTime::now_utc(), + }, }; let ret = V4IndexReader { From 953b2ec4380736136c015ccbf4d508acdf7fc21a Mon Sep 17 00:00:00 2001 From: funilrys Date: Wed, 2 Nov 2022 17:49:37 +0100 Subject: [PATCH 02/10] fixup! Extract the dates out of the dumpv4. --- dump/src/reader/v4/mod.rs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index c335d4289..5cf7b112d 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -24,6 +24,7 @@ pub type Unchecked = settings::Unchecked; pub type Task = tasks::Task; pub type TaskEvent = tasks::TaskEvent; +pub type TaskContent = tasks::TaskContent; pub type Key = keys::Key; // everything related to the settings @@ -153,27 +154,41 @@ impl V4IndexReader { let meta = File::open(path.join("meta.json"))?; let meta: DumpMeta = serde_json::from_reader(meta)?; - let mut index_tasks: Vec = vec![]; + let mut created_at = None; + let mut updated_at = None; for line in tasks.lines() { let task: Task = serde_json::from_str(&line?)?; if task.index_uid.to_string() == name { - index_tasks.push(task) + if updated_at.is_none() { + updated_at = match task.events.last() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }; + } + + if created_at.is_none() { + created_at = match task.content { + TaskContent::IndexCreation { primary_key } => match task.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, + _ => None, + }; + } + + if created_at.is_some() { + break; + } } } let metadata = IndexMetadata { uid: name, primary_key: meta.primary_key, - created_at: match 
index_tasks.first().unwrap().events.first() { - Some(TaskEvent::Created(ts)) => *ts, - _ => OffsetDateTime::now_utc(), - }, - updated_at: match index_tasks.last().unwrap().events.last() { - Some(TaskEvent::Created(ts)) => *ts, - _ => OffsetDateTime::now_utc(), - }, + created_at: created_at.unwrap_or(OffsetDateTime::now_utc()), + updated_at: updated_at.unwrap_or(OffsetDateTime::now_utc()), }; let ret = V4IndexReader { From 06e7db7a1ff08dc25593d58086993001a4f4e2e3 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 12 Nov 2022 18:28:23 +0100 Subject: [PATCH 03/10] fixup! Extract the dates out of the dumpv4. --- dump/src/reader/v4/mod.rs | 27 +++++++++++---------------- dump/src/reader/v4/tasks.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 5cf7b112d..140890d78 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -13,7 +13,7 @@ pub mod meta; pub mod settings; pub mod tasks; -use self::meta::{DumpMeta, IndexUuid}; +use self::meta::{DumpMeta, IndexMeta, IndexUuid}; use super::compat::v4_to_v5::CompatV4ToV5; use crate::{Error, IndexMetadata, Result, Version}; @@ -23,8 +23,6 @@ pub type Checked = settings::Checked; pub type Unchecked = settings::Unchecked; pub type Task = tasks::Task; -pub type TaskEvent = tasks::TaskEvent; -pub type TaskContent = tasks::TaskContent; pub type Key = keys::Key; // everything related to the settings @@ -102,6 +100,7 @@ impl V4Reader { V4IndexReader::new( index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + &index.index_meta, BufReader::new(self.tasks.get_ref().try_clone().unwrap()), ) })) @@ -150,7 +149,12 @@ pub struct V4IndexReader { } impl V4IndexReader { - pub fn new(name: String, path: &Path, tasks: BufReader) -> Result { + pub fn new( + name: String, + path: &Path, + index_metadata: &IndexMeta, + tasks: BufReader, + ) -> Result { let meta = 
File::open(path.join("meta.json"))?; let meta: DumpMeta = serde_json::from_reader(meta)?; @@ -162,23 +166,14 @@ impl V4IndexReader { if task.index_uid.to_string() == name { if updated_at.is_none() { - updated_at = match task.events.last() { - Some(TaskEvent::Created(ts)) => Some(*ts), - _ => None, - }; + updated_at = task.updated_at() } if created_at.is_none() { - created_at = match task.content { - TaskContent::IndexCreation { primary_key } => match task.events.first() { - Some(TaskEvent::Created(ts)) => Some(*ts), - _ => None, - }, - _ => None, - }; + created_at = task.created_at() } - if created_at.is_some() { + if task.id as usize == index_metadata.creation_task_id { break; } } diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs index e1bdde0c7..c075fecc7 100644 --- a/dump/src/reader/v4/tasks.rs +++ b/dump/src/reader/v4/tasks.rs @@ -104,6 +104,33 @@ impl Task { }) } + pub fn updated_at(&self) -> Option { + match self.events.last() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + } + } + + pub fn created_at(&self) -> Option { + match &self.content { + TaskContent::IndexCreation { primary_key: _ } => match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, + TaskContent::DocumentAddition { + content_uuid: _, + merge_strategy: _, + primary_key: _, + documents_count: _, + allow_index_creation: _, + } => match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, + _ => None, + } + } + /// Return the content_uuid of the `Task` if there is one. pub fn get_content_uuid(&self) -> Option { match self { From 079357ee1f2a90e5b46490bf1db2d960ea3223b5 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 12 Nov 2022 20:57:27 +0100 Subject: [PATCH 04/10] Fix linting issues. 
--- dump/src/reader/v4/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 140890d78..34df609b1 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -182,8 +182,8 @@ impl V4IndexReader { let metadata = IndexMetadata { uid: name, primary_key: meta.primary_key, - created_at: created_at.unwrap_or(OffsetDateTime::now_utc()), - updated_at: updated_at.unwrap_or(OffsetDateTime::now_utc()), + created_at: created_at.unwrap_or_else(OffsetDateTime::now_utc), + updated_at: updated_at.unwrap_or_else(OffsetDateTime::now_utc), }; let ret = V4IndexReader { From 8a14f6f5455c6c0bfed015a67944b5e26002ffc1 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sun, 13 Nov 2022 10:12:51 +0100 Subject: [PATCH 05/10] Add Task.processed_at. --- dump/src/reader/v4/mod.rs | 2 ++ dump/src/reader/v4/tasks.rs | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 34df609b1..885f59d97 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -174,6 +174,8 @@ impl V4IndexReader { } if task.id as usize == index_metadata.creation_task_id { + created_at = task.processed_at(); + break; } } diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs index c075fecc7..be98f9ee2 100644 --- a/dump/src/reader/v4/tasks.rs +++ b/dump/src/reader/v4/tasks.rs @@ -104,6 +104,13 @@ impl Task { }) } + pub fn processed_at(&self) -> Option { + match self.events.last() { + Some(TaskEvent::Succeded { result: _, timestamp }) => Some(timestamp.clone()), + _ => None, + } + } + pub fn updated_at(&self) -> Option { match self.events.last() { Some(TaskEvent::Created(ts)) => Some(*ts), From 0a102d601c774805f9d68733b233647a09c56fa8 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sun, 13 Nov 2022 10:14:20 +0100 Subject: [PATCH 06/10] Update Task.created_at Indeed, before this patch we weren't considering the TaskContent::SettingsUpdate while
trying to find the creation date. --- dump/src/reader/v4/tasks.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs index be98f9ee2..e563274d9 100644 --- a/dump/src/reader/v4/tasks.rs +++ b/dump/src/reader/v4/tasks.rs @@ -134,6 +134,14 @@ impl Task { Some(TaskEvent::Created(ts)) => Some(*ts), _ => None, }, + TaskContent::SettingsUpdate { + settings: _, + is_deletion: _, + allow_index_creation: _, + } => match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, _ => None, } } From e81b349658031439be0a479b70af1e3b5a170d61 Mon Sep 17 00:00:00 2001 From: funilrys Date: Mon, 14 Nov 2022 18:51:34 +0100 Subject: [PATCH 07/10] Fix linting issue. --- dump/src/reader/v4/tasks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs index e563274d9..c9920ffd6 100644 --- a/dump/src/reader/v4/tasks.rs +++ b/dump/src/reader/v4/tasks.rs @@ -106,7 +106,7 @@ impl Task { pub fn processed_at(&self) -> Option { match self.events.last() { - Some(TaskEvent::Succeded { result: _, timestamp }) => Some(timestamp.clone()), + Some(TaskEvent::Succeded { result: _, timestamp }) => Some(*timestamp), _ => None, } } From f056fc118fcdba2f7270f5b8517f50664bc601ca Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 17:29:41 +0100 Subject: [PATCH 08/10] Re-open tasks queue. Indeed, before this patch, I was (probably) breaking every usage of the tasks BufReader. This patch solves the issue by reopening the tasks file every time it's needed.
--- dump/src/reader/v4/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 885f59d97..db2cec4e5 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -101,7 +101,7 @@ impl V4Reader { index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), &index.index_meta, - BufReader::new(self.tasks.get_ref().try_clone().unwrap()), + BufReader::new(File::open(dump.path().join("updates").join("data.jsonl")).unwrap()), ) })) } From e510ace1794495a724626a382832908bd08c0964 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 17:41:33 +0100 Subject: [PATCH 09/10] fixup! Re-open tasks queue. --- dump/src/reader/v4/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index db2cec4e5..04ad346b7 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -101,7 +101,7 @@ impl V4Reader { index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), &index.index_meta, - BufReader::new(File::open(dump.path().join("updates").join("data.jsonl")).unwrap()), + BufReader::new(File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap()), ) })) } From 8b6eba4f0b678679a8ea3901790c7246de9ca844 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 17:47:02 +0100 Subject: [PATCH 10/10] Apply fmt. 
--- dump/src/reader/v4/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs index 04ad346b7..2cef6c86d 100644 --- a/dump/src/reader/v4/mod.rs +++ b/dump/src/reader/v4/mod.rs @@ -101,7 +101,9 @@ impl V4Reader { index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), &index.index_meta, - BufReader::new(File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap()), + BufReader::new( + File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap(), + ), ) })) }