3343: Extract creation and last updated timestamp for v3 dump r=curquiza a=FrancisMurillo

# Pull Request

## Related issue
Fixes #2988

## What does this PR do?

Inspired by the v4 dump implementation, this extracts the first `createdAt` and last `updatedAt` fields by parsing the task queue.

Questions:
- Should the parsed tasks be cached instead of re-parsing the task queue for every index, since re-parsing might add a performance penalty?
- I am not sure whether the `created_at` and `processed_at` fields are correct.
- Should I assume the data is sorted in some order, for example by `uuid` or `updateId`? I assumed the list is unordered.
- I was planning to populate my dev instance with data and dump my data. Is there a way to dump with previous versions?

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Francis Murillo <evacuee.overlap.vs3op@aleeas.com>
This commit is contained in:
bors[bot] 2023-01-19 16:14:21 +00:00 committed by GitHub
commit 60018d0fe4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 15 deletions

View File

@ -112,8 +112,11 @@ impl V3Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> { pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> { Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V3IndexReader::new( V3IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(index.uuid.to_string()), &self.dump.path().join("indexes").join(index.uuid.to_string()),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
) )
})) }))
} }
@ -155,16 +158,42 @@ pub struct V3IndexReader {
} }
impl V3IndexReader { impl V3IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> { pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?; let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?; let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata { let metadata = IndexMetadata {
uid: name, uid: index_uuid.uid.clone(),
primary_key: meta.primary_key, primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date. created_at: created_at.unwrap_or(current_time),
created_at: OffsetDateTime::now_utc(), updated_at: updated_at.unwrap_or(current_time),
updated_at: OffsetDateTime::now_utc(),
}; };
let ret = V3IndexReader { let ret = V3IndexReader {
@ -263,12 +292,12 @@ pub(crate) mod test {
assert!(indexes.is_empty()); assert!(indexes.is_empty());
// products // products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" insta::assert_json_snapshot!(products.metadata(), @r###"
{ {
"uid": "products", "uid": "products",
"primaryKey": "sku", "primaryKey": "sku",
"createdAt": "[now]", "createdAt": "2022-10-07T11:38:54.74389899Z",
"updatedAt": "[now]" "updatedAt": "2022-10-07T11:38:55.963185778Z"
} }
"###); "###);
@ -278,12 +307,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies // movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" insta::assert_json_snapshot!(movies.metadata(), @r###"
{ {
"uid": "movies", "uid": "movies",
"primaryKey": "id", "primaryKey": "id",
"createdAt": "[now]", "createdAt": "2022-10-07T11:38:54.026649575Z",
"updatedAt": "[now]" "updatedAt": "2022-10-07T11:39:04.188852537Z"
} }
"###); "###);
@ -308,12 +337,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells // spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" insta::assert_json_snapshot!(spells.metadata(), @r###"
{ {
"uid": "dnd_spells", "uid": "dnd_spells",
"primaryKey": "index", "primaryKey": "index",
"createdAt": "[now]", "createdAt": "2022-10-07T11:38:56.265951133Z",
"updatedAt": "[now]" "updatedAt": "2022-10-07T11:38:56.521004328Z"
} }
"###); "###);

View File

@ -74,6 +74,26 @@ impl UpdateStatus {
_ => None, _ => None,
} }
} }
/// Returns the `enqueued_at` timestamp recorded for this update.
///
/// Every status variant yields `Some`: the value is read directly from an
/// `Enqueued` update and through the nested `from` field(s) for all other
/// statuses.
pub fn enqueued_at(&self) -> Option<OffsetDateTime> {
    let enqueued_at = match self {
        UpdateStatus::Enqueued(update) => update.enqueued_at,
        // One level of nesting: these wrap the enqueued update directly.
        UpdateStatus::Processing(update) => update.from.enqueued_at,
        UpdateStatus::Aborted(update) => update.from.enqueued_at,
        // Two levels of nesting: these wrap an update that itself wraps the enqueued one.
        UpdateStatus::Processed(update) => update.from.from.enqueued_at,
        UpdateStatus::Failed(update) => update.from.from.enqueued_at,
    };
    Some(enqueued_at)
}
/// Returns the timestamp at which this update reached a terminal status.
///
/// Yields `processed_at` for a `Processed` update and `failed_at` for a
/// `Failed` one; `Enqueued`, `Processing` and `Aborted` updates return `None`.
pub fn finished_at(&self) -> Option<OffsetDateTime> {
    match self {
        UpdateStatus::Processed(done) => Some(done.processed_at),
        UpdateStatus::Failed(failure) => Some(failure.failed_at),
        // Listed explicitly (no wildcard) so a new variant forces a decision here.
        UpdateStatus::Enqueued(_) | UpdateStatus::Processing(_) | UpdateStatus::Aborted(_) => {
            None
        }
    }
}
} }
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]