2022-10-07 16:43:05 +02:00
|
|
|
|
//! ```text
//! .
//! ├── indexes
//! │   ├── 01d7dd17-8241-4f1f-a7d1-2d1cb255f5b0
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── 78be64a3-cae1-449e-b7ed-13e77c9a8a0c
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── ba553439-18fe-4733-ba53-44eed898280c
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── c408bc22-5859-49d1-8e9f-c88e2fa95cb0
//! │       ├── documents.jsonl
//! │       └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── metadata.json
//! └── updates
//!     ├── data.jsonl
//!     └── updates_files
//!         └── 66d3f12d-fcf3-4b53-88cb-407017373de7
//! ```
|
|
|
|
|
|
2022-10-20 18:00:07 +02:00
|
|
|
|
use std::fs::{self, File};
|
|
|
|
|
use std::io::{BufRead, BufReader};
|
|
|
|
|
use std::path::Path;
|
2022-10-07 16:43:05 +02:00
|
|
|
|
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
use tempfile::TempDir;
|
|
|
|
|
use time::OffsetDateTime;
|
2022-10-09 17:30:34 +02:00
|
|
|
|
|
2022-10-07 16:43:05 +02:00
|
|
|
|
pub mod errors;
|
2022-10-09 02:49:42 +02:00
|
|
|
|
pub mod meta;
|
2022-10-07 16:43:05 +02:00
|
|
|
|
pub mod settings;
|
|
|
|
|
pub mod updates;
|
|
|
|
|
|
|
|
|
|
use self::meta::{DumpMeta, IndexUuid};
|
2022-10-20 18:00:07 +02:00
|
|
|
|
use super::compat::v3_to_v4::CompatV3ToV4;
|
|
|
|
|
use super::Document;
|
|
|
|
|
use crate::{Error, IndexMetadata, Result, Version};
|
2022-10-07 16:43:05 +02:00
|
|
|
|
|
|
|
|
|
/// Alias exposing [`settings::Settings`] at the root of this module.
pub type Settings<T> = settings::Settings<T>;
|
|
|
|
|
/// Alias exposing [`settings::Checked`] at the root of this module.
pub type Checked = settings::Checked;
|
|
|
|
|
/// Alias exposing [`settings::Unchecked`] at the root of this module.
pub type Unchecked = settings::Unchecked;
|
|
|
|
|
|
|
|
|
|
/// Alias for [`updates::UpdateEntry`], the type each line of the tasks file is deserialized into.
pub type Task = updates::UpdateEntry;
|
|
|
|
|
|
|
|
|
|
// ===== Other types to clarify the code of the compat module
|
|
|
|
|
// everything related to the tasks
|
|
|
|
|
/// Alias for [`updates::UpdateStatus`].
pub type Status = updates::UpdateStatus;
|
|
|
|
|
/// Alias for [`updates::Update`], the kind of operation carried by a task.
pub type Kind = updates::Update;
|
|
|
|
|
|
|
|
|
|
// everything related to the settings
|
|
|
|
|
/// Alias for [`settings::Setting`].
pub type Setting<T> = settings::Setting<T>;
|
|
|
|
|
|
|
|
|
|
// everything related to the errors
|
2022-10-09 02:49:42 +02:00
|
|
|
|
/// Alias for [`errors::Code`].
pub type Code = errors::Code;
|
2022-10-07 16:43:05 +02:00
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
|
|
|
#[serde(rename_all = "camelCase")]
|
|
|
|
|
pub struct Metadata {
|
|
|
|
|
db_version: String,
|
|
|
|
|
index_db_size: usize,
|
|
|
|
|
update_db_size: usize,
|
|
|
|
|
#[serde(with = "time::serde::rfc3339")]
|
|
|
|
|
dump_date: OffsetDateTime,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub struct V3Reader {
|
|
|
|
|
dump: TempDir,
|
|
|
|
|
metadata: Metadata,
|
|
|
|
|
tasks: BufReader<File>,
|
2022-10-10 14:32:11 +02:00
|
|
|
|
index_uuid: Vec<IndexUuid>,
|
2022-10-07 16:43:05 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl V3Reader {
|
|
|
|
|
pub fn open(dump: TempDir) -> Result<Self> {
|
|
|
|
|
let meta_file = fs::read(dump.path().join("metadata.json"))?;
|
|
|
|
|
let metadata = serde_json::from_reader(&*meta_file)?;
|
|
|
|
|
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
|
|
|
|
|
let index_uuid = BufReader::new(index_uuid);
|
|
|
|
|
let index_uuid = index_uuid
|
|
|
|
|
.lines()
|
|
|
|
|
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
|
|
|
|
|
.collect::<Result<Vec<_>>>()?;
|
|
|
|
|
|
|
|
|
|
Ok(V3Reader {
|
|
|
|
|
metadata,
|
2022-10-09 23:47:56 +02:00
|
|
|
|
tasks: BufReader::new(File::open(dump.path().join("updates").join("data.jsonl"))?),
|
2022-10-07 16:43:05 +02:00
|
|
|
|
index_uuid,
|
|
|
|
|
dump,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-10 14:32:11 +02:00
|
|
|
|
pub fn index_uuid(&self) -> Vec<IndexUuid> {
|
|
|
|
|
self.index_uuid.clone()
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-09 02:49:42 +02:00
|
|
|
|
pub fn to_v4(self) -> CompatV3ToV4 {
|
|
|
|
|
CompatV3ToV4::new(self)
|
|
|
|
|
}
|
2022-10-07 16:43:05 +02:00
|
|
|
|
|
|
|
|
|
pub fn version(&self) -> Version {
|
|
|
|
|
Version::V3
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn date(&self) -> Option<OffsetDateTime> {
|
|
|
|
|
Some(self.metadata.dump_date)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> {
|
|
|
|
|
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
|
2022-10-22 16:35:42 +02:00
|
|
|
|
V3IndexReader::new(
|
2022-10-20 18:00:07 +02:00
|
|
|
|
&self.dump.path().join("indexes").join(index.uuid.to_string()),
|
2023-01-19 19:38:20 +08:00
|
|
|
|
index,
|
2023-01-13 22:45:45 +08:00
|
|
|
|
BufReader::new(
|
|
|
|
|
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
|
|
|
|
|
),
|
2022-10-22 16:35:42 +02:00
|
|
|
|
)
|
2022-10-07 16:43:05 +02:00
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-10 17:58:30 +02:00
|
|
|
|
pub fn tasks(
|
|
|
|
|
&mut self,
|
|
|
|
|
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
2022-10-07 16:43:05 +02:00
|
|
|
|
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
|
|
|
|
let task: Task = serde_json::from_str(&line?)?;
|
|
|
|
|
if !task.is_finished() {
|
|
|
|
|
if let Some(uuid) = task.get_content_uuid() {
|
|
|
|
|
let update_file_path = self
|
|
|
|
|
.dump
|
|
|
|
|
.path()
|
|
|
|
|
.join("updates")
|
|
|
|
|
.join("updates_files")
|
|
|
|
|
.join(uuid.to_string());
|
2022-10-10 17:58:30 +02:00
|
|
|
|
Ok((
|
|
|
|
|
task,
|
|
|
|
|
Some(
|
|
|
|
|
Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>
|
|
|
|
|
),
|
|
|
|
|
))
|
2022-10-07 16:43:05 +02:00
|
|
|
|
} else {
|
|
|
|
|
Ok((task, None))
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
Ok((task, None))
|
|
|
|
|
}
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub struct V3IndexReader {
|
|
|
|
|
metadata: IndexMetadata,
|
|
|
|
|
settings: Settings<Checked>,
|
|
|
|
|
|
|
|
|
|
documents: BufReader<File>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl V3IndexReader {
|
2023-01-17 23:11:49 +08:00
|
|
|
|
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
|
2022-10-07 16:43:05 +02:00
|
|
|
|
let meta = File::open(path.join("meta.json"))?;
|
|
|
|
|
let meta: DumpMeta = serde_json::from_reader(meta)?;
|
|
|
|
|
|
2023-01-17 23:11:49 +08:00
|
|
|
|
let mut created_at = None;
|
|
|
|
|
let mut updated_at = None;
|
2023-01-13 22:45:45 +08:00
|
|
|
|
|
|
|
|
|
for line in tasks.lines() {
|
|
|
|
|
let task: Task = serde_json::from_str(&line?)?;
|
|
|
|
|
|
2023-01-19 19:38:20 +08:00
|
|
|
|
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
|
2023-01-17 23:11:49 +08:00
|
|
|
|
continue;
|
|
|
|
|
}
|
2023-01-13 22:45:45 +08:00
|
|
|
|
|
2023-01-17 23:11:49 +08:00
|
|
|
|
let new_created_at = match task.update.meta() {
|
|
|
|
|
Kind::DocumentAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
|
|
|
|
|
_ => None,
|
|
|
|
|
};
|
|
|
|
|
let new_updated_at = task.update.finished_at();
|
2023-01-13 22:45:45 +08:00
|
|
|
|
|
2023-01-17 23:11:49 +08:00
|
|
|
|
if created_at.is_none() || created_at > new_created_at {
|
|
|
|
|
created_at = new_created_at;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if updated_at.is_none() || updated_at < new_updated_at {
|
|
|
|
|
updated_at = new_updated_at;
|
2023-01-13 22:45:45 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let current_time = OffsetDateTime::now_utc();
|
|
|
|
|
|
2022-10-07 16:43:05 +02:00
|
|
|
|
let metadata = IndexMetadata {
|
2023-01-17 23:11:49 +08:00
|
|
|
|
uid: index_uuid.uid.clone(),
|
2022-10-07 16:43:05 +02:00
|
|
|
|
primary_key: meta.primary_key,
|
2023-01-17 23:11:49 +08:00
|
|
|
|
created_at: created_at.unwrap_or(current_time),
|
|
|
|
|
updated_at: updated_at.unwrap_or(current_time),
|
2022-10-07 16:43:05 +02:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let ret = V3IndexReader {
|
|
|
|
|
metadata,
|
|
|
|
|
settings: meta.settings.check(),
|
|
|
|
|
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Ok(ret)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn metadata(&self) -> &IndexMetadata {
|
|
|
|
|
&self.metadata
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
|
|
|
|
|
Ok((&mut self.documents)
|
|
|
|
|
.lines()
|
|
|
|
|
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
|
|
|
|
Ok(self.settings.clone())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-10 17:58:30 +02:00
|
|
|
|
/// Line-by-line reader over an update file, yielding one document per line.
pub struct UpdateFile {
    /// Buffered handle on the underlying file.
    reader: BufReader<File>,
}
|
|
|
|
|
|
|
|
|
|
impl UpdateFile {
|
|
|
|
|
fn new(path: &Path) -> Result<Self> {
|
2022-10-20 18:00:07 +02:00
|
|
|
|
Ok(UpdateFile { reader: BufReader::new(File::open(path)?) })
|
2022-10-10 17:58:30 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Iterator for UpdateFile {
|
|
|
|
|
type Item = Result<Document>;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
(&mut self.reader)
|
|
|
|
|
.lines()
|
|
|
|
|
.map(|line| {
|
|
|
|
|
line.map_err(Error::from)
|
|
|
|
|
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
|
|
|
|
|
})
|
|
|
|
|
.next()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-07 16:43:05 +02:00
|
|
|
|
#[cfg(test)]
pub(crate) mod test {
    use std::fs::File;
    use std::io::BufReader;

    use flate2::bufread::GzDecoder;
    use meili_snap::insta;
    use tempfile::TempDir;

    use super::*;

    /// Extracts the gzipped tar archive at `archive_path` into a new temporary directory.
    fn unpack_archive(archive_path: &str) -> TempDir {
        let archive_file = File::open(archive_path).unwrap();
        let target = TempDir::new().unwrap();
        let mut compressed = BufReader::new(archive_file);
        tar::Archive::new(GzDecoder::new(&mut compressed)).unpack(target.path()).unwrap();
        target
    }

    // Every snapshot assertion must stay in this function, in this order:
    // the names of the file snapshots depend on it.
    #[test]
    fn read_dump_v3() {
        let mut reader = V3Reader::open(unpack_archive("tests/assets/v3.dump")).unwrap();

        // top level infos
        insta::assert_display_snapshot!(reader.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");

        // tasks
        let entries = reader.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, mut update_files): (Vec<_>, Vec<_>) = entries.into_iter().unzip();
        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"63086d59c3f2074e4ab3fff7e8cc36c1");
        assert_eq!(update_files.len(), 10);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed

        let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
        meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");

        // indexes
        let mut indexes = reader.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
        // the indexes are not returned in any particular order, so sort them by uid
        indexes.sort_by_key(|index| index.metadata().uid.to_string());

        let mut sorted = indexes.into_iter();
        let mut spells = sorted.next().unwrap();
        let mut movies = sorted.next().unwrap();
        let mut movies2 = sorted.next().unwrap();
        let mut products = sorted.next().unwrap();
        assert!(sorted.next().is_none());

        // products
        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
          "createdAt": "2022-10-07T11:38:54.74389899Z",
          "updatedAt": "2022-10-07T11:38:55.963185778Z"
        }
        "###);

        insta::assert_json_snapshot!(products.settings().unwrap());
        let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
          "createdAt": "2022-10-07T11:38:54.026649575Z",
          "updatedAt": "2022-10-07T11:39:04.188852537Z"
        }
        "###);

        insta::assert_json_snapshot!(movies.settings().unwrap());
        let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 110);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");

        // movies2
        insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
        {
          "uid": "movies_2",
          "primaryKey": null,
          "createdAt": "[now]",
          "updatedAt": "[now]"
        }
        "###);

        insta::assert_json_snapshot!(movies2.settings().unwrap());
        let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 0);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
          "createdAt": "2022-10-07T11:38:56.265951133Z",
          "updatedAt": "2022-10-07T11:38:56.521004328Z"
        }
        "###);

        insta::assert_json_snapshot!(spells.settings().unwrap());
        let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
    }
}
|