From 101f55ce8b052c4f3b75c5148536fd0709813e29 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 4 Oct 2022 19:13:30 +0200 Subject: [PATCH] introduce the index metadata --- dump/src/lib.rs | 34 ++++- dump/src/reader/mod.rs | 16 ++- dump/src/reader/v5/meta.rs | 14 ++ dump/src/reader/v5/mod.rs | 221 +++++++++++++++++++++++++++++ dump/src/reader/v5/settings.rs | 251 +++++++++++++++++++++++++++++++++ dump/src/reader/v5/tasks.rs | 173 +++++++++++++++++++++++ dump/src/reader/v6.rs | 65 +++++---- dump/src/writer.rs | 32 +++-- 8 files changed, 753 insertions(+), 53 deletions(-) create mode 100644 dump/src/reader/v5/meta.rs create mode 100644 dump/src/reader/v5/mod.rs create mode 100644 dump/src/reader/v5/settings.rs create mode 100644 dump/src/reader/v5/tasks.rs diff --git a/dump/src/lib.rs b/dump/src/lib.rs index b316ec2a1..a0b3e8ea2 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -21,6 +21,17 @@ struct Metadata { pub dump_date: OffsetDateTime, } +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IndexMetadata { + pub uid: String, + pub primary_key: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] pub enum Version { V1, @@ -49,12 +60,21 @@ pub(crate) mod test { use time::{macros::datetime, Duration}; use uuid::Uuid; - use crate::{reader, DumpWriter, Version}; + use crate::{reader, DumpWriter, IndexMetadata, Version}; pub fn create_test_instance_uid() -> Uuid { Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap() } + pub fn create_test_index_metadata() -> IndexMetadata { + IndexMetadata { + uid: S("doggo"), + primary_key: None, + created_at: datetime!(2022-11-20 12:00 UTC), + updated_at: datetime!(2022-11-21 00:00 UTC), + } + } + pub fn create_test_documents() -> Vec> { vec![ json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 }) @@ -186,7 +206,9 @@ pub(crate) mod test { let documents = create_test_documents(); let settings = create_test_settings(); - let mut index = dump.create_index("doggos").unwrap(); + let mut index = dump + .create_index("doggos", &create_test_index_metadata()) + .unwrap(); for document in &documents { index.push_document(document).unwrap(); } @@ -217,7 +239,7 @@ pub(crate) mod test { } #[test] - fn test_creating_dump() { + fn test_creating_and_read_dump() { let mut file = create_test_dump(); let mut dump = reader::open(&mut file).unwrap(); @@ -234,12 +256,14 @@ pub(crate) mod test { let mut index = indexes.next().unwrap().unwrap(); assert!(indexes.next().is_none()); // there was only one index in the dump - assert_eq!(index.name(), "doggos"); - for (document, expected) in index.documents().unwrap().zip(create_test_documents()) { assert_eq!(document.unwrap(), expected); } assert_eq!(index.settings().unwrap(), create_test_settings()); + assert_eq!(index.metadata(), &create_test_index_metadata()); + + drop(index); + drop(indexes); // ==== checking the task queue for (task, expected) in dump.tasks().zip(create_test_tasks()) { diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 6eb7337d3..57e2fa12d 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -12,7 +12,7 @@ use tempfile::TempDir; use time::OffsetDateTime; use uuid::Uuid; -use crate::{Result, Version}; +use crate::{IndexMetadata, Result, Version}; // use self::loaders::{v2, v3, v4, v5}; @@ -20,6 +20,7 @@ use crate::{Result, Version}; // mod compat; // mod loaders; // mod v1; +mod v5; mod v6; pub fn open( @@ -97,10 +98,13 @@ pub trait DumpReader { ) -> Result< Box< dyn Iterator< - Item = Result< - Box>, - >, - >, + Item = Result< + Box< + dyn IndexReader + + '_, + >, + >, + > + '_, >, >; @@ -117,7 +121,7 @@ pub trait IndexReader { type Document; type Settings; - fn name(&self) -> &str; + fn metadata(&self) -> &IndexMetadata; fn documents(&mut self) -> Result> + '_>>; fn settings(&mut self) -> Result; } diff --git a/dump/src/reader/v5/meta.rs b/dump/src/reader/v5/meta.rs new file mode 100644 index 000000000..f13c2bbef --- /dev/null +++ b/dump/src/reader/v5/meta.rs @@ -0,0 +1,14 @@ +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Serialize, Deserialize, Debug)] +pub struct IndexUuid { + pub uid: String, + pub index_meta: IndexMeta, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct IndexMeta { + pub uuid: Uuid, + pub creation_task_id: usize, +} diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs new file mode 100644 index 000000000..671f0f5d5 --- /dev/null +++ b/dump/src/reader/v5/mod.rs @@ -0,0 +1,221 @@ +//! Here is what a dump v5 look like. +//! +//! ```text +//! . +//! ├── indexes +//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── instance-uid +//! ├── keys +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── updates_files +//! └── c83a004a-da98-4b94-b245-3256266c7281 +//! ``` +//! +//! Here is what `index_uuids/data.jsonl` looks like; +//! +//! ```json +//! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}} +//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}} +//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}} +//! ``` +//! + +use std::{ + fs::{self, File}, + io::{BufRead, BufReader}, + path::Path, +}; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::{IndexMetadata, Result, Version}; + +use self::{ + meta::IndexUuid, + settings::{Checked, Settings, Unchecked}, + tasks::Task, +}; + +use super::{DumpReader, IndexReader}; + +mod meta; +mod settings; +mod tasks; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V5Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, + index_uuid: Vec, +} + +struct V5IndexReader { + metadata: IndexMetadata, + + documents: BufReader, + settings: BufReader, +} + +impl V5IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let metadata = File::open(path.join("metadata.json"))?; + + let ret = V5IndexReader { + metadata: serde_json::from_reader(metadata)?, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + }; + + Ok(ret) + } +} + +impl V5Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) + .collect::>>()?; + + Ok(V5Reader { + metadata, + tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), + keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), + index_uuid, + dump, + }) + } +} + +impl DumpReader for V5Reader { + type Document = serde_json::Map; + type Settings = Settings; + + type Task = Task; + type UpdateFile = File; + + // TODO: remove this + type Key = meilisearch_auth::Key; + + fn version(&self) -> Version { + Version::V5 + } + + fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + fn instance_uid(&self) -> Result> { + let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?; + Ok(Some(Uuid::parse_str(&uuid)?)) + } + + fn indexes( + &self, + ) -> Result< + Box< + dyn Iterator< + Item = Result< + Box< + dyn super::IndexReader< + Document = Self::Document, + Settings = Self::Settings, + > + '_, + >, + >, + > + '_, + >, + > { + Ok(Box::new(self.index_uuid.iter().map(|index| -> Result<_> { + Ok(Box::new(V5IndexReader::new( + index.uid.clone(), + &self + .dump + .path() + .join("indexes") + .join(index.index_meta.uuid.to_string()), + )?) + as Box< + dyn IndexReader, + >) + }))) + } + + fn tasks( + &mut self, + ) -> Box)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Self::Task = serde_json::from_str(&line?)?; + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("update_files") + .join(uuid.to_string()); + Ok((task, Some(File::open(update_file_path)?))) + } else { + Ok((task, None)) + } + })) + } + + // TODO: do it + fn keys(&mut self) -> Box> + '_> { + Box::new( + (&mut self.keys) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } +} + +impl IndexReader for V5IndexReader { + type Document = serde_json::Map; + type Settings = Settings; + + fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + fn documents(&mut self) -> Result> + '_>> { + Ok(Box::new((&mut self.documents).lines().map( + |line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }, + ))) + } + + fn settings(&mut self) -> Result { + let settings: Settings = serde_json::from_reader(&mut self.settings)?; + Ok(settings.check()) + } +} diff --git a/dump/src/reader/v5/settings.rs b/dump/src/reader/v5/settings.rs new file mode 100644 index 000000000..4499ba1ff --- /dev/null +++ b/dump/src/reader/v5/settings.rs @@ -0,0 +1,251 @@ +use std::{ + collections::{BTreeMap, BTreeSet}, + marker::PhantomData, +}; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] +pub struct Checked; + +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Unchecked; + +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde(default)] + pub displayed_attributes: Setting>, + + #[serde(default)] + pub searchable_attributes: Setting>, + + #[serde(default)] + pub filterable_attributes: Setting>, + #[serde(default)] + pub sortable_attributes: Setting>, + #[serde(default)] + pub ranking_rules: Setting>, + #[serde(default)] + pub stop_words: Setting>, + #[serde(default)] + pub synonyms: Setting>>, + #[serde(default)] + pub distinct_attribute: Setting, + #[serde(default)] + pub typo_tolerance: Setting, + #[serde(default)] + pub faceting: Setting, + #[serde(default)] + pub pagination: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +fn serialize_with_wildcard( + field: &Setting>, + s: S, +) -> std::result::Result +where + S: Serializer, +{ + let wildcard = vec!["*".to_string()]; + match field { + Setting::Set(value) => Some(value), + Setting::Reset => Some(&wildcard), + Setting::NotSet => None, + } + .serialize(s) +} + +#[derive(Debug, Clone, PartialEq, Copy)] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } + + /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. + pub fn or_reset(self, val: T) -> Self { + match self { + Self::Reset => Self::Set(val), + otherwise => otherwise, + } + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[serde(default)] + pub one_typo: Setting, + #[serde(default)] + pub two_typos: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[serde(default)] + pub enabled: Setting, + #[serde(default)] + pub min_word_size_for_typos: Setting, + #[serde(default)] + pub disable_on_words: Setting>, + #[serde(default)] + pub disable_on_attributes: Setting>, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct FacetingSettings { + #[serde(default)] + pub max_values_per_facet: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct PaginationSettings { + #[serde(default)] + pub max_total_hits: Setting, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, + faceting: Setting::Reset, + pagination: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + .. + } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, + faceting: self.faceting, + pagination: self.pagination, + _kind: PhantomData, + } + } +} diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs new file mode 100644 index 000000000..6683354d1 --- /dev/null +++ b/dump/src/reader/v5/tasks.rs @@ -0,0 +1,173 @@ +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::settings::{Settings, Unchecked}; + +pub type TaskId = u32; +pub type BatchId = u32; + +#[derive(Clone, Debug, Deserialize, PartialEq)] +pub struct Task { + pub id: TaskId, + /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task) + /// then this is None + // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of + // the TaskContent. + pub content: TaskContent, + pub events: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq)] +#[allow(clippy::large_enum_variant)] +pub enum TaskContent { + DocumentAddition { + index_uid: IndexUid, + content_uuid: Uuid, + merge_strategy: IndexDocumentsMethod, + primary_key: Option, + documents_count: usize, + allow_index_creation: bool, + }, + DocumentDeletion { + index_uid: IndexUid, + deletion: DocumentDeletion, + }, + SettingsUpdate { + index_uid: IndexUid, + settings: Settings, + /// Indicates whether the task was a deletion + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion { + index_uid: IndexUid, + }, + IndexCreation { + index_uid: IndexUid, + primary_key: Option, + }, + IndexUpdate { + index_uid: IndexUid, + primary_key: Option, + }, + Dump { + uid: String, + }, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct IndexUid(String); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. + UpdateDocuments, +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum DocumentDeletion { + Clear, + Ids(Vec), +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum TaskEvent { + Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Batched { + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + batch_id: BatchId, + }, + Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Succeeded { + result: TaskResult, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, + Failed { + error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum TaskResult { + DocumentAddition { indexed_documents: u64 }, + DocumentDeletion { deleted_documents: u64 }, + ClearAll { deleted_documents: u64 }, + Other, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct ResponseError { + message: String, + #[serde(rename = "code")] + error_code: String, + #[serde(rename = "type")] + error_type: String, + #[serde(rename = "link")] + error_link: String, +} + +impl Task { + /// Return true when a task is finished. + /// A task is finished when its last state is either `Succeeded` or `Failed`. + pub fn is_finished(&self) -> bool { + self.events.last().map_or(false, |event| { + matches!( + event, + TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. } + ) + }) + } + + /// Return the content_uuid of the `Task` if there is one. + pub fn get_content_uuid(&self) -> Option { + match self { + Task { + content: TaskContent::DocumentAddition { content_uuid, .. }, + .. + } => Some(*content_uuid), + _ => None, + } + } + + pub fn index_uid(&self) -> Option<&str> { + match &self.content { + TaskContent::DocumentAddition { index_uid, .. } + | TaskContent::DocumentDeletion { index_uid, .. } + | TaskContent::SettingsUpdate { index_uid, .. } + | TaskContent::IndexDeletion { index_uid } + | TaskContent::IndexCreation { index_uid, .. } + | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), + TaskContent::Dump { .. } => None, + } + } +} + +impl IndexUid { + pub fn into_inner(self) -> String { + self.0 + } + + /// Return a reference over the inner str. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::ops::Deref for IndexUid { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/dump/src/reader/v6.rs b/dump/src/reader/v6.rs index d125a19a6..57cd8d523 100644 --- a/dump/src/reader/v6.rs +++ b/dump/src/reader/v6.rs @@ -2,6 +2,7 @@ use std::{ fs::{self, File}, io::{BufRead, BufReader}, path::Path, + str::FromStr, }; use index::{Checked, Unchecked}; @@ -9,50 +10,35 @@ use tempfile::TempDir; use time::OffsetDateTime; use uuid::Uuid; -use crate::{Error, Result, Version}; +use crate::{Error, IndexMetadata, Result, Version}; use super::{DumpReader, IndexReader}; type Metadata = crate::Metadata; -pub fn date(dump: &Path) -> Result { - let metadata = fs::read(dump.join("metadata.json"))?; - let metadata: Metadata = serde_json::from_reader(&*metadata)?; - Ok(metadata.dump_date) -} - pub struct V6Reader { dump: TempDir, + instance_uid: Uuid, metadata: Metadata, tasks: BufReader, keys: BufReader, } struct V6IndexReader { - name: String, + metadata: IndexMetadata, documents: BufReader, settings: BufReader, } -impl V6IndexReader { - pub fn new(name: String, path: &Path) -> Result { - let ret = V6IndexReader { - name, - documents: BufReader::new(File::open(path.join("documents.jsonl"))?), - settings: BufReader::new(File::open(path.join("settings.json"))?), - }; - - Ok(ret) - } -} - impl V6Reader { pub fn open(dump: TempDir) -> Result { let meta_file = fs::read(dump.path().join("metadata.json"))?; - let metadata = serde_json::from_reader(&*meta_file)?; + let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?; + let instance_uid = Uuid::from_str(&instance_uid)?; Ok(V6Reader { - metadata, + metadata: serde_json::from_reader(&*meta_file)?, + instance_uid, tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), dump, @@ -60,6 +46,20 @@ impl V6Reader { } } +impl V6IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let metadata = File::open(path.join("metadata.json"))?; + + let ret = V6IndexReader { + metadata: serde_json::from_reader(metadata)?, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + }; + + Ok(ret) + } +} + impl DumpReader for V6Reader { type Document = serde_json::Map; type Settings = index::Settings; @@ -78,8 +78,7 @@ impl DumpReader for V6Reader { } fn instance_uid(&self) -> Result> { - let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?; - Ok(Some(Uuid::parse_str(&uuid)?)) + Ok(Some(self.instance_uid)) } fn indexes( @@ -87,15 +86,15 @@ impl DumpReader for V6Reader { ) -> Result< Box< dyn Iterator< - Item = Result< - Box< - dyn super::IndexReader< - Document = Self::Document, - Settings = Self::Settings, + Item = Result< + Box< + dyn super::IndexReader< + Document = Self::Document, + Settings = Self::Settings, + > + '_, >, >, - >, - >, + > + '_, >, > { let entries = fs::read_dir(self.dump.path().join("indexes"))?; @@ -164,8 +163,8 @@ impl IndexReader for V6IndexReader { type Document = serde_json::Map; type Settings = index::Settings; - fn name(&self) -> &str { - &self.name + fn metadata(&self) -> &IndexMetadata { + &self.metadata } fn documents(&mut self) -> Result> + '_>> { diff --git a/dump/src/writer.rs b/dump/src/writer.rs index 4b31b78ba..fc0e44ba0 100644 --- a/dump/src/writer.rs +++ b/dump/src/writer.rs @@ -8,13 +8,12 @@ use flate2::{write::GzEncoder, Compression}; use index::{Checked, Settings}; use index_scheduler::TaskView; use meilisearch_auth::Key; -use serde::Serialize; use serde_json::{Map, Value}; use tempfile::TempDir; use time::OffsetDateTime; use uuid::Uuid; -use crate::{Metadata, Result, CURRENT_DUMP_VERSION}; +use crate::{IndexMetadata, Metadata, Result, CURRENT_DUMP_VERSION}; pub struct DumpWriter { dir: TempDir, @@ -23,8 +22,9 @@ pub struct DumpWriter { impl DumpWriter { pub fn new(instance_uuid: Uuid) -> Result { let dir = TempDir::new()?; + fs::write( - dir.path().join("instance-uid"), + dir.path().join("instance_uid.uuid"), &instance_uuid.as_hyphenated().to_string(), )?; @@ -43,8 +43,8 @@ impl DumpWriter { Ok(DumpWriter { dir }) } - pub fn create_index(&self, index_name: &str) -> Result { - IndexWriter::new(self.dir.path().join("indexes").join(index_name)) + pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result { + IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata) } pub fn create_keys(&self) -> Result { @@ -126,9 +126,12 @@ pub struct IndexWriter { } impl IndexWriter { - pub(crate) fn new(path: PathBuf) -> Result { + pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result { std::fs::create_dir(&path)?; + let metadata_file = File::create(path.join("metadata.json"))?; + serde_json::to_writer(metadata_file, metadata)?; + let documents = File::create(path.join("documents.jsonl"))?; let settings = File::create(path.join("settings.json"))?; @@ -243,14 +246,15 @@ pub(crate) mod test { ├---- indexes/ │ └---- doggos/ │ │ ├---- settings.json - │ │ └---- documents.jsonl + │ │ ├---- documents.jsonl + │ │ └---- metadata.json ├---- tasks/ │ ├---- update_files/ │ │ └---- 1 │ └---- queue.jsonl ├---- keys.jsonl ├---- metadata.json - └---- instance-uid + └---- instance_uid.uuid "###); // ==== checking the top level infos @@ -264,7 +268,7 @@ pub(crate) mod test { } "###); - let instance_uid = fs::read_to_string(dump_path.join("instance-uid")).unwrap(); + let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap(); assert_eq!( Uuid::from_str(&instance_uid).unwrap(), create_test_instance_uid() @@ -284,6 +288,16 @@ pub(crate) mod test { serde_json::from_str::>(&test_settings).unwrap(), create_test_settings().into_unchecked() ); + let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap(); + let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap(); + insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###" + { + "uid": "doggo", + "primaryKey": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); // ==== checking the task queue let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();