From e845cc2b6fc73aab56c33246e4c6eababe9f5f09 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 3 Oct 2022 18:50:06 +0200 Subject: [PATCH] fix the tests --- Cargo.lock | 3 + dump/Cargo.toml | 3 + dump/src/error.rs | 2 + dump/src/lib.rs | 234 ++++++++++++++++++++++++++++++++++++ dump/src/reader/mod.rs | 19 +-- dump/src/reader/v6.rs | 21 +++- dump/src/writer.rs | 136 ++++++++------------- index-scheduler/src/task.rs | 14 ++- meilisearch-auth/src/key.rs | 2 +- 9 files changed, 327 insertions(+), 107 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2e7bb60d..4dec9a239 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1145,12 +1145,15 @@ name = "dump" version = "0.29.0" dependencies = [ "anyhow", + "big_s", "flate2", "index", "index-scheduler", "insta", "log", + "maplit", "meilisearch-auth", + "meilisearch-types", "serde", "serde_json", "tar", diff --git a/dump/Cargo.toml b/dump/Cargo.toml index 0f418c55d..5350ecd8f 100644 --- a/dump/Cargo.toml +++ b/dump/Cargo.toml @@ -21,4 +21,7 @@ index-scheduler = { path = "../index-scheduler" } meilisearch-auth = { path = "../meilisearch-auth" } [dev-dependencies] +big_s = "1.0.2" insta = { version = "1.19.1", features = ["json", "redactions"] } +maplit = "1.0.2" +meilisearch-types = { path = "../meilisearch-types" } diff --git a/dump/src/error.rs b/dump/src/error.rs index 78912e1a7..be26e7406 100644 --- a/dump/src/error.rs +++ b/dump/src/error.rs @@ -11,4 +11,6 @@ pub enum Error { Io(#[from] std::io::Error), #[error(transparent)] Serde(#[from] serde_json::Error), + #[error(transparent)] + Uuid(#[from] uuid::Error), } diff --git a/dump/src/lib.rs b/dump/src/lib.rs index 8b25b6443..b316ec2a1 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -30,3 +30,237 @@ pub enum Version { V5, V6, } + +#[cfg(test)] +pub(crate) mod test { + use std::{ + fs::File, + io::{Read, Seek, SeekFrom}, + str::FromStr, + }; + + use big_s::S; + use index::{Checked, Settings}; + use index_scheduler::{milli::update::Setting, task::Details, Kind, Status, TaskView}; + use maplit::btreeset; + use meilisearch_auth::{Action, Key}; + use meilisearch_types::{index_uid::IndexUid, star_or::StarOr}; + use serde_json::{json, Map, Value}; + use time::{macros::datetime, Duration}; + use uuid::Uuid; + + use crate::{reader, DumpWriter, Version}; + + pub fn create_test_instance_uid() -> Uuid { + Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap() + } + + pub fn create_test_documents() -> Vec> { + vec![ + json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 2, "race": "bernese mountain", "name": "tamo", "age": 6 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 3, "race": "great pyrenees", "name": "patou", "age": 5 }) + .as_object() + .unwrap() + .clone(), + ] + } + + pub fn create_test_settings() -> Settings { + let settings = Settings { + displayed_attributes: Setting::Set(vec![S("race"), S("name")]), + searchable_attributes: Setting::Set(vec![S("name"), S("race")]), + filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }), + sortable_attributes: Setting::Set(btreeset! { S("age") }), + ranking_rules: Setting::NotSet, + stop_words: Setting::NotSet, + synonyms: Setting::NotSet, + distinct_attribute: Setting::NotSet, + typo_tolerance: Setting::NotSet, + faceting: Setting::NotSet, + pagination: Setting::NotSet, + _kind: std::marker::PhantomData, + }; + settings.check() + } + + pub fn create_test_tasks() -> Vec<(TaskView, Option<&'static [u8]>)> { + vec![ + ( + TaskView { + uid: 0, + index_uid: Some(S("doggos")), + status: Status::Succeeded, + kind: Kind::DocumentAddition, + details: Some(Details::DocumentAddition { + received_documents: 10_000, + indexed_documents: 3, + }), + error: None, + duration: Some(Duration::DAY), + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: Some(datetime!(2022-11-20 0:00 UTC)), + finished_at: Some(datetime!(2022-11-21 0:00 UTC)), + }, + None, + ), + ( + TaskView { + uid: 1, + index_uid: Some(S("doggos")), + status: Status::Enqueued, + kind: Kind::DocumentAddition, + details: None, + error: None, + duration: Some(Duration::DAY), + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: Some(datetime!(2022-11-20 0:00 UTC)), + finished_at: Some(datetime!(2022-11-21 0:00 UTC)), + }, + Some(br#"{ "id": 4, "race": "leonberg" }"#), + ), + ( + TaskView { + uid: 5, + index_uid: Some(S("doggos")), + status: Status::Enqueued, + kind: Kind::IndexDeletion, + details: None, + error: None, + duration: None, + enqueued_at: datetime!(2022-11-15 0:00 UTC), + started_at: None, + finished_at: None, + }, + None, + ), + ] + } + + pub fn create_test_api_keys() -> Vec { + vec![ + Key { + description: Some(S("The main key to manage all the doggos")), + name: Some(S("doggos_key")), + uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(), + actions: vec![Action::DocumentsAll], + indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())], + expires_at: Some(datetime!(4130-03-14 12:21 UTC)), + created_at: datetime!(1960-11-15 0:00 UTC), + updated_at: datetime!(2022-11-10 0:00 UTC), + }, + Key { + description: Some(S("The master key for everything and even the doggos")), + name: Some(S("master_key")), + uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(), + actions: vec![Action::All], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: datetime!(0000-01-01 00:01 UTC), + updated_at: datetime!(1964-05-04 17:25 UTC), + }, + Key { + description: Some(S("The useless key to for nothing nor the doggos")), + name: Some(S("useless_key")), + uid: Uuid::from_str("fb80b58b-0a34-412f-8ba7-1ce868f8ac5c").unwrap(), + actions: vec![], + indexes: vec![], + expires_at: None, + created_at: datetime!(400-02-29 0:00 UTC), + updated_at: datetime!(1024-02-29 0:00 UTC), + }, + ] + } + + pub fn create_test_dump() -> File { + let instance_uid = create_test_instance_uid(); + let dump = DumpWriter::new(instance_uid.clone()).unwrap(); + + // ========== Adding an index + let documents = create_test_documents(); + let settings = create_test_settings(); + + let mut index = dump.create_index("doggos").unwrap(); + for document in &documents { + index.push_document(document).unwrap(); + } + index.settings(&settings).unwrap(); + + // ========== pushing the task queue + let tasks = create_test_tasks(); + + let mut task_queue = dump.create_tasks_queue().unwrap(); + for (task, update_file) in &tasks { + task_queue.push_task(task, update_file.map(|c| c)).unwrap(); + } + + // ========== pushing the api keys + let api_keys = create_test_api_keys(); + + let mut keys = dump.create_keys().unwrap(); + for key in &api_keys { + keys.push_key(key).unwrap(); + } + + // create the dump + let mut file = tempfile::tempfile().unwrap(); + dump.persist_to(&mut file).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + + file + } + + #[test] + fn test_creating_dump() { + let mut file = create_test_dump(); + let mut dump = reader::open(&mut file).unwrap(); + + // ==== checking the top level infos + assert_eq!(dump.version(), Version::V6); + assert!(dump.date().is_some()); + assert_eq!( + dump.instance_uid().unwrap().unwrap(), + create_test_instance_uid() + ); + + // ==== checking the index + let mut indexes = dump.indexes().unwrap(); + let mut index = indexes.next().unwrap().unwrap(); + assert!(indexes.next().is_none()); // there was only one index in the dump + + assert_eq!(index.name(), "doggos"); + + for (document, expected) in index.documents().unwrap().zip(create_test_documents()) { + assert_eq!(document.unwrap(), expected); + } + assert_eq!(index.settings().unwrap(), create_test_settings()); + + // ==== checking the task queue + for (task, expected) in dump.tasks().zip(create_test_tasks()) { + let (task, content_file) = task.unwrap(); + assert_eq!(task, expected.0); + + if let Some(expected_update) = expected.1 { + assert!( + content_file.is_some(), + "A content file was expected for the task {}.", + expected.0.uid + ); + let mut update = Vec::new(); + content_file.unwrap().read_to_end(&mut update).unwrap(); + assert_eq!(update, expected_update); + } + } + + // ==== checking the keys + for (key, expected) in dump.keys().zip(create_test_api_keys()) { + assert_eq!(key.unwrap(), expected); + } + } +} diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index fe4096a8b..6eb7337d3 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -1,14 +1,16 @@ +use std::io::Read; use std::path::Path; use std::{fs::File, io::BufReader}; use flate2::{bufread::GzDecoder, Compression}; -use index::{Settings, Unchecked}; +use index::{Checked, Settings, Unchecked}; use index_scheduler::TaskView; use meilisearch_auth::Key; use serde::{Deserialize, Serialize}; use tempfile::TempDir; use time::OffsetDateTime; +use uuid::Uuid; use crate::{Result, Version}; @@ -21,12 +23,12 @@ use crate::{Result, Version}; mod v6; pub fn open( - dump_path: &Path, + dump: impl Read, ) -> Result< Box< dyn DumpReader< Document = serde_json::Map, - Settings = Settings, + Settings = Settings, Task = TaskView, UpdateFile = File, Key = Key, @@ -34,15 +36,13 @@ pub fn open( >, > { let path = TempDir::new()?; - - let dump = File::open(dump_path)?; let mut dump = BufReader::new(dump); - let gz = GzDecoder::new(&mut dump); let mut archive = tar::Archive::new(gz); archive.unpack(path.path())?; #[derive(Deserialize)] + #[serde(rename_all = "camelCase")] struct MetadataVersion { pub dump_version: Version, } @@ -61,7 +61,7 @@ pub fn open( as Box< dyn DumpReader< Document = serde_json::Map, - Settings = Settings, + Settings = Settings, Task = TaskView, UpdateFile = File, Key = Key, @@ -85,9 +85,12 @@ pub trait DumpReader { /// Return the version of the dump. fn version(&self) -> Version; - /// Return at which date the index was created. + /// Return at which date the dump was created if there was one. fn date(&self) -> Option; + /// Return the instance-uid if there was one. + fn instance_uid(&self) -> Result>; + /// Return an iterator over each indexes. fn indexes( &self, diff --git a/dump/src/reader/v6.rs b/dump/src/reader/v6.rs index 339f88b55..d125a19a6 100644 --- a/dump/src/reader/v6.rs +++ b/dump/src/reader/v6.rs @@ -4,9 +4,10 @@ use std::{ path::Path, }; -use index::Unchecked; +use index::{Checked, Unchecked}; use tempfile::TempDir; use time::OffsetDateTime; +use uuid::Uuid; use crate::{Error, Result, Version}; @@ -61,7 +62,7 @@ impl V6Reader { impl DumpReader for V6Reader { type Document = serde_json::Map; - type Settings = index::Settings; + type Settings = index::Settings; type Task = index_scheduler::TaskView; type UpdateFile = File; @@ -76,6 +77,11 @@ impl DumpReader for V6Reader { Some(self.metadata.dump_date) } + fn instance_uid(&self) -> Result> { + let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?; + Ok(Some(Uuid::parse_str(&uuid)?)) + } + fn indexes( &self, ) -> Result< @@ -125,7 +131,11 @@ impl DumpReader for V6Reader { &mut self, ) -> Box)>> + '_> { Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { - let task: index_scheduler::TaskView = serde_json::from_str(&line?)?; + let mut task: index_scheduler::TaskView = serde_json::from_str(&line?)?; + // TODO: this can be removed once we can `Deserialize` the duration from the `TaskView`. + if let Some((started_at, finished_at)) = task.started_at.zip(task.finished_at) { + task.duration = Some(finished_at - started_at); + } let update_file_path = self .dump .path() @@ -152,7 +162,7 @@ impl DumpReader for V6Reader { impl IndexReader for V6IndexReader { type Document = serde_json::Map; - type Settings = index::Settings; + type Settings = index::Settings; fn name(&self) -> &str { &self.name @@ -165,6 +175,7 @@ impl IndexReader for V6IndexReader { } fn settings(&mut self) -> Result { - Ok(serde_json::from_reader(&mut self.settings)?) + let settings: index::Settings = serde_json::from_reader(&mut self.settings)?; + Ok(settings.check()) } } diff --git a/dump/src/writer.rs b/dump/src/writer.rs index 0273de210..4b31b78ba 100644 --- a/dump/src/writer.rs +++ b/dump/src/writer.rs @@ -5,7 +5,11 @@ use std::{ }; use flate2::{write::GzEncoder, Compression}; +use index::{Checked, Settings}; +use index_scheduler::TaskView; +use meilisearch_auth::Key; use serde::Serialize; +use serde_json::{Map, Value}; use tempfile::TempDir; use time::OffsetDateTime; use uuid::Uuid; @@ -73,8 +77,8 @@ impl KeyWriter { Ok(KeyWriter { file }) } - pub fn push_key(&mut self, key: impl Serialize) -> Result<()> { - self.file.write_all(&serde_json::to_vec(&key)?)?; + pub fn push_key(&mut self, key: &Key) -> Result<()> { + self.file.write_all(&serde_json::to_vec(key)?)?; self.file.write_all(b"\n")?; Ok(()) } @@ -101,16 +105,15 @@ impl TaskWriter { /// Pushes tasks in the dump. /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. - pub fn push_task( - &mut self, - task_id: u32, - task: impl Serialize, - update_file: Option, - ) -> Result<()> { + pub fn push_task(&mut self, task: &TaskView, update_file: Option) -> Result<()> { + // TODO: this could be removed the day we implements `Deserialize` on the Duration. + let mut task = task.clone(); + task.duration = None; + self.queue.write_all(&serde_json::to_vec(&task)?)?; self.queue.write_all(b"\n")?; if let Some(mut update_file) = update_file { - let mut file = File::create(&self.update_files.join(task_id.to_string()))?; + let mut file = File::create(&self.update_files.join(task.uid.to_string()))?; std::io::copy(&mut update_file, &mut file)?; } Ok(()) @@ -135,13 +138,13 @@ impl IndexWriter { }) } - pub fn push_document(&mut self, document: impl Serialize) -> Result<()> { - self.documents.write_all(&serde_json::to_vec(&document)?)?; + pub fn push_document(&mut self, document: &Map) -> Result<()> { + self.documents.write_all(&serde_json::to_vec(document)?)?; self.documents.write_all(b"\n")?; Ok(()) } - pub fn settings(mut self, settings: impl Serialize) -> Result<()> { + pub fn settings(mut self, settings: &Settings) -> Result<()> { self.settings.write_all(&serde_json::to_vec(&settings)?)?; Ok(()) } @@ -149,14 +152,15 @@ impl IndexWriter { #[cfg(test)] pub(crate) mod test { - use std::{ - fmt::Write, - io::{Seek, SeekFrom}, - path::Path, - }; + use std::{fmt::Write, io::BufReader, path::Path, str::FromStr}; - use flate2::read::GzDecoder; - use serde_json::json; + use flate2::bufread::GzDecoder; + use index::Unchecked; + + use crate::test::{ + create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid, + create_test_settings, create_test_tasks, + }; use super::*; @@ -221,62 +225,10 @@ pub(crate) mod test { #[test] fn test_creating_dump() { - let instance_uid = Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap(); - let dump = DumpWriter::new(instance_uid.clone()).unwrap(); + let file = create_test_dump(); + let mut file = BufReader::new(file); - // ========== Adding an index - let documents = [ - json!({ "id": 1, "race": "golden retriever" }), - json!({ "id": 2, "race": "bernese mountain" }), - json!({ "id": 3, "race": "great pyrenees" }), - ]; - let settings = json!({ "the empty setting": [], "the null setting": null, "the string setting": "hello" }); - let mut index = dump.create_index("doggos").unwrap(); - for document in &documents { - index.push_document(document).unwrap(); - } - index.settings(&settings).unwrap(); - - // ========== pushing the task queue - let tasks = [ - (0, json!({ "is this a good task": "yes" }), None), - ( - 1, - json!({ "is this a good boi": "absolutely" }), - Some(br#"{ "id": 4, "race": "leonberg" }"#), - ), - ( - 3, - json!({ "and finally": "one last task with a missing id in the middle" }), - None, - ), - ]; - - // ========== pushing the task queue - let mut task_queue = dump.create_tasks_queue().unwrap(); - for (task_id, task, update_file) in &tasks { - task_queue - .push_task(*task_id, task, update_file.map(|c| c.as_slice())) - .unwrap(); - } - - // ========== pushing the api keys - let api_keys = [ - json!({ "one api key": 1, "for": "golden retriever" }), - json!({ "id": 2, "race": "bernese mountain" }), - json!({ "id": 3, "race": "great pyrenees" }), - ]; - let mut keys = dump.create_keys().unwrap(); - for key in &api_keys { - keys.push_key(key).unwrap(); - } - - // create the dump - let mut file = tempfile::tempfile().unwrap(); - dump.persist_to(&mut file).unwrap(); - - // ============ testing we write everything in the correct place. - file.seek(SeekFrom::Start(0)).unwrap(); + // ============ ensuring we wrote everything in the correct place. let dump = tempfile::tempdir().unwrap(); let gz = GzDecoder::new(&mut file); @@ -302,7 +254,6 @@ pub(crate) mod test { "###); // ==== checking the top level infos - let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); let metadata: Metadata = serde_json::from_str(&metadata).unwrap(); insta::assert_json_snapshot!(metadata, { ".dumpDate" => "[date]" }, @r###" @@ -313,27 +264,37 @@ pub(crate) mod test { } "###); + let instance_uid = fs::read_to_string(dump_path.join("instance-uid")).unwrap(); assert_eq!( - instance_uid.to_string(), - fs::read_to_string(dump_path.join("instance-uid")).unwrap() + Uuid::from_str(&instance_uid).unwrap(), + create_test_instance_uid() ); // ==== checking the index - let docs = fs::read_to_string(dump_path.join("indexes/doggos/documents.jsonl")).unwrap(); - for (document, expected) in docs.lines().zip(documents) { - assert_eq!(document, serde_json::to_string(&expected).unwrap()); + for (document, expected) in docs.lines().zip(create_test_documents()) { + assert_eq!( + serde_json::from_str::>(document).unwrap(), + expected + ); } let test_settings = fs::read_to_string(dump_path.join("indexes/doggos/settings.json")).unwrap(); - assert_eq!(test_settings, serde_json::to_string(&settings).unwrap()); + assert_eq!( + serde_json::from_str::>(&test_settings).unwrap(), + create_test_settings().into_unchecked() + ); // ==== checking the task queue let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap(); - for (task, expected) in tasks_queue.lines().zip(tasks) { - assert_eq!(task, serde_json::to_string(&expected.1).unwrap()); - if let Some(expected_update) = expected.2 { - let path = dump_path.join(format!("tasks/update_files/{}", expected.0)); + for (task, mut expected) in tasks_queue.lines().zip(create_test_tasks()) { + // TODO: This can be removed once `Duration` from the `TaskView` is implemented. + expected.0.duration = None; + dbg!(&task); + assert_eq!(serde_json::from_str::(task).unwrap(), expected.0); + + if let Some(expected_update) = expected.1 { + let path = dump_path.join(format!("tasks/update_files/{}", expected.0.uid)); println!("trying to open {}", path.display()); let update = fs::read(path).unwrap(); assert_eq!(update, expected_update); @@ -341,10 +302,9 @@ pub(crate) mod test { } // ==== checking the keys - let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); - for (key, expected) in keys.lines().zip(api_keys) { - assert_eq!(key, serde_json::to_string(&expected).unwrap()); + for (key, expected) in keys.lines().zip(create_test_api_keys()) { + assert_eq!(serde_json::from_str::(key).unwrap(), expected); } } } diff --git a/index-scheduler/src/task.rs b/index-scheduler/src/task.rs index 4e08b70bc..4564ad3c4 100644 --- a/index-scheduler/src/task.rs +++ b/index-scheduler/src/task.rs @@ -14,32 +14,36 @@ use crate::{Error, TaskId}; #[serde(rename_all = "camelCase")] pub struct TaskView { pub uid: TaskId, + #[serde(default)] pub index_uid: Option, pub status: Status, // TODO use our own Kind for the user #[serde(rename = "type")] pub kind: Kind, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(skip_serializing_if = "Option::is_none", default)] pub details: Option
, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(skip_serializing_if = "Option::is_none", default)] pub error: Option, #[serde( serialize_with = "serialize_duration", - skip_serializing_if = "Option::is_none" + skip_serializing_if = "Option::is_none", + default )] pub duration: Option, #[serde(with = "time::serde::rfc3339")] pub enqueued_at: OffsetDateTime, #[serde( with = "time::serde::rfc3339::option", - skip_serializing_if = "Option::is_none" + skip_serializing_if = "Option::is_none", + default )] pub started_at: Option, #[serde( with = "time::serde::rfc3339::option", - skip_serializing_if = "Option::is_none" + skip_serializing_if = "Option::is_none", + default )] pub finished_at: Option, } diff --git a/meilisearch-auth/src/key.rs b/meilisearch-auth/src/key.rs index eb72aaa72..5ff8f8ac5 100644 --- a/meilisearch-auth/src/key.rs +++ b/meilisearch-auth/src/key.rs @@ -11,7 +11,7 @@ use time::macros::{format_description, time}; use time::{Date, OffsetDateTime, PrimitiveDateTime}; use uuid::Uuid; -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct Key { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option,