diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3a10a611f..54d3b38b4 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -6,7 +6,7 @@ name: Execute code coverage jobs: nightly-coverage: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index f4bf2d465..79da1e4e4 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -1,4 +1,4 @@ -name: Publish deb pkg to GitHub release & APT repository & Homebrew +name: Publish to APT repository & Homebrew on: release: @@ -38,11 +38,11 @@ jobs: homebrew: name: Bump Homebrew formula - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest needs: check-version steps: - name: Create PR to Homebrew - uses: mislav/bump-homebrew-formula-action@v1 + uses: mislav/bump-homebrew-formula-action@v2 with: formula-name: meilisearch env: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 266e306d6..8cde7d527 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -25,7 +25,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.0.0 + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -49,7 +49,7 @@ jobs: toolchain: stable override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.0.0 + uses: Swatinem/rust-cache@v2.2.0 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -68,7 +68,7 @@ jobs: override: true components: clippy - name: Cache dependencies - uses: Swatinem/rust-cache@v2.0.0 + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -87,6 +87,6 @@ jobs: override: true components: rustfmt - name: Cache dependencies - uses: Swatinem/rust-cache@v2.0.0 + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo fmt run: cargo fmt --all -- --check diff --git a/dump/src/error.rs b/dump/src/error.rs index a11aae9cf..0d57729ae 100644 --- a/dump/src/error.rs +++ b/dump/src/error.rs @@ -3,8 +3,6 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { - #[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")] - DumpV1Unsupported, #[error("Bad index name.")] BadIndexName, #[error("Malformed task.")] @@ -28,7 +26,6 @@ impl ErrorCode for Error { Error::Uuid(_) => Code::Internal, // all these errors should never be raised when creating a dump, thus no error code should be associated. - Error::DumpV1Unsupported => Code::Internal, Error::BadIndexName => Code::Internal, Error::MalformedTask => Code::Internal, } diff --git a/dump/src/lib.rs b/dump/src/lib.rs index 5be680c12..7a7b9a5b7 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -23,7 +23,7 @@ const CURRENT_DUMP_VERSION: Version = Version::V6; type Result = std::result::Result; -#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Metadata { pub dump_version: Version, @@ -32,7 +32,7 @@ pub struct Metadata { pub dump_date: OffsetDateTime, } -#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { pub uid: String, @@ -43,7 +43,7 @@ pub struct IndexMetadata { pub updated_at: OffsetDateTime, } -#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] pub enum Version { V1, V2, diff --git a/dump/src/reader/compat/mod.rs b/dump/src/reader/compat/mod.rs index 29836aa61..aabf400d9 100644 --- a/dump/src/reader/compat/mod.rs +++ b/dump/src/reader/compat/mod.rs @@ -1,3 +1,4 @@ +pub mod v1_to_v2; pub mod v2_to_v3; pub mod v3_to_v4; pub mod v4_to_v5; diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-11.snap b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-11.snap new file mode 100644 index 000000000..1adf85e6a --- /dev/null +++ b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-11.snap @@ -0,0 +1,23 @@ +--- +source: dump/src/reader/compat/v1_to_v2.rs +expression: spells.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap new file mode 100644 index 000000000..f7e1736b1 --- /dev/null +++ b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap @@ -0,0 +1,37 @@ +--- +source: dump/src/reader/compat/v1_to_v2.rs +expression: products.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "distinctAttribute": null +} diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap new file mode 100644 index 000000000..8c36fe96c --- /dev/null +++ b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap @@ -0,0 +1,27 @@ +--- +source: dump/src/reader/compat/v1_to_v2.rs +expression: movies.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [ + "genres", + "id" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness", + "asc(release_date)" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-7.snap b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-7.snap new file mode 100644 index 000000000..8c36fe96c --- /dev/null +++ b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-7.snap @@ -0,0 +1,27 @@ +--- +source: dump/src/reader/compat/v1_to_v2.rs +expression: movies.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [ + "genres", + "id" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness", + "asc(release_date)" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap new file mode 100644 index 000000000..1adf85e6a --- /dev/null +++ b/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap @@ -0,0 +1,23 @@ +--- +source: dump/src/reader/compat/v1_to_v2.rs +expression: spells.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/compat/v1_to_v2.rs b/dump/src/reader/compat/v1_to_v2.rs new file mode 100644 index 000000000..23e4529dc --- /dev/null +++ b/dump/src/reader/compat/v1_to_v2.rs @@ -0,0 +1,414 @@ +use std::{collections::BTreeSet, str::FromStr}; + +use crate::reader::{v1, v2, Document}; + +use super::v2_to_v3::CompatV2ToV3; +use crate::Result; + +pub struct CompatV1ToV2 { + pub from: v1::V1Reader, +} + +impl CompatV1ToV2 { + pub fn new(v1: v1::V1Reader) -> Self { + Self { from: v1 } + } + + pub fn to_v3(self) -> CompatV2ToV3 { + CompatV2ToV3::Compat(self) + } + + pub fn version(&self) -> crate::Version { + self.from.version() + } + + pub fn date(&self) -> Option { + self.from.date() + } + + pub fn index_uuid(&self) -> Vec { + self.from + .index_uuid() + .into_iter() + .enumerate() + // we use the index of the index 😬 as UUID for the index, so that we can link the v2::Task to their index + .map(|(index, index_uuid)| v2::meta::IndexUuid { + uid: index_uuid.uid, + uuid: uuid::Uuid::from_u128(index as u128), + }) + .collect() + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.from.indexes()?.map(|index_reader| Ok(CompatIndexV1ToV2 { from: index_reader? }))) + } + + pub fn tasks( + &mut self, + ) -> Box)>> + '_> { + // Convert an error here to an iterator yielding the error + let indexes = match self.from.indexes() { + Ok(indexes) => indexes, + Err(err) => return Box::new(std::iter::once(Err(err))), + }; + let it = indexes.enumerate().flat_map( + move |(index, index_reader)| -> Box> { + let index_reader = match index_reader { + Ok(index_reader) => index_reader, + Err(err) => return Box::new(std::iter::once(Err(err))), + }; + Box::new( + index_reader + .tasks() + // Filter out the UpdateStatus::Customs variant that is not supported in v2 + // and enqueued tasks, that don't contain the necessary update file in v1 + .filter_map(move |task| -> Option<_> { + let task = match task { + Ok(task) => task, + Err(err) => return Some(Err(err)), + }; + Some(Ok(( + v2::Task { + uuid: uuid::Uuid::from_u128(index as u128), + update: Option::from(task)?, + }, + None, + ))) + }), + ) + }, + ); + Box::new(it) + } +} + +pub struct CompatIndexV1ToV2 { + pub from: v1::V1IndexReader, +} + +impl CompatIndexV1ToV2 { + pub fn metadata(&self) -> &crate::IndexMetadata { + self.from.metadata() + } + + pub fn documents(&mut self) -> Result> + '_>> { + self.from.documents().map(|it| Box::new(it) as Box>) + } + + pub fn settings(&mut self) -> Result> { + Ok(v2::settings::Settings::::from(self.from.settings()?).check()) + } +} + +impl From for v2::Settings { + fn from(source: v1::settings::Settings) -> Self { + let displayed_attributes = source + .displayed_attributes + .map(|opt| opt.map(|displayed_attributes| displayed_attributes.into_iter().collect())); + let attributes_for_faceting = source.attributes_for_faceting.map(|opt| { + opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()) + }); + let ranking_rules = source.ranking_rules.map(|opt| { + opt.map(|ranking_rules| { + ranking_rules + .into_iter() + .filter_map(|ranking_rule| { + match v1::settings::RankingRule::from_str(&ranking_rule) { + Ok(ranking_rule) => { + let criterion: Option = + ranking_rule.into(); + criterion.as_ref().map(ToString::to_string) + } + Err(()) => Some(ranking_rule), + } + }) + .collect() + }) + }); + + Self { + displayed_attributes, + searchable_attributes: source.searchable_attributes, + filterable_attributes: attributes_for_faceting, + ranking_rules, + stop_words: source.stop_words, + synonyms: source.synonyms, + distinct_attribute: source.distinct_attribute, + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(source: v1::update::UpdateStatus) -> Self { + use v1::update::UpdateStatus as UpdateStatusV1; + use v2::updates::UpdateStatus as UpdateStatusV2; + Some(match source { + UpdateStatusV1::Enqueued { content } => { + log::warn!( + "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", + content.update_id + ); + log::warn!("Task will be skipped in the queue of imported tasks."); + + return None; + } + UpdateStatusV1::Failed { content } => UpdateStatusV2::Failed(v2::updates::Failed { + from: v2::updates::Processing { + from: v2::updates::Enqueued { + update_id: content.update_id, + meta: Option::from(content.update_type)?, + enqueued_at: content.enqueued_at, + content: None, + }, + started_processing_at: content.processed_at + - std::time::Duration::from_secs_f64(content.duration), + }, + error: v2::ResponseError { + // error code is ignored by serialization, and so always default in deserialized v2 dumps + // that's a good thing, because we don't have them in v1 dump 😅 + code: http::StatusCode::default(), + message: content.error.unwrap_or_default(), + // error codes are unchanged between v1 and v2 + error_code: content.error_code.unwrap_or_default(), + // error types are unchanged between v1 and v2 + error_type: content.error_type.unwrap_or_default(), + // error links are unchanged between v1 and v2 + error_link: content.error_link.unwrap_or_default(), + }, + failed_at: content.processed_at, + }), + UpdateStatusV1::Processed { content } => { + UpdateStatusV2::Processed(v2::updates::Processed { + success: match &content.update_type { + v1::update::UpdateType::ClearAll => { + v2::updates::UpdateResult::DocumentDeletion { deleted: u64::MAX } + } + v1::update::UpdateType::Customs => v2::updates::UpdateResult::Other, + v1::update::UpdateType::DocumentsAddition { number } => { + v2::updates::UpdateResult::DocumentsAddition( + v2::updates::DocumentAdditionResult { nb_documents: *number }, + ) + } + v1::update::UpdateType::DocumentsPartial { number } => { + v2::updates::UpdateResult::DocumentsAddition( + v2::updates::DocumentAdditionResult { nb_documents: *number }, + ) + } + v1::update::UpdateType::DocumentsDeletion { number } => { + v2::updates::UpdateResult::DocumentDeletion { deleted: *number as u64 } + } + v1::update::UpdateType::Settings { .. } => v2::updates::UpdateResult::Other, + }, + processed_at: content.processed_at, + from: v2::updates::Processing { + from: v2::updates::Enqueued { + update_id: content.update_id, + meta: Option::from(content.update_type)?, + enqueued_at: content.enqueued_at, + content: None, + }, + started_processing_at: content.processed_at + - std::time::Duration::from_secs_f64(content.duration), + }, + }) + } + }) + } +} + +impl From for Option { + fn from(source: v1::update::UpdateType) -> Self { + Some(match source { + v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, + v1::update::UpdateType::Customs => { + log::warn!("Ignoring task with type 'Customs' that is no longer supported"); + return None; + } + v1::update::UpdateType::DocumentsAddition { .. } => { + v2::updates::UpdateMeta::DocumentsAddition { + method: v2::updates::IndexDocumentsMethod::ReplaceDocuments, + format: v2::updates::UpdateFormat::Json, + primary_key: None, + } + } + v1::update::UpdateType::DocumentsPartial { .. } => { + v2::updates::UpdateMeta::DocumentsAddition { + method: v2::updates::IndexDocumentsMethod::UpdateDocuments, + format: v2::updates::UpdateFormat::Json, + primary_key: None, + } + } + v1::update::UpdateType::DocumentsDeletion { .. } => { + v2::updates::UpdateMeta::DeleteDocuments { ids: vec![] } + } + v1::update::UpdateType::Settings { settings } => { + v2::updates::UpdateMeta::Settings((*settings).into()) + } + }) + } +} + +impl From for v2::Settings { + fn from(source: v1::settings::SettingsUpdate) -> Self { + let displayed_attributes: Option>> = + source.displayed_attributes.into(); + + let attributes_for_faceting: Option>> = + source.attributes_for_faceting.into(); + + let ranking_rules: Option>> = + source.ranking_rules.into(); + + // go from the concrete types of v1 (RankingRule) to the concrete type of v2 (Criterion), + // and then back to string as this is what the settings manipulate + let ranking_rules = ranking_rules.map(|opt| { + opt.map(|ranking_rules| { + ranking_rules + .into_iter() + // filter out the WordsPosition ranking rule that exists in v1 but not v2 + .filter_map(|ranking_rule| { + Option::::from(ranking_rule) + }) + .map(|criterion| criterion.to_string()) + .collect() + }) + }); + + Self { + displayed_attributes: displayed_attributes.map(|opt| { + opt.map(|displayed_attributes| displayed_attributes.into_iter().collect()) + }), + searchable_attributes: source.searchable_attributes.into(), + filterable_attributes: attributes_for_faceting.map(|opt| { + opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()) + }), + ranking_rules, + stop_words: source.stop_words.into(), + synonyms: source.synonyms.into(), + distinct_attribute: source.distinct_attribute.into(), + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(source: v1::settings::RankingRule) -> Self { + match source { + v1::settings::RankingRule::Typo => Some(v2::settings::Criterion::Typo), + v1::settings::RankingRule::Words => Some(v2::settings::Criterion::Words), + v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), + v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), + v1::settings::RankingRule::WordsPosition => { + log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); + None + } + v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), + v1::settings::RankingRule::Asc(field_name) => { + Some(v2::settings::Criterion::Asc(field_name)) + } + v1::settings::RankingRule::Desc(field_name) => { + Some(v2::settings::Criterion::Desc(field_name)) + } + } + } +} + +impl From> for Option> { + fn from(source: v1::settings::UpdateState) -> Self { + match source { + v1::settings::UpdateState::Update(new_value) => Some(Some(new_value)), + v1::settings::UpdateState::Clear => Some(None), + v1::settings::UpdateState::Nothing => None, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + fn compat_v1_v2() { + let dump = File::open("tests/assets/v1.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v1::V1Reader::open(dir).unwrap().to_v2(); + + // top level infos + assert_eq!(dump.date(), None); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ad6245d98d1a8e30535f3339a9a8d223"); + assert_eq!(update_files.len(), 9); + assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dumps v1 + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(products.settings().unwrap()); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(movies.settings().unwrap()); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(spells.settings().unwrap()); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed"); + } +} diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 70bc5b867..8574e04b4 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -4,22 +4,28 @@ use std::str::FromStr; use time::OffsetDateTime; use uuid::Uuid; +use super::v1_to_v2::{CompatIndexV1ToV2, CompatV1ToV2}; use super::v3_to_v4::CompatV3ToV4; use crate::reader::{v2, v3, Document}; use crate::Result; -pub struct CompatV2ToV3 { - pub from: v2::V2Reader, +pub enum CompatV2ToV3 { + V2(v2::V2Reader), + Compat(CompatV1ToV2), } impl CompatV2ToV3 { pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 { - CompatV2ToV3 { from: v2 } + CompatV2ToV3::V2(v2) } pub fn index_uuid(&self) -> Vec { - self.from - .index_uuid() + let v2_uuids = match self { + CompatV2ToV3::V2(from) => from.index_uuid(), + CompatV2ToV3::Compat(compat) => compat.index_uuid(), + }; + v2_uuids + .into_iter() .into_iter() .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid }) .collect() @@ -30,11 +36,17 @@ impl CompatV2ToV3 { } pub fn version(&self) -> crate::Version { - self.from.version() + match self { + CompatV2ToV3::V2(from) => from.version(), + CompatV2ToV3::Compat(compat) => compat.version(), + } } pub fn date(&self) -> Option { - self.from.date() + match self { + CompatV2ToV3::V2(from) => from.date(), + CompatV2ToV3::Compat(compat) => compat.date(), + } } pub fn instance_uid(&self) -> Result> { @@ -42,10 +54,18 @@ impl CompatV2ToV3 { } pub fn indexes(&self) -> Result> + '_> { - Ok(self.from.indexes()?.map(|index_reader| -> Result<_> { - let compat = CompatIndexV2ToV3::new(index_reader?); - Ok(compat) - })) + Ok(match self { + CompatV2ToV3::V2(from) => Box::new(from.indexes()?.map(|index_reader| -> Result<_> { + let compat = CompatIndexV2ToV3::new(index_reader?); + Ok(compat) + })) + as Box> + '_>, + CompatV2ToV3::Compat(compat) => Box::new(compat.indexes()?.map(|index_reader| { + let compat = CompatIndexV2ToV3::Compat(Box::new(index_reader?)); + Ok(compat) + })) + as Box> + '_>, + }) } pub fn tasks( @@ -54,11 +74,13 @@ impl CompatV2ToV3 { dyn Iterator>>>)>> + '_, > { - let _indexes = self.from.index_uuid.clone(); + let tasks = match self { + CompatV2ToV3::V2(from) => from.tasks(), + CompatV2ToV3::Compat(compat) => compat.tasks(), + }; Box::new( - self.from - .tasks() + tasks .map(move |task| { task.map(|(task, content_file)| { let task = v3::Task { uuid: task.uuid, update: task.update.into() }; @@ -76,27 +98,38 @@ impl CompatV2ToV3 { } } -pub struct CompatIndexV2ToV3 { - from: v2::V2IndexReader, +pub enum CompatIndexV2ToV3 { + V2(v2::V2IndexReader), + Compat(Box), } impl CompatIndexV2ToV3 { pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 { - CompatIndexV2ToV3 { from: v2 } + CompatIndexV2ToV3::V2(v2) } pub fn metadata(&self) -> &crate::IndexMetadata { - self.from.metadata() + match self { + CompatIndexV2ToV3::V2(from) => from.metadata(), + CompatIndexV2ToV3::Compat(compat) => compat.metadata(), + } } pub fn documents(&mut self) -> Result> + '_>> { - self.from - .documents() - .map(|iter| Box::new(iter) as Box> + '_>) + match self { + CompatIndexV2ToV3::V2(from) => from + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV2ToV3::Compat(compat) => compat.documents(), + } } pub fn settings(&mut self) -> Result> { - Ok(v3::Settings::::from(self.from.settings()?).check()) + let settings = match self { + CompatIndexV2ToV3::V2(from) => from.settings()?, + CompatIndexV2ToV3::Compat(compat) => compat.settings()?, + }; + Ok(v3::Settings::::from(settings).check()) } } diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index d1ca9ec42..efbca06d0 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -9,11 +9,11 @@ use self::compat::v4_to_v5::CompatV4ToV5; use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6}; use self::v5::V5Reader; use self::v6::{V6IndexReader, V6Reader}; -use crate::{Error, Result, Version}; +use crate::{Result, Version}; mod compat; -// pub(self) mod v1; +pub(self) mod v1; pub(self) mod v2; pub(self) mod v3; pub(self) mod v4; @@ -45,8 +45,9 @@ impl DumpReader { let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?; match dump_version { - // Version::V1 => Ok(Box::new(v1::Reader::open(path)?)), - Version::V1 => Err(Error::DumpV1Unsupported), + Version::V1 => { + Ok(v1::V1Reader::open(path)?.to_v2().to_v3().to_v4().to_v5().to_v6().into()) + } Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()), Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()), Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()), @@ -528,4 +529,81 @@ pub(crate) mod test { assert_eq!(documents.len(), 10); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); } + + #[test] + fn import_dump_v1() { + let dump = File::open("tests/assets/v1.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + assert_eq!(dump.date(), None); + assert_eq!(dump.instance_uid().unwrap(), None); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"b3e3652bfc10a76670be157d2507d761"); + assert_eq!(update_files.len(), 9); + assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dump v1 + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot!(meili_snap::json_string!(keys), @"[]"); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(products.settings().unwrap()); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(movies.settings().unwrap()); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(spells.settings().unwrap()); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed"); + } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap new file mode 100644 index 000000000..d20fdc77e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap @@ -0,0 +1,27 @@ +--- +source: dump/src/reader/mod.rs +expression: movies.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [ + "genres", + "id" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness", + "release_date:asc" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-11.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-11.snap new file mode 100644 index 000000000..997d303e7 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-11.snap @@ -0,0 +1,23 @@ +--- +source: dump/src/reader/mod.rs +expression: spells.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-14.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-14.snap new file mode 100644 index 000000000..997d303e7 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-14.snap @@ -0,0 +1,23 @@ +--- +source: dump/src/reader/mod.rs +expression: spells.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-5.snap new file mode 100644 index 000000000..282cd6ba7 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-5.snap @@ -0,0 +1,37 @@ +--- +source: dump/src/reader/mod.rs +expression: products.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-6.snap new file mode 100644 index 000000000..282cd6ba7 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-6.snap @@ -0,0 +1,37 @@ +--- +source: dump/src/reader/mod.rs +expression: products.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-8.snap new file mode 100644 index 000000000..d20fdc77e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-8.snap @@ -0,0 +1,27 @@ +--- +source: dump/src/reader/mod.rs +expression: movies.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [ + "genres", + "id" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness", + "release_date:asc" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/v1/mod.rs b/dump/src/reader/v1/mod.rs index f638262cc..1932b602a 100644 --- a/dump/src/reader/v1/mod.rs +++ b/dump/src/reader/v1/mod.rs @@ -1,173 +1,263 @@ use std::{ - convert::Infallible, fs::{self, File}, io::{BufRead, BufReader}, - path::Path, + path::{Path, PathBuf}, }; use tempfile::TempDir; use time::OffsetDateTime; -use self::update::UpdateStatus; - -use super::{DumpReader, IndexReader}; -use crate::{Error, Result, Version}; +use super::{compat::v1_to_v2::CompatV1ToV2, Document}; +use crate::{IndexMetadata, Result, Version}; +use serde::Deserialize; pub mod settings; pub mod update; -pub mod v1; pub struct V1Reader { - dump: TempDir, - metadata: v1::Metadata, - indexes: Vec, + pub dump: TempDir, + pub db_version: String, + pub dump_version: crate::Version, + indexes: Vec, } -struct V1IndexReader { - name: String, +pub struct IndexUuid { + pub name: String, + pub uid: String, +} +pub type Task = self::update::UpdateStatus; + +struct V1Index { + metadata: IndexMetadataV1, + path: PathBuf, +} + +impl V1Index { + pub fn new(path: PathBuf, metadata: Index) -> Self { + Self { metadata: metadata.into(), path } + } + + pub fn open(&self) -> Result { + V1IndexReader::new(&self.path, self.metadata.clone()) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata.metadata + } +} + +pub struct V1IndexReader { + metadata: IndexMetadataV1, documents: BufReader, settings: BufReader, updates: BufReader, - - current_update: Option, } impl V1IndexReader { - pub fn new(name: String, path: &Path) -> Result { - let mut ret = V1IndexReader { - name, + pub fn new(path: &Path, metadata: IndexMetadataV1) -> Result { + Ok(V1IndexReader { + metadata, documents: BufReader::new(File::open(path.join("documents.jsonl"))?), settings: BufReader::new(File::open(path.join("settings.json"))?), updates: BufReader::new(File::open(path.join("updates.jsonl"))?), - current_update: None, - }; - ret.next_update(); - - Ok(ret) + }) } - pub fn next_update(&mut self) -> Result> { - let current_update = if let Some(line) = self.updates.lines().next() { - Some(serde_json::from_str(&line?)?) - } else { - None - }; + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata.metadata + } - Ok(std::mem::replace(&mut self.current_update, current_update)) + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result { + Ok(serde_json::from_reader(&mut self.settings)?) + } + + pub fn tasks(self) -> impl Iterator> { + self.updates.lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) } } impl V1Reader { pub fn open(dump: TempDir) -> Result { - let mut meta_file = fs::read(dump.path().join("metadata.json"))?; - let metadata = serde_json::from_reader(&*meta_file)?; + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata: Metadata = serde_json::from_reader(&*meta_file)?; let mut indexes = Vec::new(); - let entries = fs::read_dir(dump.path())?; - for entry in entries { - let entry = entry?; - if entry.file_type()?.is_dir() { - indexes.push(V1IndexReader::new( - entry - .file_name() - .to_str() - .ok_or(Error::BadIndexName)? - .to_string(), - &entry.path(), - )?); - } + for index in metadata.indexes.into_iter() { + let index_path = dump.path().join(&index.uid); + indexes.push(V1Index::new(index_path, index)); } Ok(V1Reader { dump, - metadata, indexes, + db_version: metadata.db_version, + dump_version: metadata.dump_version, }) } - fn next_update(&mut self) -> Result> { - if let Some((idx, _)) = self - .indexes + pub fn to_v2(self) -> CompatV1ToV2 { + CompatV1ToV2 { from: self } + } + + pub fn index_uuid(&self) -> Vec { + self.indexes .iter() - .map(|index| index.current_update) - .enumerate() - .filter_map(|(idx, update)| update.map(|u| (idx, u))) - .min_by_key(|(_, update)| update.enqueued_at()) - { - self.indexes[idx].next_update() - } else { - Ok(None) + .map(|index| IndexUuid { + name: index.metadata.name.to_owned(), + uid: index.metadata().uid.to_owned(), + }) + .collect() + } + + pub fn version(&self) -> Version { + Version::V1 + } + + pub fn date(&self) -> Option { + None + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.indexes.iter().map(|index| index.open())) + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Index { + pub name: String, + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + updated_at: OffsetDateTime, + pub primary_key: Option, +} + +#[derive(Clone)] +pub struct IndexMetadataV1 { + pub name: String, + pub metadata: crate::IndexMetadata, +} + +impl From for IndexMetadataV1 { + fn from(index: Index) -> Self { + IndexMetadataV1 { + name: index.name, + metadata: crate::IndexMetadata { + uid: index.uid, + primary_key: index.primary_key, + created_at: index.created_at, + updated_at: index.updated_at, + }, } } } -impl IndexReader for &V1IndexReader { - type Document = serde_json::Map; - type Settings = settings::Settings; - - fn name(&self) -> &str { - todo!() - } - - fn documents(&self) -> Result>>> { - todo!() - } - - fn settings(&self) -> Result { - todo!() - } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + pub indexes: Vec, + pub db_version: String, + pub dump_version: crate::Version, } -impl DumpReader for V1Reader { - type Document = serde_json::Map; - type Settings = settings::Settings; +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; - type Task = update::UpdateStatus; - type UpdateFile = Infallible; + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; - type Key = Infallible; + use super::*; - fn date(&self) -> Option { - None - } + #[test] + fn read_dump_v1() { + let dump = File::open("tests/assets/v1.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); - fn version(&self) -> Version { - Version::V1 - } + let dump = V1Reader::open(dir).unwrap(); - fn indexes( - &self, - ) -> Result< - Box< - dyn Iterator< - Item = Result< - Box< - dyn super::IndexReader< - Document = Self::Document, - Settings = Self::Settings, - >, - >, - >, - >, - >, - > { - Ok(Box::new(self.indexes.iter().map(|index| { - let index = Box::new(index) - as Box>; - Ok(index) - }))) - } + // top level infos + assert_eq!(dump.date(), None); - fn tasks(&self) -> Box)>>> { - Box::new(std::iter::from_fn(|| { - self.next_update() - .transpose() - .map(|result| result.map(|task| (task, None))) - })) - } + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); - fn keys(&self) -> Box>> { - Box::new(std::iter::empty()) + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut dnd_spells = indexes.pop().unwrap(); + + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(products.settings().unwrap()); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // products tasks + let tasks = products.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"91de507f206ad21964584021932ba7a7"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(movies.settings().unwrap()); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5"); + + // movies tasks + let tasks = movies.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"55eef4de2bef7e84c5ce0bee47488f56"); + + // spells + insta::assert_json_snapshot!(dnd_spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(dnd_spells.settings().unwrap()); + let documents = dnd_spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed"); + + // spells tasks + let tasks = dnd_spells.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"836dd7d64d5ad20ad901c44b1b161a4c"); } } diff --git a/dump/src/reader/v1/settings.rs b/dump/src/reader/v1/settings.rs index 0065d3f97..2f7976534 100644 --- a/dump/src/reader/v1/settings.rs +++ b/dump/src/reader/v1/settings.rs @@ -1,6 +1,9 @@ use std::collections::{BTreeMap, BTreeSet}; use std::result::Result as StdResult; +use std::str::FromStr; +use once_cell::sync::Lazy; +use regex::Regex; use serde::{Deserialize, Deserializer, Serialize}; #[derive(Default, Clone, Serialize, Deserialize, Debug)] @@ -53,6 +56,34 @@ pub enum RankingRule { Desc(String), } +static ASC_DESC_REGEX: Lazy = + Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()); + +impl FromStr for RankingRule { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(match s { + "typo" => Self::Typo, + "words" => Self::Words, + "proximity" => Self::Proximity, + "attribute" => Self::Attribute, + "wordsPosition" => Self::WordsPosition, + "exactness" => Self::Exactness, + text => { + let caps = ASC_DESC_REGEX.captures(text).ok_or(())?; + let order = caps.get(1).unwrap().as_str(); + let field_name = caps.get(2).unwrap().as_str(); + match order { + "asc" => Self::Asc(field_name.to_string()), + "desc" => Self::Desc(field_name.to_string()), + _ => return Err(()), + } + } + }) + } +} + // Any value that is present is considered Some value, including null. fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult, D::Error> where diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap new file mode 100644 index 000000000..b117c5f3d --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap @@ -0,0 +1,38 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: products.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/update.rs b/dump/src/reader/v1/update.rs index c9ccaf309..b6408f42a 100644 --- a/dump/src/reader/v1/update.rs +++ b/dump/src/reader/v1/update.rs @@ -1,54 +1,8 @@ use serde::{Deserialize, Serialize}; -use serde_json::Value; use time::OffsetDateTime; use super::settings::SettingsUpdate; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Update { - data: UpdateData, - #[serde(with = "time::serde::rfc3339")] - enqueued_at: OffsetDateTime, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateData { - ClearAll, - Customs(Vec), - // (primary key, documents) - DocumentsAddition { - primary_key: Option, - documents: Vec>, - }, - DocumentsPartial { - primary_key: Option, - documents: Vec>, - }, - DocumentsDeletion(Vec), - Settings(Box), -} - -impl UpdateData { - pub fn update_type(&self) -> UpdateType { - match self { - UpdateData::ClearAll => UpdateType::ClearAll, - UpdateData::Customs(_) => UpdateType::Customs, - UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition { - number: documents.len(), - }, - UpdateData::DocumentsPartial { documents, .. } => UpdateType::DocumentsPartial { - number: documents.len(), - }, - UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion { - number: deletion.len(), - }, - UpdateData::Settings(update) => UpdateType::Settings { - settings: update.clone(), - }, - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "name")] pub enum UpdateType { diff --git a/dump/src/reader/v1/v1.rs b/dump/src/reader/v1/v1.rs deleted file mode 100644 index 0f4312508..000000000 --- a/dump/src/reader/v1/v1.rs +++ /dev/null @@ -1,22 +0,0 @@ -use serde::Deserialize; -use time::OffsetDateTime; - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Index { - pub name: String, - pub uid: String, - #[serde(with = "time::serde::rfc3339")] - created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - updated_at: OffsetDateTime, - pub primary_key: Option, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - indexes: Vec, - db_version: String, - dump_version: crate::Version, -} diff --git a/dump/src/reader/v2/settings.rs b/dump/src/reader/v2/settings.rs index 62e5c05f9..1a7935b56 100644 --- a/dump/src/reader/v2/settings.rs +++ b/dump/src/reader/v2/settings.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, BTreeSet}; +use std::fmt::Display; use std::marker::PhantomData; use std::str::FromStr; @@ -174,3 +175,17 @@ impl FromStr for Criterion { } } } + +impl Display for Criterion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Criterion::Words => write!(f, "words"), + Criterion::Typo => write!(f, "typo"), + Criterion::Proximity => write!(f, "proximity"), + Criterion::Attribute => write!(f, "attribute"), + Criterion::Exactness => write!(f, "exactness"), + Criterion::Asc(field_name) => write!(f, "asc({})", field_name), + Criterion::Desc(field_name) => write!(f, "desc({})", field_name), + } + } +} diff --git a/dump/tests/assets/v1.dump b/dump/tests/assets/v1.dump new file mode 100644 index 000000000..f1e295936 Binary files /dev/null and b/dump/tests/assets/v1.dump differ diff --git a/meilisearch-http/tests/dumps/mod.rs b/meilisearch-http/tests/dumps/mod.rs index cd9ba3828..10098c60f 100644 --- a/meilisearch-http/tests/dumps/mod.rs +++ b/meilisearch-http/tests/dumps/mod.rs @@ -9,19 +9,189 @@ use crate::common::{default_settings, GetAllDocumentsOptions, Server}; // all the following test are ignored on windows. See #2364 #[actix_rt::test] #[cfg_attr(target_os = "windows", ignore)] -async fn import_dump_v1() { +async fn import_dump_v1_movie_raw() { + let temp = tempfile::tempdir().unwrap(); + let path = GetDump::MoviesRawV1.path(); + let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + + let (indexes, code) = server.list_indexes(None, None).await; + assert_eq!(code, 200); + + assert_eq!(indexes["results"].as_array().unwrap().len(), 1); + assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); + assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); + + let index = server.index("indexUID"); + + let (stats, code) = index.stats().await; + assert_eq!(code, 200); + assert_eq!( + stats, + json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) + ); + + let (settings, code) = index.settings().await; + assert_eq!(code, 200); + assert_eq!( + settings, + json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + ); + + let (tasks, code) = index.list_tasks().await; + assert_eq!(code, 200); + assert_eq!( + tasks, + json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.317060500S", "enqueuedAt": "2021-09-08T09:08:45.153219Z", "startedAt": "2021-09-08T09:08:45.3961665Z", "finishedAt": "2021-09-08T09:08:54.713227Z" }], "limit": 20, "from": 0, "next": null }) + ); + + // finally we're just going to check that we can still get a few documents by id + let (document, code) = index.get_document(100, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({"id": 100, "title": "Lock, Stock and Two Smoking Barrels", "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "genres": ["Comedy", "Crime"], "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000}) + ); + + let (document, code) = index.get_document(500, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({"id": 500, "title": "Reservoir Dogs", "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "genres": ["Crime", "Thriller"], "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) + ); + + let (document, code) = index.get_document(10006, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({"id": 10006, "title": "Wild Seven", "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "genres": ["Action", "Crime", "Drama"], "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) + ); +} + +#[actix_rt::test] +#[cfg_attr(target_os = "windows", ignore)] +async fn import_dump_v1_movie_with_settings() { let temp = tempfile::tempdir().unwrap(); - for path in [ - GetDump::MoviesRawV1.path(), - GetDump::MoviesWithSettingsV1.path(), - GetDump::RubyGemsWithSettingsV1.path(), - ] { - let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; - let error = Server::new_with_options(options).await.map(drop).unwrap_err(); + let path = GetDump::MoviesWithSettingsV1.path(); - assert_eq!(error.to_string(), "The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards."); - } + let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + + let (indexes, code) = server.list_indexes(None, None).await; + assert_eq!(code, 200); + + assert_eq!(indexes["results"].as_array().unwrap().len(), 1); + assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); + assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); + + let index = server.index("indexUID"); + + let (stats, code) = index.stats().await; + assert_eq!(code, 200); + assert_eq!( + stats, + json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) + ); + + let (settings, code) = index.settings().await; + assert_eq!(code, 200); + assert_eq!( + settings, + json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } }) + ); + + let (tasks, code) = index.list_tasks().await; + assert_eq!(code, 200); + assert_eq!( + tasks, + json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT7.288826907S", "enqueuedAt": "2021-09-08T09:34:40.882977Z", "startedAt": "2021-09-08T09:34:40.883073093Z", "finishedAt": "2021-09-08T09:34:48.1719Z"}, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.090735774S", "enqueuedAt": "2021-09-08T09:34:16.036101Z", "startedAt": "2021-09-08T09:34:16.261191226Z", "finishedAt": "2021-09-08T09:34:25.351927Z" }], "limit": 20, "from": 1, "next": null }) + ); + + // finally we're just going to check that we can still get a few documents by id + let (document, code) = index.get_document(100, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "genres": ["Comedy", "Crime"], "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000 }) + ); + + let (document, code) = index.get_document(500, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "id": 500, "title": "Reservoir Dogs", "genres": ["Crime", "Thriller"], "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) + ); + + let (document, code) = index.get_document(10006, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "id": 10006, "title": "Wild Seven", "genres": ["Action", "Crime", "Drama"], "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) + ); +} + +#[actix_rt::test] +#[cfg_attr(target_os = "windows", ignore)] +async fn import_dump_v1_rubygems_with_settings() { + let temp = tempfile::tempdir().unwrap(); + + let path = GetDump::RubyGemsWithSettingsV1.path(); + + let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + + let (indexes, code) = server.list_indexes(None, None).await; + assert_eq!(code, 200); + + assert_eq!(indexes["results"].as_array().unwrap().len(), 1); + assert_eq!(indexes["results"][0]["uid"], json!("rubygems")); + assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); + + let index = server.index("rubygems"); + + let (stats, code) = index.stats().await; + assert_eq!(code, 200); + assert_eq!( + stats, + json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) + ); + + let (settings, code) = index.settings().await; + assert_eq!(code, 200); + assert_eq!( + settings, + json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }}) + ); + + let (tasks, code) = index.list_tasks().await; + assert_eq!(code, 200); + assert_eq!( + tasks["results"][0], + json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT1.487793839S", "enqueuedAt": "2021-09-08T09:27:01.465296Z", "startedAt": "2021-09-08T09:28:44.882177161Z", "finishedAt": "2021-09-08T09:28:46.369971Z"}) + ); + + // finally we're just going to check that we can still get a few documents by id + let (document, code) = index.get_document(188040, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "name": "meilisearch", "summary": "An easy-to-use ruby client for Meilisearch API", "description": "An easy-to-use ruby client for Meilisearch API. See https://github.com/meilisearch/MeiliSearch", "id": "188040", "version": "0.15.2", "total_downloads": "7465"}) + ); + + let (document, code) = index.get_document(191940, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "name": "doggo", "summary": "RSpec 3 formatter - documentation, with progress indication", "description": "Similar to \"rspec -f d\", but also indicates progress by showing the current test number and total test count on each line.", "id": "191940", "version": "1.1.0", "total_downloads": "9394"}) + ); + + let (document, code) = index.get_document(159227, None).await; + assert_eq!(code, 200); + assert_eq!( + document, + json!({ "name": "vortex-of-agony", "summary": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "description": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "id": "159227", "version": "0.1.0", "total_downloads": "1007"}) + ); } #[actix_rt::test]