From c7749127fa332b949a1b0df16a4ff3f8a3ed4627 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 30 Nov 2022 14:55:45 +0100 Subject: [PATCH] Use reader v1 and compat to v2 --- dump/src/error.rs | 3 - dump/src/reader/compat/mod.rs | 1 + dump/src/reader/compat/v1_to_v2.rs | 325 +++++++++++++++++++++++++++++ dump/src/reader/compat/v2_to_v3.rs | 77 +++++-- dump/src/reader/mod.rs | 9 +- 5 files changed, 386 insertions(+), 29 deletions(-) create mode 100644 dump/src/reader/compat/v1_to_v2.rs diff --git a/dump/src/error.rs b/dump/src/error.rs index a11aae9cf..0d57729ae 100644 --- a/dump/src/error.rs +++ b/dump/src/error.rs @@ -3,8 +3,6 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { - #[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")] - DumpV1Unsupported, #[error("Bad index name.")] BadIndexName, #[error("Malformed task.")] @@ -28,7 +26,6 @@ impl ErrorCode for Error { Error::Uuid(_) => Code::Internal, // all these errors should never be raised when creating a dump, thus no error code should be associated. - Error::DumpV1Unsupported => Code::Internal, Error::BadIndexName => Code::Internal, Error::MalformedTask => Code::Internal, } diff --git a/dump/src/reader/compat/mod.rs b/dump/src/reader/compat/mod.rs index 29836aa61..aabf400d9 100644 --- a/dump/src/reader/compat/mod.rs +++ b/dump/src/reader/compat/mod.rs @@ -1,3 +1,4 @@ +pub mod v1_to_v2; pub mod v2_to_v3; pub mod v3_to_v4; pub mod v4_to_v5; diff --git a/dump/src/reader/compat/v1_to_v2.rs b/dump/src/reader/compat/v1_to_v2.rs new file mode 100644 index 000000000..d8713a344 --- /dev/null +++ b/dump/src/reader/compat/v1_to_v2.rs @@ -0,0 +1,325 @@ +use std::{collections::BTreeSet, str::FromStr}; + +use crate::reader::{v1, v2, Document}; + +use super::v2_to_v3::CompatV2ToV3; +use crate::Result; + +pub struct CompatV1ToV2 { + pub from: v1::V1Reader, +} + +impl CompatV1ToV2 { + pub fn new(v1: v1::V1Reader) -> Self { + Self { from: v1 } + } + + pub fn to_v3(self) -> CompatV2ToV3 { + CompatV2ToV3::Compat(self) + } + + pub fn version(&self) -> crate::Version { + self.from.version() + } + + pub fn date(&self) -> Option { + self.from.date() + } + + pub fn index_uuid(&self) -> Vec { + self.from + .index_uuid() + .into_iter() + .enumerate() + // we use the index of the index 😬 as UUID for the index, so that we can link the v2::Task to their index + .map(|(index, index_uuid)| v2::meta::IndexUuid { + uid: index_uuid.uid, + uuid: uuid::Uuid::from_u128(index as u128), + }) + .collect() + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.from.indexes()?.map(|index_reader| Ok(CompatIndexV1ToV2 { from: index_reader? }))) + } + + pub fn tasks( + &mut self, + ) -> Box)>> + '_> { + // Convert an error here to an iterator yielding the error + let indexes = match self.from.indexes() { + Ok(indexes) => indexes, + Err(err) => return Box::new(std::iter::once(Err(err))), + }; + let it = indexes.enumerate().flat_map( + move |(index, index_reader)| -> Box> { + let index_reader = match index_reader { + Ok(index_reader) => index_reader, + Err(err) => return Box::new(std::iter::once(Err(err))), + }; + Box::new( + index_reader + .tasks() + // Filter out the UpdateStatus::Customs variant that is not supported in v2 + // and enqueued tasks, that don't contain the necessary update file in v1 + .filter_map(move |task| -> Option<_> { + let task = match task { + Ok(task) => task, + Err(err) => return Some(Err(err)), + }; + Some(Ok(( + v2::Task { + uuid: uuid::Uuid::from_u128(index as u128), + update: Option::from(task)?, + }, + None, + ))) + }), + ) + }, + ); + Box::new(it) + } +} + +pub struct CompatIndexV1ToV2 { + pub from: v1::V1IndexReader, +} + +impl CompatIndexV1ToV2 { + pub fn metadata(&self) -> &crate::IndexMetadata { + self.from.metadata() + } + + pub fn documents(&mut self) -> Result> + '_>> { + self.from.documents().map(|it| Box::new(it) as Box>) + } + + pub fn settings(&mut self) -> Result> { + Ok(v2::settings::Settings::::from(self.from.settings()?).check()) + } +} + +impl From for v2::Settings { + fn from(source: v1::settings::Settings) -> Self { + let displayed_attributes = source + .displayed_attributes + .map(|opt| opt.map(|displayed_attributes| displayed_attributes.into_iter().collect())); + let attributes_for_faceting = source.attributes_for_faceting.map(|opt| { + opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()) + }); + let ranking_rules = source.ranking_rules.map(|opt| { + opt.map(|ranking_rules| { + ranking_rules + .into_iter() + .filter_map(|ranking_rule| { + match v1::settings::RankingRule::from_str(&ranking_rule) { + Ok(ranking_rule) => { + let criterion: Option = + ranking_rule.into(); + criterion.as_ref().map(ToString::to_string) + } + Err(()) => Some(ranking_rule), + } + }) + .collect() + }) + }); + + Self { + displayed_attributes, + searchable_attributes: source.searchable_attributes, + filterable_attributes: attributes_for_faceting, + ranking_rules, + stop_words: source.stop_words, + synonyms: source.synonyms, + distinct_attribute: source.distinct_attribute, + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(source: v1::update::UpdateStatus) -> Self { + use v1::update::UpdateStatus as UpdateStatusV1; + use v2::updates::UpdateStatus as UpdateStatusV2; + Some(match source { + UpdateStatusV1::Enqueued { content } => { + log::warn!( + "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", + content.update_id + ); + log::warn!("Task will be skipped in the queue of imported tasks."); + + return None; + } + UpdateStatusV1::Failed { content } => UpdateStatusV2::Failed(v2::updates::Failed { + from: v2::updates::Processing { + from: v2::updates::Enqueued { + update_id: content.update_id, + meta: Option::from(content.update_type)?, + enqueued_at: content.enqueued_at, + content: None, + }, + started_processing_at: content.processed_at + - std::time::Duration::from_secs_f64(content.duration), + }, + error: v2::ResponseError { + // error code is ignored by serialization, and so always default in deserialized v2 dumps + // that's a good thing, because we don't have them in v1 dump 😅 + code: http::StatusCode::default(), + message: content.error.unwrap_or_default(), + // error codes are unchanged between v1 and v2 + error_code: content.error_code.unwrap_or_default(), + // error types are unchanged between v1 and v2 + error_type: content.error_type.unwrap_or_default(), + // error links are unchanged between v1 and v2 + error_link: content.error_link.unwrap_or_default(), + }, + failed_at: content.processed_at, + }), + UpdateStatusV1::Processed { content } => { + UpdateStatusV2::Processed(v2::updates::Processed { + success: match &content.update_type { + v1::update::UpdateType::ClearAll => { + v2::updates::UpdateResult::DocumentDeletion { deleted: u64::MAX } + } + v1::update::UpdateType::Customs => v2::updates::UpdateResult::Other, + v1::update::UpdateType::DocumentsAddition { number } => { + v2::updates::UpdateResult::DocumentsAddition( + v2::updates::DocumentAdditionResult { nb_documents: *number }, + ) + } + v1::update::UpdateType::DocumentsPartial { number } => { + v2::updates::UpdateResult::DocumentsAddition( + v2::updates::DocumentAdditionResult { nb_documents: *number }, + ) + } + v1::update::UpdateType::DocumentsDeletion { number } => { + v2::updates::UpdateResult::DocumentDeletion { deleted: *number as u64 } + } + v1::update::UpdateType::Settings { .. } => v2::updates::UpdateResult::Other, + }, + processed_at: content.processed_at, + from: v2::updates::Processing { + from: v2::updates::Enqueued { + update_id: content.update_id, + meta: Option::from(content.update_type)?, + enqueued_at: content.enqueued_at, + content: None, + }, + started_processing_at: content.processed_at + - std::time::Duration::from_secs_f64(content.duration), + }, + }) + } + }) + } +} + +impl From for Option { + fn from(source: v1::update::UpdateType) -> Self { + Some(match source { + v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, + v1::update::UpdateType::Customs => { + log::warn!("Ignoring task with type 'Customs' that is no longer supported"); + return None; + } + v1::update::UpdateType::DocumentsAddition { .. } => { + v2::updates::UpdateMeta::DocumentsAddition { + method: v2::updates::IndexDocumentsMethod::ReplaceDocuments, + format: v2::updates::UpdateFormat::Json, + primary_key: None, + } + } + v1::update::UpdateType::DocumentsPartial { .. } => { + v2::updates::UpdateMeta::DocumentsAddition { + method: v2::updates::IndexDocumentsMethod::UpdateDocuments, + format: v2::updates::UpdateFormat::Json, + primary_key: None, + } + } + v1::update::UpdateType::DocumentsDeletion { .. } => { + v2::updates::UpdateMeta::DeleteDocuments { ids: vec![] } + } + v1::update::UpdateType::Settings { settings } => { + v2::updates::UpdateMeta::Settings((*settings).into()) + } + }) + } +} + +impl From for v2::Settings { + fn from(source: v1::settings::SettingsUpdate) -> Self { + let displayed_attributes: Option>> = + source.displayed_attributes.into(); + + let attributes_for_faceting: Option>> = + source.attributes_for_faceting.into(); + + let ranking_rules: Option>> = + source.ranking_rules.into(); + + // go from the concrete types of v1 (RankingRule) to the concrete type of v2 (Criterion), + // and then back to string as this is what the settings manipulate + let ranking_rules = ranking_rules.map(|opt| { + opt.map(|ranking_rules| { + ranking_rules + .into_iter() + // filter out the WordsPosition ranking rule that exists in v1 but not v2 + .filter_map(|ranking_rule| { + Option::::from(ranking_rule) + }) + .map(|criterion| criterion.to_string()) + .collect() + }) + }); + + Self { + displayed_attributes: displayed_attributes.map(|opt| { + opt.map(|displayed_attributes| displayed_attributes.into_iter().collect()) + }), + searchable_attributes: source.searchable_attributes.into(), + filterable_attributes: attributes_for_faceting.map(|opt| { + opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()) + }), + ranking_rules, + stop_words: source.stop_words.into(), + synonyms: source.synonyms.into(), + distinct_attribute: source.distinct_attribute.into(), + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(source: v1::settings::RankingRule) -> Self { + match source { + v1::settings::RankingRule::Typo => Some(v2::settings::Criterion::Typo), + v1::settings::RankingRule::Words => Some(v2::settings::Criterion::Words), + v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), + v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), + v1::settings::RankingRule::WordsPosition => { + log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); + None + } + v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), + v1::settings::RankingRule::Asc(field_name) => { + Some(v2::settings::Criterion::Asc(field_name)) + } + v1::settings::RankingRule::Desc(field_name) => { + Some(v2::settings::Criterion::Desc(field_name)) + } + } + } +} + +impl From> for Option> { + fn from(source: v1::settings::UpdateState) -> Self { + match source { + v1::settings::UpdateState::Update(new_value) => Some(Some(new_value)), + v1::settings::UpdateState::Clear => Some(None), + v1::settings::UpdateState::Nothing => None, + } + } +} diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 70bc5b867..8574e04b4 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -4,22 +4,28 @@ use std::str::FromStr; use time::OffsetDateTime; use uuid::Uuid; +use super::v1_to_v2::{CompatIndexV1ToV2, CompatV1ToV2}; use super::v3_to_v4::CompatV3ToV4; use crate::reader::{v2, v3, Document}; use crate::Result; -pub struct CompatV2ToV3 { - pub from: v2::V2Reader, +pub enum CompatV2ToV3 { + V2(v2::V2Reader), + Compat(CompatV1ToV2), } impl CompatV2ToV3 { pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 { - CompatV2ToV3 { from: v2 } + CompatV2ToV3::V2(v2) } pub fn index_uuid(&self) -> Vec { - self.from - .index_uuid() + let v2_uuids = match self { + CompatV2ToV3::V2(from) => from.index_uuid(), + CompatV2ToV3::Compat(compat) => compat.index_uuid(), + }; + v2_uuids + .into_iter() .into_iter() .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid }) .collect() @@ -30,11 +36,17 @@ impl CompatV2ToV3 { } pub fn version(&self) -> crate::Version { - self.from.version() + match self { + CompatV2ToV3::V2(from) => from.version(), + CompatV2ToV3::Compat(compat) => compat.version(), + } } pub fn date(&self) -> Option { - self.from.date() + match self { + CompatV2ToV3::V2(from) => from.date(), + CompatV2ToV3::Compat(compat) => compat.date(), + } } pub fn instance_uid(&self) -> Result> { @@ -42,10 +54,18 @@ impl CompatV2ToV3 { } pub fn indexes(&self) -> Result> + '_> { - Ok(self.from.indexes()?.map(|index_reader| -> Result<_> { - let compat = CompatIndexV2ToV3::new(index_reader?); - Ok(compat) - })) + Ok(match self { + CompatV2ToV3::V2(from) => Box::new(from.indexes()?.map(|index_reader| -> Result<_> { + let compat = CompatIndexV2ToV3::new(index_reader?); + Ok(compat) + })) + as Box> + '_>, + CompatV2ToV3::Compat(compat) => Box::new(compat.indexes()?.map(|index_reader| { + let compat = CompatIndexV2ToV3::Compat(Box::new(index_reader?)); + Ok(compat) + })) + as Box> + '_>, + }) } pub fn tasks( @@ -54,11 +74,13 @@ impl CompatV2ToV3 { dyn Iterator>>>)>> + '_, > { - let _indexes = self.from.index_uuid.clone(); + let tasks = match self { + CompatV2ToV3::V2(from) => from.tasks(), + CompatV2ToV3::Compat(compat) => compat.tasks(), + }; Box::new( - self.from - .tasks() + tasks .map(move |task| { task.map(|(task, content_file)| { let task = v3::Task { uuid: task.uuid, update: task.update.into() }; @@ -76,27 +98,38 @@ impl CompatV2ToV3 { } } -pub struct CompatIndexV2ToV3 { - from: v2::V2IndexReader, +pub enum CompatIndexV2ToV3 { + V2(v2::V2IndexReader), + Compat(Box), } impl CompatIndexV2ToV3 { pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 { - CompatIndexV2ToV3 { from: v2 } + CompatIndexV2ToV3::V2(v2) } pub fn metadata(&self) -> &crate::IndexMetadata { - self.from.metadata() + match self { + CompatIndexV2ToV3::V2(from) => from.metadata(), + CompatIndexV2ToV3::Compat(compat) => compat.metadata(), + } } pub fn documents(&mut self) -> Result> + '_>> { - self.from - .documents() - .map(|iter| Box::new(iter) as Box> + '_>) + match self { + CompatIndexV2ToV3::V2(from) => from + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV2ToV3::Compat(compat) => compat.documents(), + } } pub fn settings(&mut self) -> Result> { - Ok(v3::Settings::::from(self.from.settings()?).check()) + let settings = match self { + CompatIndexV2ToV3::V2(from) => from.settings()?, + CompatIndexV2ToV3::Compat(compat) => compat.settings()?, + }; + Ok(v3::Settings::::from(settings).check()) } } diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index d1ca9ec42..1368596d0 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -9,11 +9,11 @@ use self::compat::v4_to_v5::CompatV4ToV5; use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6}; use self::v5::V5Reader; use self::v6::{V6IndexReader, V6Reader}; -use crate::{Error, Result, Version}; +use crate::{Result, Version}; mod compat; -// pub(self) mod v1; +pub(self) mod v1; pub(self) mod v2; pub(self) mod v3; pub(self) mod v4; @@ -45,8 +45,9 @@ impl DumpReader { let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?; match dump_version { - // Version::V1 => Ok(Box::new(v1::Reader::open(path)?)), - Version::V1 => Err(Error::DumpV1Unsupported), + Version::V1 => { + Ok(v1::V1Reader::open(path)?.to_v2().to_v3().to_v4().to_v5().to_v6().into()) + } Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()), Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()), Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()),