diff --git a/Cargo.lock b/Cargo.lock index 4db6ac2f0..6a8e8b7da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1109,11 +1109,13 @@ dependencies = [ "http", "index-scheduler", "insta", + "lazy_static", "log", "maplit", "meili-snap", "meilisearch-auth", "meilisearch-types", + "regex", "serde", "serde_json", "tar", diff --git a/dump/Cargo.toml b/dump/Cargo.toml index b2ab5ceaa..09c2a8a10 100644 --- a/dump/Cargo.toml +++ b/dump/Cargo.toml @@ -19,6 +19,8 @@ log = "0.4.17" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } http = "0.2.8" +regex = "1.6.0" +lazy_static = "1.4.0" [dev-dependencies] big_s = "1.0.2" diff --git a/dump/src/lib.rs b/dump/src/lib.rs index da777b7d6..c13e5e8ed 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -1,10 +1,8 @@ use meilisearch_types::{ error::ResponseError, - keys::Key, milli::update::IndexDocumentsMethod, settings::Unchecked, tasks::{Details, KindWithContent, Status, Task, TaskId}, - InstanceUid, }; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 51329b0d5..69c935c0c 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -353,7 +353,12 @@ impl From> for v3::Settings { filterable_attributes: option_to_setting(settings.filterable_attributes) .map(|f| f.into_iter().collect()), sortable_attributes: v3::Setting::NotSet, - ranking_rules: option_to_setting(settings.ranking_rules), + ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| { + criteria + .into_iter() + .map(|criterion| patch_ranking_rules(&criterion)) + .collect() + }), stop_words: option_to_setting(settings.stop_words), synonyms: option_to_setting(settings.synonyms), distinct_attribute: option_to_setting(settings.distinct_attribute), @@ -362,6 +367,20 @@ impl From> for v3::Settings { } } +fn patch_ranking_rules(ranking_rule: &str) -> String { + match v2::settings::Criterion::from_str(ranking_rule) { + Ok(v2::settings::Criterion::Words) => String::from("words"), + Ok(v2::settings::Criterion::Typo) => String::from("typo"), + Ok(v2::settings::Criterion::Proximity) => String::from("proximity"), + Ok(v2::settings::Criterion::Attribute) => String::from("attribute"), + Ok(v2::settings::Criterion::Exactness) => String::from("exactness"), + Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"), + Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"), + // we want to forward the error to the current version of meilisearch + Err(_) => ranking_rule.to_string(), + } +} + #[cfg(test)] pub(crate) mod test { use std::{fs::File, io::BufReader}; @@ -388,7 +407,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6adb1469ab4cc7625fd8ad32d07e51cd"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79"); assert_eq!(update_files.len(), 9); assert!(update_files[0].is_some()); // the enqueued document addition assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed @@ -440,7 +459,7 @@ pub(crate) mod test { } "###); - meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"8ee40d46442eb1a7cdc463d8a787515e"); + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095"); let documents = movies .documents() .unwrap() diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs index 9e81b9d32..67337a3fe 100644 --- a/dump/src/reader/compat/v4_to_v5.rs +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -410,7 +410,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e0b53f2cbd76c66dc55b12263a60d2c5"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e"); assert_eq!(update_files.len(), 10); assert!(update_files[0].is_some()); // the enqueued document addition assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index d1061d17a..6b99be81c 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -414,7 +414,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac"); assert_eq!(update_files.len(), 22); assert!(update_files[0].is_none()); // the dump creation assert!(update_files[1].is_some()); // the enqueued document addition diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index e549010a6..e74f92036 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -203,7 +203,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac"); assert_eq!(update_files.len(), 22); assert!(update_files[0].is_none()); // the dump creation assert!(update_files[1].is_some()); // the enqueued document addition @@ -293,7 +293,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0903b293c6ff8dc0819cbd3406848ef2"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"acd74244b4e6578c353899e6db30b0b5"); assert_eq!(update_files.len(), 10); assert!(update_files[0].is_some()); // the enqueued document addition assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed @@ -382,7 +382,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"891538c6fe0ba5187853a4f04890f9b5"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"fa74f7c6ab3014e09bb813fdc551db8f"); assert_eq!(update_files.len(), 10); assert!(update_files[0].is_some()); // the enqueued document addition assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed @@ -491,7 +491,7 @@ pub(crate) mod test { // tasks let tasks = dump.tasks().collect::>>().unwrap(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); - meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c52c07e1b356cce6982e2aeea7d0bf5e"); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"19882e94dc085f1d60eb7df5005a3224"); assert_eq!(update_files.len(), 9); assert!(update_files[0].is_some()); // the enqueued document addition assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed @@ -540,7 +540,7 @@ pub(crate) mod test { } "###); - meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"1e51f7fdc322176408f471a6d90d7698"); + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6"); let documents = movies .documents() .unwrap() diff --git a/dump/src/reader/v2/settings.rs b/dump/src/reader/v2/settings.rs index f91d14bd1..3c4d46c26 100644 --- a/dump/src/reader/v2/settings.rs +++ b/dump/src/reader/v2/settings.rs @@ -1,8 +1,10 @@ use std::{ collections::{BTreeMap, BTreeSet, HashSet}, marker::PhantomData, + str::FromStr, }; +use regex::Regex; use serde::{Deserialize, Deserializer}; #[cfg(test)] @@ -129,3 +131,51 @@ impl Settings { } } } + +lazy_static::lazy_static! { + static ref ASC_DESC_REGEX: Regex = Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap(); +} + +#[derive(Debug, Deserialize, Clone, PartialEq, Eq)] +pub enum Criterion { + /// Sorted by decreasing number of matched query terms. + /// Query words at the front of an attribute is considered better than if it was at the back. + Words, + /// Sorted by increasing number of typos. + Typo, + /// Sorted by increasing distance between matched query terms. + Proximity, + /// Documents with quey words contained in more important + /// attributes are considred better. + Attribute, + /// Sorted by the similarity of the matched words with the query words. + Exactness, + /// Sorted by the increasing value of the field specified. + Asc(String), + /// Sorted by the decreasing value of the field specified. + Desc(String), +} + +impl FromStr for Criterion { + type Err = (); + + fn from_str(txt: &str) -> Result { + match txt { + "words" => Ok(Criterion::Words), + "typo" => Ok(Criterion::Typo), + "proximity" => Ok(Criterion::Proximity), + "attribute" => Ok(Criterion::Attribute), + "exactness" => Ok(Criterion::Exactness), + text => { + let caps = ASC_DESC_REGEX.captures(text).ok_or(())?; + let order = caps.get(1).unwrap().as_str(); + let field_name = caps.get(2).unwrap().as_str(); + match order { + "asc" => Ok(Criterion::Asc(field_name.to_string())), + "desc" => Ok(Criterion::Desc(field_name.to_string())), + _text => Err(()), + } + } + } + } +} diff --git a/dump/src/writer.rs b/dump/src/writer.rs index b5c65e664..abb270cb8 100644 --- a/dump/src/writer.rs +++ b/dump/src/writer.rs @@ -330,9 +330,8 @@ pub(crate) mod test { // ==== checking the task queue let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap(); - for (task, mut expected) in tasks_queue.lines().zip(create_test_tasks()) { - // TODO: uncomment this one once the we write the dump integration in the index-scheduler - // assert_eq!(serde_json::from_str::(task).unwrap(), expected.0); + for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) { + assert_eq!(serde_json::from_str::(task).unwrap(), expected.0); if let Some(expected_update) = expected.1 { let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid));