fix all the errors code and settings issues when importing a dump v2

This commit is contained in:
Tamo 2022-10-17 12:47:48 +02:00 committed by Clément Renault
parent 3872a1b8d1
commit a9eeb070b8
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
9 changed files with 85 additions and 15 deletions

2
Cargo.lock generated
View File

@ -1109,11 +1109,13 @@ dependencies = [
"http",
"index-scheduler",
"insta",
"lazy_static",
"log",
"maplit",
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"regex",
"serde",
"serde_json",
"tar",

View File

@ -19,6 +19,8 @@ log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
http = "0.2.8"
regex = "1.6.0"
lazy_static = "1.4.0"
[dev-dependencies]
big_s = "1.0.2"

View File

@ -1,10 +1,8 @@
use meilisearch_types::{
error::ResponseError,
keys::Key,
milli::update::IndexDocumentsMethod,
settings::Unchecked,
tasks::{Details, KindWithContent, Status, Task, TaskId},
InstanceUid,
};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

View File

@ -353,7 +353,12 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
filterable_attributes: option_to_setting(settings.filterable_attributes)
.map(|f| f.into_iter().collect()),
sortable_attributes: v3::Setting::NotSet,
ranking_rules: option_to_setting(settings.ranking_rules),
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
criteria
.into_iter()
.map(|criterion| patch_ranking_rules(&criterion))
.collect()
}),
stop_words: option_to_setting(settings.stop_words),
synonyms: option_to_setting(settings.synonyms),
distinct_attribute: option_to_setting(settings.distinct_attribute),
@ -362,6 +367,20 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
}
}
fn patch_ranking_rules(ranking_rule: &str) -> String {
match v2::settings::Criterion::from_str(ranking_rule) {
Ok(v2::settings::Criterion::Words) => String::from("words"),
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),
// we want to forward the error to the current version of meilisearch
Err(_) => ranking_rule.to_string(),
}
}
#[cfg(test)]
pub(crate) mod test {
use std::{fs::File, io::BufReader};
@ -388,7 +407,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6adb1469ab4cc7625fd8ad32d07e51cd");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -440,7 +459,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"8ee40d46442eb1a7cdc463d8a787515e");
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095");
let documents = movies
.documents()
.unwrap()

View File

@ -410,7 +410,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e0b53f2cbd76c66dc55b12263a60d2c5");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed

View File

@ -414,7 +414,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition

View File

@ -203,7 +203,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
@ -293,7 +293,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0903b293c6ff8dc0819cbd3406848ef2");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"acd74244b4e6578c353899e6db30b0b5");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -382,7 +382,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"891538c6fe0ba5187853a4f04890f9b5");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"fa74f7c6ab3014e09bb813fdc551db8f");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -491,7 +491,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c52c07e1b356cce6982e2aeea7d0bf5e");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"19882e94dc085f1d60eb7df5005a3224");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -540,7 +540,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"1e51f7fdc322176408f471a6d90d7698");
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6");
let documents = movies
.documents()
.unwrap()

View File

@ -1,8 +1,10 @@
use std::{
collections::{BTreeMap, BTreeSet, HashSet},
marker::PhantomData,
str::FromStr,
};
use regex::Regex;
use serde::{Deserialize, Deserializer};
#[cfg(test)]
@ -129,3 +131,51 @@ impl Settings<Unchecked> {
}
}
}
lazy_static::lazy_static! {
static ref ASC_DESC_REGEX: Regex = Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap();
}
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
Proximity,
/// Documents with quey words contained in more important
/// attributes are considred better.
Attribute,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
/// Sorted by the increasing value of the field specified.
Asc(String),
/// Sorted by the decreasing value of the field specified.
Desc(String),
}
impl FromStr for Criterion {
type Err = ();
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"exactness" => Ok(Criterion::Exactness),
text => {
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())),
_text => Err(()),
}
}
}
}
}

View File

@ -330,9 +330,8 @@ pub(crate) mod test {
// ==== checking the task queue
let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();
for (task, mut expected) in tasks_queue.lines().zip(create_test_tasks()) {
// TODO: uncomment this one once the we write the dump integration in the index-scheduler
// assert_eq!(serde_json::from_str::<TaskView>(task).unwrap(), expected.0);
for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) {
assert_eq!(serde_json::from_str::<TaskDump>(task).unwrap(), expected.0);
if let Some(expected_update) = expected.1 {
let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid));