3249: Bring back changes from release-v0.30.3 to main r=curquiza a=curquiza

⚠️ ⚠️ I had to fix git conflicts, ensure I did not lose anything ⚠️ ⚠️ 

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
bors[bot] 2022-12-15 14:13:30 +00:00 committed by GitHub
commit 867279f2a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 67 deletions

16
Cargo.lock generated
View File

@ -1332,8 +1332,8 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "0.37.2" version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [ dependencies = [
"nom", "nom",
"nom_locate", "nom_locate",
@ -1351,8 +1351,8 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "0.37.2" version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@ -1898,8 +1898,8 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "0.37.2" version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@ -2418,8 +2418,8 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.37.2" version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [ dependencies = [
"bimap", "bimap",
"bincode", "bincode",

View File

@ -13,7 +13,7 @@ enum-iterator = "1.1.3"
flate2 = "1.0.24" flate2 = "1.0.24"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.5.7" memmap2 = "0.5.7"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.2", default-features = false } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.3", default-features = false }
proptest = { version = "1.0.0", optional = true } proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true } proptest-derive = { version = "0.3.0", optional = true }
roaring = { version = "0.10.0", features = ["serde"] } roaring = { version = "0.10.0", features = ["serde"] }

View File

@ -108,75 +108,43 @@ pub fn create_app(
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim)) .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
} }
// TODO: TAMO: Finish setting up things enum OnFailure {
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || {
IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
})
};
enum OnFailure {
RemoveDb, RemoveDb,
KeepDb, KeepDb,
} }
let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> {
// if anything wrong happens we delete the `data.ms` entirely.
match (
index_scheduler_builder().map_err(anyhow::Error::from),
auth_controller_builder().map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
};
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
let empty_db = is_empty_db(&opt.db_path); let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot { let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists(); let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists { if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) { match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?, Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Err(e) => { Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?; std::fs::remove_dir_all(&opt.db_path)?;
return Err(e); return Err(e);
} }
} }
// the db already exists and we should not ignore the snapshot => throw an error
} else if !empty_db && !opt.ignore_snapshot_if_db_exists { } else if !empty_db && !opt.ignore_snapshot_if_db_exists {
bail!( bail!(
"database already exists at {:?}, try to delete it or rename it", "database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
) )
// the snapshot doesn't exist and we can't ignore it => throw an error
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot { } else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
bail!("snapshot doesn't exist at {}", snapshot_path.display()) bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else { } else {
meilisearch_builder(OnFailure::RemoveDb)? open_or_create_database(opt, empty_db)?
} }
} else if let Some(ref path) = opt.import_dump { } else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists(); let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists { if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) = let (mut index_scheduler, mut auth_controller) =
meilisearch_builder(OnFailure::RemoveDb)?; open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller), Ok(()) => (index_scheduler, auth_controller),
Err(e) => { Err(e) => {
@ -184,29 +152,22 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
return Err(e); return Err(e);
} }
} }
// the db already exists and we should not ignore the dump option => throw an error
} else if !empty_db && !opt.ignore_dump_if_db_exists { } else if !empty_db && !opt.ignore_dump_if_db_exists {
bail!( bail!(
"database already exists at {:?}, try to delete it or rename it", "database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
) )
// the dump doesn't exist and we can't ignore it => throw an error
} else if !src_path_exists && !opt.ignore_missing_dump { } else if !src_path_exists && !opt.ignore_missing_dump {
bail!("dump doesn't exist at {:?}", path) bail!("dump doesn't exist at {:?}", path)
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else { } else {
let (mut index_scheduler, mut auth_controller) = open_or_create_database(opt, empty_db)?
meilisearch_builder(OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
} }
} else { } else {
if !empty_db { open_or_create_database(opt, empty_db)?
check_version_file(&opt.db_path)?;
}
meilisearch_builder(OnFailure::KeepDb)?
}; };
// We create a loop in a thread that registers snapshotCreation tasks // We create a loop in a thread that registers snapshotCreation tasks
@ -228,6 +189,57 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
Ok((index_scheduler, auth_controller)) Ok((index_scheduler, auth_controller))
} }
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
opt: &Opt,
on_failure: OnFailure,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
})?)
};
match (
index_scheduler_builder(),
auth_controller.map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
}
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
}
fn import_dump( fn import_dump(
db_path: &Path, db_path: &Path,
dump_path: &Path, dump_path: &Path,