mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Merge #2098
2098: feat(dump): Provide the same cli options as the snapshots r=MarinPostma a=irevoire Add two cli options for the dump: - `--ignore-missing-dump` - `--ignore-dump-if-db-exists` Fix #2087 Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
c8bb54cd94
@ -30,11 +30,15 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
|
|||||||
meilisearch
|
meilisearch
|
||||||
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
||||||
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
|
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
|
||||||
|
// snapshot
|
||||||
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
||||||
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
||||||
.set_dump_dst(opt.dumps_dir.clone())
|
|
||||||
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
|
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
|
||||||
.set_snapshot_dir(opt.snapshot_dir.clone());
|
.set_snapshot_dir(opt.snapshot_dir.clone())
|
||||||
|
// dump
|
||||||
|
.set_ignore_missing_dump(opt.ignore_missing_dump)
|
||||||
|
.set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists)
|
||||||
|
.set_dump_dst(opt.dumps_dir.clone());
|
||||||
|
|
||||||
if let Some(ref path) = opt.import_snapshot {
|
if let Some(ref path) = opt.import_snapshot {
|
||||||
meilisearch.set_import_snapshot(path.clone());
|
meilisearch.set_import_snapshot(path.clone());
|
||||||
|
@ -124,14 +124,22 @@ pub struct Opt {
|
|||||||
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
|
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
|
||||||
pub snapshot_interval_sec: u64,
|
pub snapshot_interval_sec: u64,
|
||||||
|
|
||||||
/// Folder where dumps are created when the dump route is called.
|
|
||||||
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
|
||||||
pub dumps_dir: PathBuf,
|
|
||||||
|
|
||||||
/// Import a dump from the specified path, must be a `.dump` file.
|
/// Import a dump from the specified path, must be a `.dump` file.
|
||||||
#[clap(long, conflicts_with = "import-snapshot")]
|
#[clap(long, conflicts_with = "import-snapshot")]
|
||||||
pub import_dump: Option<PathBuf>,
|
pub import_dump: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// If the dump doesn't exist, load or create the database specified by `db-path` instead.
|
||||||
|
#[clap(long, requires = "import-dump")]
|
||||||
|
pub ignore_missing_dump: bool,
|
||||||
|
|
||||||
|
/// Ignore the dump if a database already exists, and load that database instead.
|
||||||
|
#[clap(long, requires = "import-dump")]
|
||||||
|
pub ignore_dump_if_db_exists: bool,
|
||||||
|
|
||||||
|
/// Folder where dumps are created when the dump route is called.
|
||||||
|
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
||||||
|
pub dumps_dir: PathBuf,
|
||||||
|
|
||||||
/// Set the log level
|
/// Set the log level
|
||||||
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
|
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
|
||||||
pub log_level: String,
|
pub log_level: String,
|
||||||
|
@ -148,6 +148,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
|||||||
schedule_snapshot: false,
|
schedule_snapshot: false,
|
||||||
snapshot_interval_sec: 0,
|
snapshot_interval_sec: 0,
|
||||||
import_dump: None,
|
import_dump: None,
|
||||||
|
ignore_missing_dump: false,
|
||||||
|
ignore_dump_if_db_exists: false,
|
||||||
indexer_options: IndexerOpts {
|
indexer_options: IndexerOpts {
|
||||||
// memory has to be unlimited because several meilisearch are running in test context.
|
// memory has to be unlimited because several meilisearch are running in test context.
|
||||||
max_memory: MaxMemory::unlimited(),
|
max_memory: MaxMemory::unlimited(),
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use anyhow::bail;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use log::{info, trace, warn};
|
use log::{info, trace};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
pub use actor::DumpActor;
|
pub use actor::DumpActor;
|
||||||
pub use handle_impl::*;
|
pub use handle_impl::*;
|
||||||
use meilisearch_auth::AuthController;
|
use meilisearch_auth::AuthController;
|
||||||
pub use message::DumpMsg;
|
pub use message::DumpMsg;
|
||||||
|
use tempfile::TempDir;
|
||||||
use tokio::fs::create_dir_all;
|
use tokio::fs::create_dir_all;
|
||||||
use tokio::sync::oneshot;
|
use tokio::sync::oneshot;
|
||||||
|
|
||||||
@ -79,6 +81,47 @@ pub enum MetadataVersion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MetadataVersion {
|
impl MetadataVersion {
|
||||||
|
pub fn load_dump(
|
||||||
|
self,
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
index_db_size: usize,
|
||||||
|
meta_env_size: usize,
|
||||||
|
indexing_options: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
match self {
|
||||||
|
MetadataVersion::V1(_meta) => {
|
||||||
|
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
||||||
|
}
|
||||||
|
MetadataVersion::V2(meta) => v2::load_dump(
|
||||||
|
meta,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
index_db_size,
|
||||||
|
meta_env_size,
|
||||||
|
indexing_options,
|
||||||
|
)?,
|
||||||
|
MetadataVersion::V3(meta) => v3::load_dump(
|
||||||
|
meta,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
index_db_size,
|
||||||
|
meta_env_size,
|
||||||
|
indexing_options,
|
||||||
|
)?,
|
||||||
|
MetadataVersion::V4(meta) => v4::load_dump(
|
||||||
|
meta,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
index_db_size,
|
||||||
|
meta_env_size,
|
||||||
|
indexing_options,
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
|
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
|
||||||
let meta = Metadata::new(index_db_size, update_db_size);
|
let meta = Metadata::new(index_db_size, update_db_size);
|
||||||
Self::V4(meta)
|
Self::V4(meta)
|
||||||
@ -160,10 +203,46 @@ impl DumpInfo {
|
|||||||
pub fn load_dump(
|
pub fn load_dump(
|
||||||
dst_path: impl AsRef<Path>,
|
dst_path: impl AsRef<Path>,
|
||||||
src_path: impl AsRef<Path>,
|
src_path: impl AsRef<Path>,
|
||||||
|
ignore_dump_if_db_exists: bool,
|
||||||
|
ignore_missing_dump: bool,
|
||||||
index_db_size: usize,
|
index_db_size: usize,
|
||||||
update_db_size: usize,
|
update_db_size: usize,
|
||||||
indexer_opts: &IndexerOpts,
|
indexer_opts: &IndexerOpts,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
let empty_db = crate::is_empty_db(&dst_path);
|
||||||
|
let src_path_exists = src_path.as_ref().exists();
|
||||||
|
|
||||||
|
if empty_db && src_path_exists {
|
||||||
|
let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?;
|
||||||
|
meta.load_dump(
|
||||||
|
tmp_src.path(),
|
||||||
|
tmp_dst.path(),
|
||||||
|
index_db_size,
|
||||||
|
update_db_size,
|
||||||
|
indexer_opts,
|
||||||
|
)?;
|
||||||
|
persist_dump(&dst_path, tmp_dst)?;
|
||||||
|
Ok(())
|
||||||
|
} else if !empty_db && !ignore_dump_if_db_exists {
|
||||||
|
bail!(
|
||||||
|
"database already exists at {:?}, try to delete it or rename it",
|
||||||
|
dst_path
|
||||||
|
.as_ref()
|
||||||
|
.canonicalize()
|
||||||
|
.unwrap_or_else(|_| dst_path.as_ref().to_owned())
|
||||||
|
)
|
||||||
|
} else if !src_path_exists && !ignore_missing_dump {
|
||||||
|
bail!("dump doesn't exist at {:?}", src_path.as_ref())
|
||||||
|
} else {
|
||||||
|
// there is nothing to do
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_dump(
|
||||||
|
dst_path: impl AsRef<Path>,
|
||||||
|
src_path: impl AsRef<Path>,
|
||||||
|
) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> {
|
||||||
// Setup a temp directory path in the same path as the database, to prevent cross devices
|
// Setup a temp directory path in the same path as the database, to prevent cross devices
|
||||||
// references.
|
// references.
|
||||||
let temp_path = dst_path
|
let temp_path = dst_path
|
||||||
@ -201,40 +280,14 @@ pub fn load_dump(
|
|||||||
meta.version()
|
meta.version()
|
||||||
);
|
);
|
||||||
|
|
||||||
match meta {
|
Ok((tmp_src, tmp_dst, meta))
|
||||||
MetadataVersion::V1(_meta) => {
|
|
||||||
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
|
||||||
}
|
|
||||||
MetadataVersion::V2(meta) => v2::load_dump(
|
|
||||||
meta,
|
|
||||||
&tmp_src_path,
|
|
||||||
tmp_dst.path(),
|
|
||||||
index_db_size,
|
|
||||||
update_db_size,
|
|
||||||
indexer_opts,
|
|
||||||
)?,
|
|
||||||
MetadataVersion::V3(meta) => v3::load_dump(
|
|
||||||
meta,
|
|
||||||
&tmp_src_path,
|
|
||||||
tmp_dst.path(),
|
|
||||||
index_db_size,
|
|
||||||
update_db_size,
|
|
||||||
indexer_opts,
|
|
||||||
)?,
|
|
||||||
MetadataVersion::V4(meta) => v4::load_dump(
|
|
||||||
meta,
|
|
||||||
&tmp_src_path,
|
|
||||||
tmp_dst.path(),
|
|
||||||
index_db_size,
|
|
||||||
update_db_size,
|
|
||||||
indexer_opts,
|
|
||||||
)?,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> {
|
||||||
let persisted_dump = tmp_dst.into_path();
|
let persisted_dump = tmp_dst.into_path();
|
||||||
|
|
||||||
// Delete everything in the `data.ms` except the tempdir.
|
// Delete everything in the `data.ms` except the tempdir.
|
||||||
if dst_path.as_ref().exists() {
|
if dst_path.as_ref().exists() {
|
||||||
warn!("Overwriting database at {}", dst_path.as_ref().display());
|
|
||||||
for file in dst_path.as_ref().read_dir().unwrap() {
|
for file in dst_path.as_ref().read_dir().unwrap() {
|
||||||
let file = file.unwrap().path();
|
let file = file.unwrap().path();
|
||||||
if file.file_name() == persisted_dump.file_name() {
|
if file.file_name() == persisted_dump.file_name() {
|
||||||
|
@ -150,6 +150,8 @@ pub struct IndexControllerBuilder {
|
|||||||
schedule_snapshot: bool,
|
schedule_snapshot: bool,
|
||||||
dump_src: Option<PathBuf>,
|
dump_src: Option<PathBuf>,
|
||||||
dump_dst: Option<PathBuf>,
|
dump_dst: Option<PathBuf>,
|
||||||
|
ignore_dump_if_db_exists: bool,
|
||||||
|
ignore_missing_dump: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexControllerBuilder {
|
impl IndexControllerBuilder {
|
||||||
@ -186,6 +188,8 @@ impl IndexControllerBuilder {
|
|||||||
load_dump(
|
load_dump(
|
||||||
db_path.as_ref(),
|
db_path.as_ref(),
|
||||||
src_path,
|
src_path,
|
||||||
|
self.ignore_dump_if_db_exists,
|
||||||
|
self.ignore_missing_dump,
|
||||||
index_size,
|
index_size,
|
||||||
task_store_size,
|
task_store_size,
|
||||||
&indexer_options,
|
&indexer_options,
|
||||||
@ -296,18 +300,6 @@ impl IndexControllerBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set the index controller builder's dump src.
|
|
||||||
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
|
|
||||||
self.dump_src.replace(dump_src);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the index controller builder's dump dst.
|
|
||||||
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
|
|
||||||
self.dump_dst.replace(dump_dst);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the index controller builder's import snapshot.
|
/// Set the index controller builder's import snapshot.
|
||||||
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
|
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
|
||||||
self.import_snapshot.replace(import_snapshot);
|
self.import_snapshot.replace(import_snapshot);
|
||||||
@ -325,6 +317,30 @@ impl IndexControllerBuilder {
|
|||||||
self.schedule_snapshot = true;
|
self.schedule_snapshot = true;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the index controller builder's dump src.
|
||||||
|
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
|
||||||
|
self.dump_src.replace(dump_src);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the index controller builder's dump dst.
|
||||||
|
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
|
||||||
|
self.dump_dst.replace(dump_dst);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the index controller builder's ignore dump if db exists.
|
||||||
|
pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
|
||||||
|
self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the index controller builder's ignore missing dump.
|
||||||
|
pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
|
||||||
|
self.ignore_missing_dump = ignore_missing_dump;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<U, I> IndexController<U, I>
|
impl<U, I> IndexController<U, I>
|
||||||
|
@ -10,6 +10,8 @@ mod snapshot;
|
|||||||
pub mod tasks;
|
pub mod tasks;
|
||||||
mod update_file_store;
|
mod update_file_store;
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
pub use index_controller::MeiliSearch;
|
pub use index_controller::MeiliSearch;
|
||||||
|
|
||||||
pub use milli;
|
pub use milli;
|
||||||
@ -33,3 +35,19 @@ impl EnvSizer for heed::Env {
|
|||||||
.fold(0, |acc, m| acc + m.len())
|
.fold(0, |acc, m| acc + m.len())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
///
/// A database is considered empty only when `db_path` does not exist or is a
/// directory without any entry. Anything else is reported as non empty, so
/// that callers never overwrite something they could not inspect: a non empty
/// directory, a plain file, or a path that cannot be listed.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // A single entry is enough to know the directory is not empty.
        Ok(mut entries) => entries.next().is_none(),
        // if we encounter an error or if the db is a file we consider the db non empty
        Err(_) => false,
    }
}
|
||||||
|
@ -49,7 +49,10 @@ pub fn load_snapshot(
|
|||||||
ignore_snapshot_if_db_exists: bool,
|
ignore_snapshot_if_db_exists: bool,
|
||||||
ignore_missing_snapshot: bool,
|
ignore_missing_snapshot: bool,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
|
let empty_db = crate::is_empty_db(&db_path);
|
||||||
|
let snapshot_path_exists = snapshot_path.as_ref().exists();
|
||||||
|
|
||||||
|
if empty_db && snapshot_path_exists {
|
||||||
match from_tar_gz(snapshot_path, &db_path) {
|
match from_tar_gz(snapshot_path, &db_path) {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@ -58,7 +61,7 @@ pub fn load_snapshot(
|
|||||||
Err(e)
|
Err(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
|
} else if !empty_db && !ignore_snapshot_if_db_exists {
|
||||||
bail!(
|
bail!(
|
||||||
"database already exists at {:?}, try to delete it or rename it",
|
"database already exists at {:?}, try to delete it or rename it",
|
||||||
db_path
|
db_path
|
||||||
@ -66,14 +69,8 @@ pub fn load_snapshot(
|
|||||||
.canonicalize()
|
.canonicalize()
|
||||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||||
)
|
)
|
||||||
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
|
} else if !snapshot_path_exists && !ignore_missing_snapshot {
|
||||||
bail!(
|
bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref())
|
||||||
"snapshot doesn't exist at {:?}",
|
|
||||||
snapshot_path
|
|
||||||
.as_ref()
|
|
||||||
.canonicalize()
|
|
||||||
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user