From 4dfae444780981550231404a19cd81eb117a0196 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Mon, 19 Sep 2022 18:16:28 +0200 Subject: [PATCH] Apply PR review comments --- config.toml | 156 ++++++++++++++++++--------------- meilisearch-http/src/main.rs | 13 ++- meilisearch-http/src/option.rs | 145 +++++++++++++++++++----------- meilisearch-lib/src/options.rs | 21 +++-- 4 files changed, 203 insertions(+), 132 deletions(-) diff --git a/config.toml b/config.toml index dcfaf835d..5d5ae4507 100644 --- a/config.toml +++ b/config.toml @@ -1,50 +1,55 @@ # This file shows the default configuration of Meilisearch. # All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables +# db_path = "./data.ms" +# The destination where the database must be created. + +# env = "development" # Possible values: [development, production] +# This environment variable must be set to `production` if you are running in production. +# More logs will be displayed if the server is running in development mode. Setting the master +# key is optional; hence no security on the updates routes. This +# is useful to debug when integrating the engine with another service. + +# http_addr = "127.0.0.1:7700" +# The address on which the HTTP server will listen. + +# master-key = "MASTER_KEY" +# Sets the instance's master key, automatically protecting all routes except GET /health. + +# no_analytics = false +# Do not send analytics to Meilisearch. + +# disable-auto-batching = false +# The engine will disable task auto-batching, and will sequentially compute each task one by one. + ### DUMP -# Folder where dumps are created when the dump route is called # dumps-dir = "dumps/" +# Folder where dumps are created when the dump route is called. 
-# Ignore the dump if a database already exists, and load that database instead -# ignore-dump-if-db-exists = false - -# If the dump doesn't exists, load or create the database specified by `db-path` instead -# ignore-missing-dump = false - -# Import a dump from the specified path, must be a `.dump` file # import-dump = "./path/to/my/file.dump" +# Import a dump from the specified path, must be a `.dump` file. + +# ignore-missing-dump = false +# If the dump doesn't exist, load or create the database specified by `db-path` instead. + +# ignore-dump-if-db-exists = false +# Ignore the dump if a database already exists, and load that database instead. + +### -### SNAPSHOT - -# The engine will ignore a missing snapshot and not return an error in such case -# ignore-missing-snapshot = false - -# The engine will skip snapshot importation and not return an error in such case -# ignore-snapshot-if-db-exists = false - -# Defines the path of the snapshot file to import. This option will, by default, stop the -# process if a database already exist or if no snapshot exists at the given path. If this -# option is not specified no snapshot is imported -# import-snapshot = false - -# Activate snapshot scheduling -# schedule-snapshot = false - -# Defines the directory path where meilisearch will create snapshot each snapshot_time_gap -# snapshot-dir = "snapshots/" - -# Defines time interval, in seconds, between each snapshot creation -# snapshot-interval-sec = 86400 +# log-level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +# Set the log level. ### INDEX -# The maximum size, in bytes, of the main lmdb database directory # max-index-size = "100 GiB" +# The maximum size, in bytes, of the main LMDB database directory. +# max-indexing-memory = "2 GiB" # The maximum amount of memory the indexer will use. It defaults to 2/3 of the available # memory. It is recommended to use something like 80%-90% of the available memory, no # more. 
@@ -54,68 +59,73 @@ # is recommended to specify the amount of memory to use. # # /!\ The default value is system dependant /!\ -# max-indexing-memory = "2 GiB" +# max-indexing-threads = 4 # The maximum number of threads the indexer will use. If the number set is higher than the # real number of cores available in the machine, it will use the maximum number of # available cores. # # It defaults to half of the available threads. -# max-indexing-threads = 4 + +### + + +# max-task-db-size = "100 GiB" +# The maximum size, in bytes, of the update LMDB database directory. + +# http-payload-size-limit = 100000000 +# The maximum size, in bytes, of accepted JSON payloads. + + +### SNAPSHOT + +# schedule-snapshot = false +# Activate snapshot scheduling. + +# snapshot-dir = "snapshots/" +# Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. + +# snapshot-interval-sec = 86400 +# Defines time interval, in seconds, between each snapshot creation. + +# import-snapshot = false +# Defines the path of the snapshot file to import. This option will, by default, stop the +# process if a database already exists, or if no snapshot exists at the given path. If this +# option is not specified, no snapshot is imported. + +# ignore-missing-snapshot = false +# The engine will ignore a missing snapshot and not return an error in such a case. + +# ignore-snapshot-if-db-exists = false +# The engine will skip snapshot importation and not return an error in such a case. + +### ### SSL -# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE # ssl-auth-path = "./path/to/root" +# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE. +# ssl-cert-path = "./path/to/CERTFILE" # Read server certificates from CERTFILE. 
This should contain PEM-format certificates in # the right order (the first certificate should certify KEYFILE, the last should be a root -# CA) -# ssl-cert-path = "./path/to/CERTFILE" +# CA). -# Read private key from KEYFILE. This should be a RSA private key or PKCS8-encoded -# private key, in PEM format # ssl-key-path = "./path/to/private-key" - -# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional +# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded +# private key, in PEM format. + # ssl-ocsp-path = "./path/to/OCSPFILE" +# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. -# Send a fatal alert if the client does not complete client authentication # ssl-require-auth = false - -# SSL support session resumption +# Send a fatal alert if the client does not complete client authentication. + # ssl-resumption = false - -# SSL support tickets +# SSL support session resumption. + # ssl-tickets = false +# SSL support tickets. - -### MISC - -# This environment variable must be set to `production` if you are running in production. -# If the server is running in development mode more logs will be displayed, and the master -# key can be avoided which implies that there is no security on the updates routes. 
This -# is useful to debug when integrating the engine with another service -# env = "development" # possible values: [development, production] - -# The address on which the http server will listen -# http-addr = "127.0.0.1:7700" - -# The maximum size, in bytes, of accepted JSON payloads -# http-payload-size-limit = 100000000 - -# The destination where the database must be created -# db-path = "./data.ms" - -# The engine will disable task auto-batching, and will sequencialy compute each task one by one -# disable-auto-batching = false - -# Set the log level -# log-level = "info" - -# The master key allowing you to do everything on the server -# master-key = "YOUR MASTER KEY" - -# The maximum size, in bytes, of the update lmdb database directory -# max-task-db-size = "100 GiB" +### diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 147f526a2..01cf39a2f 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,4 +1,5 @@ use std::env; +use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; @@ -28,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::build(); + let (opt, config_read_from) = Opt::try_build()?; setup(&opt)?; @@ -57,7 +58,7 @@ async fn main() -> anyhow::Result<()> { #[cfg(any(debug_assertions, not(feature = "analytics")))] let (analytics, user) = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user); + print_launch_resume(&opt, &user, config_read_from); run_http(meilisearch, auth_controller, opt, analytics).await?; @@ -96,7 +97,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str) { +pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); let protocol = if opt.ssl_cert_path.is_some() && 
opt.ssl_key_path.is_some() { @@ -117,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) { eprintln!("{}", ascii_name); + eprintln!( + "Config file path:\t{}", + config_read_from + .map(|config_file_path| config_file_path.display().to_string()) + .unwrap_or_else(|| "none".to_string()) + ); eprintln!("Database path:\t\t{:?}", opt.db_path); eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr); eprintln!("Environment:\t\t{:?}", opt.env); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 11a396904..1f676813a 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -64,6 +64,7 @@ const DEFAULT_LOG_LEVEL: &str = "info"; #[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { /// The destination where the database must be created. #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] @@ -75,15 +76,15 @@ pub struct Opt { #[serde(default = "default_http_addr")] pub http_addr: String, - /// The master key allowing you to do everything on the server. + /// Sets the instance's master key, automatically protecting all routes except GET /health #[serde(skip_serializing)] #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option<String>, /// This environment variable must be set to `production` if you are running in production. - /// If the server is running in development mode more logs will be displayed, - /// and the master key can be avoided which implies that there is no security on the updates routes. - /// This is useful to debug when integrating the engine with another service. + /// More logs will be displayed if the server is running in development mode. Setting the master + /// key is optional; hence no security on the updates routes. 
This + /// is useful to debug when integrating the engine with another service #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)] #[serde(default = "default_env")] pub env: String, @@ -94,12 +95,12 @@ pub struct Opt { #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// The maximum size, in bytes, of the main lmdb database directory + /// The maximum size, in bytes, of the main LMDB database directory #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] #[serde(default = "default_max_index_size")] pub max_index_size: Byte, - /// The maximum size, in bytes, of the update lmdb database directory + /// The maximum size, in bytes, of the update LMDB database directory #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, @@ -117,7 +118,7 @@ pub struct Opt { #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))] pub ssl_cert_path: Option<PathBuf>, - /// Read private key from KEYFILE. This should be a RSA + /// Read the private key from KEYFILE. This should be an RSA /// private key or PKCS8-encoded private key, in PEM format. #[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))] pub ssl_key_path: Option<PathBuf>, @@ -151,12 +152,12 @@ pub struct Opt { pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. - /// This option will, by default, stop the process if a database already exist or if no snapshot exists at - /// the given path. If this option is not specified no snapshot is imported. + /// This option will, by default, stop the process if a database already exists or if no snapshot exists at + /// the given path. If this option is not specified, no snapshot is imported. #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option<PathBuf>, - /// The engine will ignore a missing snapshot and not return an error in such case. 
+ /// The engine will ignore a missing snapshot and not return an error in such a case. #[clap( long, env = MEILI_IGNORE_MISSING_SNAPSHOT, @@ -174,7 +175,7 @@ pub struct Opt { #[serde(default)] pub ignore_snapshot_if_db_exists: bool, - /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. + /// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, @@ -194,7 +195,7 @@ pub struct Opt { #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")] pub import_dump: Option<PathBuf>, - /// If the dump doesn't exists, load or create the database specified by `db-path` instead. + /// If the dump doesn't exist, load or create the database specified by `db-path` instead. #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")] #[serde(default)] pub ignore_missing_dump: bool, @@ -209,7 +210,7 @@ pub struct Opt { #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, - /// Set the log level + /// Set the log level. # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] #[serde(default = "default_log_level")] pub log_level: String, @@ -243,78 +244,124 @@ impl Opt { } /// Build a new Opt from config file, env vars and cli args. - pub fn build() -> Self { + pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> { // Parse the args to get the config_file_path. 
let mut opts = Opt::parse(); - if let Some(config_file_path) = opts.config_file_path.as_ref() { - eprintln!("loading config file : {:?}", config_file_path); - match std::fs::read(config_file_path) { + let mut config_read_from = None; + if let Some(config_file_path) = opts + .config_file_path + .clone() + .or_else(|| Some(PathBuf::from("./config.toml"))) + { + match std::fs::read(&config_file_path) { Ok(config) => { - // If the arg is present, and the file successfully read, we deserialize it with `toml`. - let opt_from_config = - toml::from_slice::<Opt>(&config).expect("can't read file"); - // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. - opt_from_config.export_to_env(); - // Once injected we parse the cli args once again to take the new env vars into scope. - opts = Opt::parse(); + // If the file is successfully read, we deserialize it with `toml`. + match toml::from_slice::<Opt>(&config) { + Ok(opt_from_config) => { + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); + } + // If we have an error deserializing the file defined by the user. + Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), + _ => (), + } } - Err(err) => eprintln!("can't read {:?} : {}", config_file_path, err), + // If we have an error while reading the file defined by the user. + Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), + _ => (), } } - opts + Ok((opts, config_read_from)) } /// Exports the opts values to their corresponding env vars if they are not set. 
fn export_to_env(self) { - export_to_env_if_not_present(MEILI_DB_PATH, self.db_path); - export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr); - if let Some(master_key) = self.master_key { + let Opt { + db_path, + http_addr, + master_key, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + import_snapshot: _, + ignore_missing_snapshot: _, + ignore_snapshot_if_db_exists: _, + import_dump: _, + ignore_missing_dump: _, + ignore_dump_if_db_exists: _, + config_file_path: _, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics, + #[cfg(feature = "metrics")] + enable_metrics_route, + } = self; + export_to_env_if_not_present(MEILI_DB_PATH, db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); + if let Some(master_key) = master_key { export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); } - export_to_env_if_not_present(MEILI_ENV, self.env); + export_to_env_if_not_present(MEILI_ENV, env); #[cfg(all(not(debug_assertions), feature = "analytics"))] { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics.to_string()); + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); } - export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string()); - export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()); export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, - self.http_payload_size_limit.to_string(), + http_payload_size_limit.to_string(), ); - if let Some(ssl_cert_path) = self.ssl_cert_path { + if let 
Some(ssl_cert_path) = ssl_cert_path { export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); } - if let Some(ssl_key_path) = self.ssl_key_path { + if let Some(ssl_key_path) = ssl_key_path { export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); } - if let Some(ssl_auth_path) = self.ssl_auth_path { + if let Some(ssl_auth_path) = ssl_auth_path { export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); } - if let Some(ssl_ocsp_path) = self.ssl_ocsp_path { + if let Some(ssl_ocsp_path) = ssl_ocsp_path { export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); } - export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, self.ssl_require_auth.to_string()); - export_to_env_if_not_present(MEILI_SSL_RESUMPTION, self.ssl_resumption.to_string()); - export_to_env_if_not_present(MEILI_SSL_TICKETS, self.ssl_tickets.to_string()); - export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, self.snapshot_dir); - export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, self.schedule_snapshot.to_string()); + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); + export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()); export_to_env_if_not_present( MEILI_SNAPSHOT_INTERVAL_SEC, - self.snapshot_interval_sec.to_string(), + snapshot_interval_sec.to_string(), ); - export_to_env_if_not_present(MEILI_DUMPS_DIR, self.dumps_dir); - export_to_env_if_not_present(MEILI_LOG_LEVEL, self.log_level); + export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir); + export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level); #[cfg(feature = "metrics")] { export_to_env_if_not_present( MEILI_ENABLE_METRICS_ROUTE, - self.enable_metrics_route.to_string(), + enable_metrics_route.to_string(), ); } - 
self.indexer_options.export_to_env(); - self.scheduler_options.export_to_env(); + indexer_options.export_to_env(); + scheduler_options.export_to_env(); } pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> { diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 5aa7edf37..d75e02b39 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -12,10 +12,10 @@ use sysinfo::{RefreshKind, System, SystemExt}; const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; - const DEFAULT_LOG_EVERY_N: usize = 100000; #[derive(Debug, Clone, Parser, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. @@ -50,6 +50,7 @@ pub struct IndexerOpts { } #[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. @@ -61,7 +62,13 @@ impl IndexerOpts { /// Exports the values to their corresponding env vars if they are not set. 
pub fn export_to_env(self) { - if let Some(max_indexing_memory) = self.max_indexing_memory.0 { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { export_to_env_if_not_present( MEILI_MAX_INDEXING_MEMORY, max_indexing_memory.to_string(), @@ -69,7 +76,7 @@ impl IndexerOpts { } export_to_env_if_not_present( MEILI_MAX_INDEXING_THREADS, - self.max_indexing_threads.0.to_string(), + max_indexing_threads.0.to_string(), ); } } @@ -106,10 +113,10 @@ impl Default for IndexerOpts { impl SchedulerConfig { pub fn export_to_env(self) { - export_to_env_if_not_present( - DISABLE_AUTO_BATCHING, - self.disable_auto_batching.to_string(), - ); + let SchedulerConfig { + disable_auto_batching, + } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); } }