Apply PR review comments

This commit is contained in:
mlemesle 2022-09-19 18:16:28 +02:00
parent a690ace36e
commit 4dfae44478
4 changed files with 203 additions and 132 deletions

View File

@ -1,50 +1,55 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
# db_path = "./data.ms"
# The destination where the database must be created.
# env = "development" # Possible values: [development, production]
# This environment variable must be set to `production` if you are running in production.
# More logs will be displayed if the server is running in development mode. Setting the master
# key is optional; hence no security on the updates routes. This
# is useful to debug when integrating the engine with another service.
# http_addr = "127.0.0.1:7700"
# The address on which the HTTP server will listen.
# master-key = "MASTER_KEY"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# no_analytics = false
# Do not send analytics to Meilisearch.
# disable-auto-batching = false
# The engine will disable task auto-batching, and will sequentially compute each task one by one.
### DUMP
# Folder where dumps are created when the dump route is called
# dumps-dir = "dumps/"
# Folder where dumps are created when the dump route is called.
# Ignore the dump if a database already exists, and load that database instead
# ignore-dump-if-db-exists = false
# If the dump doesn't exists, load or create the database specified by `db-path` instead
# ignore-missing-dump = false
# Import a dump from the specified path, must be a `.dump` file
# import-dump = "./path/to/my/file.dump"
# Import a dump from the specified path, must be a `.dump` file.
# ignore-missing-dump = false
# If the dump doesn't exist, load or create the database specified by `db-path` instead.
# ignore-dump-if-db-exists = false
# Ignore the dump if a database already exists, and load that database instead.
###
### SNAPSHOT
# The engine will ignore a missing snapshot and not return an error in such case
# ignore-missing-snapshot = false
# The engine will skip snapshot importation and not return an error in such case
# ignore-snapshot-if-db-exists = false
# Defines the path of the snapshot file to import. This option will, by default, stop the
# process if a database already exist or if no snapshot exists at the given path. If this
# option is not specified no snapshot is imported
# import-snapshot = false
# Activate snapshot scheduling
# schedule-snapshot = false
# Defines the directory path where meilisearch will create snapshot each snapshot_time_gap
# snapshot-dir = "snapshots/"
# Defines time interval, in seconds, between each snapshot creation
# snapshot-interval-sec = 86400
# log-level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
# Set the log level.
### INDEX
# The maximum size, in bytes, of the main lmdb database directory
# max-index-size = "100 GiB"
# The maximum size, in bytes, of the main LMDB database directory.
# max-indexing-memory = "2 GiB"
# The maximum amount of memory the indexer will use. It defaults to 2/3 of the available
# memory. It is recommended to use something like 80%-90% of the available memory, no
# more.
@ -54,68 +59,73 @@
# is recommended to specify the amount of memory to use.
#
# /!\ The default value is system dependent /!\
# max-indexing-memory = "2 GiB"
# max-indexing-threads = 4
# The maximum number of threads the indexer will use. If the number set is higher than the
# real number of cores available in the machine, it will use the maximum number of
# available cores.
#
# It defaults to half of the available threads.
# max-indexing-threads = 4
###
# max-task-db-size = "100 GiB"
# The maximum size, in bytes, of the update LMDB database directory.
# http-payload-size-limit = 100000000
# The maximum size, in bytes, of accepted JSON payloads.
### SNAPSHOT
# schedule-snapshot = false
# Activate snapshot scheduling.
# snapshot-dir = "snapshots/"
# Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec.
# snapshot-interval-sec = 86400
# Defines time interval, in seconds, between each snapshot creation.
# import-snapshot = false
# Defines the path of the snapshot file to import. This option will, by default, stop the
# process if a database already exists, or if no snapshot exists at the given path. If this
# option is not specified, no snapshot is imported.
# ignore-missing-snapshot = false
# The engine will ignore a missing snapshot and not return an error in such a case.
# ignore-snapshot-if-db-exists = false
# The engine will skip snapshot importation and not return an error in such a case.
###
### SSL
# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE
# ssl-auth-path = "./path/to/root"
# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE.
# ssl-cert-path = "./path/to/CERTFILE"
# Read server certificates from CERTFILE. This should contain PEM-format certificates in
# the right order (the first certificate should certify KEYFILE, the last should be a root
# CA)
# ssl-cert-path = "./path/to/CERTFILE"
# CA).
# Read private key from KEYFILE. This should be a RSA private key or PKCS8-encoded
# private key, in PEM format
# ssl-key-path = "./path/to/private-key"
# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional
# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded
# private key, in PEM format.
# ssl-ocsp-path = "./path/to/OCSPFILE"
# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional.
# Send a fatal alert if the client does not complete client authentication
# ssl-require-auth = false
# SSL support session resumption
# Send a fatal alert if the client does not complete client authentication.
# ssl-resumption = false
# SSL support tickets
# SSL support session resumption.
# ssl-tickets = false
# SSL support tickets.
### MISC
# This environment variable must be set to `production` if you are running in production.
# If the server is running in development mode more logs will be displayed, and the master
# key can be avoided which implies that there is no security on the updates routes. This
# is useful to debug when integrating the engine with another service
# env = "development" # possible values: [development, production]
# The address on which the http server will listen
# http-addr = "127.0.0.1:7700"
# The maximum size, in bytes, of accepted JSON payloads
# http-payload-size-limit = 100000000
# The destination where the database must be created
# db-path = "./data.ms"
# The engine will disable task auto-batching, and will sequentially compute each task one by one
# disable-auto-batching = false
# Set the log level
# log-level = "info"
# The master key allowing you to do everything on the server
# master-key = "YOUR MASTER KEY"
# The maximum size, in bytes, of the update lmdb database directory
# max-task-db-size = "100 GiB"
###

View File

@ -1,4 +1,5 @@
use std::env;
use std::path::PathBuf;
use std::sync::Arc;
use actix_web::http::KeepAlive;
@ -28,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::build();
let (opt, config_read_from) = Opt::try_build()?;
setup(&opt)?;
@ -57,7 +58,7 @@ async fn main() -> anyhow::Result<()> {
#[cfg(any(debug_assertions, not(feature = "analytics")))]
let (analytics, user) = analytics::MockAnalytics::new(&opt);
print_launch_resume(&opt, &user);
print_launch_resume(&opt, &user, config_read_from);
run_http(meilisearch, auth_controller, opt, analytics).await?;
@ -96,7 +97,7 @@ async fn run_http(
Ok(())
}
pub fn print_launch_resume(opt: &Opt, user: &str) {
pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
@ -117,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) {
eprintln!("{}", ascii_name);
eprintln!(
"Config file path:\t{}",
config_read_from
.map(|config_file_path| config_file_path.display().to_string())
.unwrap_or_else(|| "none".to_string())
);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);

View File

@ -64,6 +64,7 @@ const DEFAULT_LOG_LEVEL: &str = "info";
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
#[clap(version)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Opt {
/// The destination where the database must be created.
#[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())]
@ -75,15 +76,15 @@ pub struct Opt {
#[serde(default = "default_http_addr")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
/// Sets the instance's master key, automatically protecting all routes except GET /health
#[serde(skip_serializing)]
#[clap(long, env = MEILI_MASTER_KEY)]
pub master_key: Option<String>,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode more logs will be displayed,
/// and the master key can be avoided which implies that there is no security on the updates routes.
/// This is useful to debug when integrating the engine with another service.
/// More logs will be displayed if the server is running in development mode. Setting the master
/// key is optional; hence no security on the updates routes. This
/// is useful to debug when integrating the engine with another service
#[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)]
#[serde(default = "default_env")]
pub env: String,
@ -94,12 +95,12 @@ pub struct Opt {
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
/// The maximum size, in bytes, of the main LMDB database directory
#[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())]
#[serde(default = "default_max_index_size")]
pub max_index_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
/// The maximum size, in bytes, of the update LMDB database directory
#[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())]
#[serde(default = "default_max_task_db_size")]
pub max_task_db_size: Byte,
@ -117,7 +118,7 @@ pub struct Opt {
#[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// Read the private key from KEYFILE. This should be an RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))]
@ -151,12 +152,12 @@ pub struct Opt {
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
/// the given path. If this option is not specified no snapshot is imported.
/// This option will, by default, stop the process if a database already exists or if no snapshot exists at
/// the given path. If this option is not specified, no snapshot is imported.
#[clap(long, env = MEILI_IMPORT_SNAPSHOT)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such case.
/// The engine will ignore a missing snapshot and not return an error in such a case.
#[clap(
long,
env = MEILI_IGNORE_MISSING_SNAPSHOT,
@ -174,7 +175,7 @@ pub struct Opt {
#[serde(default)]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
/// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec.
#[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())]
#[serde(default = "default_snapshot_dir")]
pub snapshot_dir: PathBuf,
@ -194,7 +195,7 @@ pub struct Opt {
#[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// If the dump doesn't exists, load or create the database specified by `db-path` instead.
/// If the dump doesn't exist, load or create the database specified by `db-path` instead.
#[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")]
#[serde(default)]
pub ignore_missing_dump: bool,
@ -209,7 +210,7 @@ pub struct Opt {
#[serde(default = "default_dumps_dir")]
pub dumps_dir: PathBuf,
/// Set the log level
/// Set the log level. # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
#[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())]
#[serde(default = "default_log_level")]
pub log_level: String,
@ -243,78 +244,124 @@ impl Opt {
}
/// Build a new Opt from config file, env vars and cli args.
pub fn build() -> Self {
pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> {
// Parse the args to get the config_file_path.
let mut opts = Opt::parse();
if let Some(config_file_path) = opts.config_file_path.as_ref() {
eprintln!("loading config file : {:?}", config_file_path);
match std::fs::read(config_file_path) {
let mut config_read_from = None;
if let Some(config_file_path) = opts
.config_file_path
.clone()
.or_else(|| Some(PathBuf::from("./config.toml")))
{
match std::fs::read(&config_file_path) {
Ok(config) => {
// If the arg is present, and the file successfully read, we deserialize it with `toml`.
let opt_from_config =
toml::from_slice::<Opt>(&config).expect("can't read file");
// We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args.
opt_from_config.export_to_env();
// Once injected we parse the cli args once again to take the new env vars into scope.
opts = Opt::parse();
// If the file is successfully read, we deserialize it with `toml`.
match toml::from_slice::<Opt>(&config) {
Ok(opt_from_config) => {
// We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args.
opt_from_config.export_to_env();
// Once injected we parse the cli args once again to take the new env vars into scope.
opts = Opt::parse();
config_read_from = Some(config_file_path);
}
// If we have an error deserializing the file defined by the user.
Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err),
_ => (),
}
}
Err(err) => eprintln!("can't read {:?} : {}", config_file_path, err),
// If we have an error while reading the file defined by the user.
Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err),
_ => (),
}
}
opts
Ok((opts, config_read_from))
}
/// Exports the opts values to their corresponding env vars if they are not set.
fn export_to_env(self) {
export_to_env_if_not_present(MEILI_DB_PATH, self.db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr);
if let Some(master_key) = self.master_key {
let Opt {
db_path,
http_addr,
master_key,
env,
max_index_size,
max_task_db_size,
http_payload_size_limit,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
ssl_ocsp_path,
ssl_require_auth,
ssl_resumption,
ssl_tickets,
snapshot_dir,
schedule_snapshot,
snapshot_interval_sec,
dumps_dir,
log_level,
indexer_options,
scheduler_options,
import_snapshot: _,
ignore_missing_snapshot: _,
ignore_snapshot_if_db_exists: _,
import_dump: _,
ignore_missing_dump: _,
ignore_dump_if_db_exists: _,
config_file_path: _,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
#[cfg(feature = "metrics")]
enable_metrics_route,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
if let Some(master_key) = master_key {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, self.env);
export_to_env_if_not_present(MEILI_ENV, env);
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics.to_string());
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
}
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string());
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string());
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string());
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string());
export_to_env_if_not_present(
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
self.http_payload_size_limit.to_string(),
http_payload_size_limit.to_string(),
);
if let Some(ssl_cert_path) = self.ssl_cert_path {
if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
if let Some(ssl_key_path) = self.ssl_key_path {
if let Some(ssl_key_path) = ssl_key_path {
export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path);
}
if let Some(ssl_auth_path) = self.ssl_auth_path {
if let Some(ssl_auth_path) = ssl_auth_path {
export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path);
}
if let Some(ssl_ocsp_path) = self.ssl_ocsp_path {
if let Some(ssl_ocsp_path) = ssl_ocsp_path {
export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path);
}
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, self.ssl_require_auth.to_string());
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, self.ssl_resumption.to_string());
export_to_env_if_not_present(MEILI_SSL_TICKETS, self.ssl_tickets.to_string());
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, self.snapshot_dir);
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, self.schedule_snapshot.to_string());
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string());
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string());
export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string());
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir);
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string());
export_to_env_if_not_present(
MEILI_SNAPSHOT_INTERVAL_SEC,
self.snapshot_interval_sec.to_string(),
snapshot_interval_sec.to_string(),
);
export_to_env_if_not_present(MEILI_DUMPS_DIR, self.dumps_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, self.log_level);
export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level);
#[cfg(feature = "metrics")]
{
export_to_env_if_not_present(
MEILI_ENABLE_METRICS_ROUTE,
self.enable_metrics_route.to_string(),
enable_metrics_route.to_string(),
);
}
self.indexer_options.export_to_env();
self.scheduler_options.export_to_env();
indexer_options.export_to_env();
scheduler_options.export_to_env();
}
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {

View File

@ -12,10 +12,10 @@ use sysinfo::{RefreshKind, System, SystemExt};
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
const DEFAULT_LOG_EVERY_N: usize = 100000;
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
@ -50,6 +50,7 @@ pub struct IndexerOpts {
}
#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SchedulerConfig {
/// The engine will disable task auto-batching,
/// and will sequentially compute each task one by one.
@ -61,7 +62,13 @@ pub struct SchedulerConfig {
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
if let Some(max_indexing_memory) = self.max_indexing_memory.0 {
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
log_every_n: _,
max_nb_chunks: _,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
max_indexing_memory.to_string(),
@ -69,7 +76,7 @@ impl IndexerOpts {
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
self.max_indexing_threads.0.to_string(),
max_indexing_threads.0.to_string(),
);
}
}
@ -106,10 +113,10 @@ impl Default for IndexerOpts {
impl SchedulerConfig {
pub fn export_to_env(self) {
export_to_env_if_not_present(
DISABLE_AUTO_BATCHING,
self.disable_auto_batching.to_string(),
);
let SchedulerConfig {
disable_auto_batching,
} = self;
export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string());
}
}