2021-09-03 00:18:59 +08:00
|
|
|
use byte_unit::ByteError;
|
|
|
|
use std::fmt;
|
2020-12-12 20:32:06 +08:00
|
|
|
use std::io::{BufReader, Read};
|
2021-09-03 00:18:59 +08:00
|
|
|
use std::ops::Deref;
|
2020-12-12 20:32:06 +08:00
|
|
|
use std::path::PathBuf;
|
2021-09-03 00:18:59 +08:00
|
|
|
use std::str::FromStr;
|
2020-12-12 20:32:06 +08:00
|
|
|
use std::sync::Arc;
|
2021-03-16 01:11:10 +08:00
|
|
|
use std::{error, fs};
|
2020-12-12 20:32:06 +08:00
|
|
|
|
2020-12-22 21:02:41 +08:00
|
|
|
use byte_unit::Byte;
|
2021-08-25 00:52:28 +08:00
|
|
|
use milli::CompressionType;
|
2020-12-12 20:32:06 +08:00
|
|
|
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
|
|
|
use rustls::{
|
|
|
|
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
|
|
|
|
RootCertStore,
|
|
|
|
};
|
|
|
|
use structopt::StructOpt;
|
2021-09-03 00:18:59 +08:00
|
|
|
use sysinfo::{RefreshKind, System, SystemExt};
|
2020-12-12 20:32:06 +08:00
|
|
|
|
2021-01-16 22:09:48 +08:00
|
|
|
#[derive(Debug, Clone, StructOpt)]
|
|
|
|
pub struct IndexerOpts {
|
|
|
|
/// The amount of documents to skip before printing
|
|
|
|
/// a log regarding the indexing advancement.
|
|
|
|
#[structopt(long, default_value = "100000")] // 100k
|
|
|
|
pub log_every_n: usize,
|
|
|
|
|
2021-02-04 20:21:15 +08:00
|
|
|
/// Grenad max number of chunks in bytes.
|
2021-01-16 22:09:48 +08:00
|
|
|
#[structopt(long)]
|
|
|
|
pub max_nb_chunks: Option<usize>,
|
|
|
|
|
2021-09-03 00:18:59 +08:00
|
|
|
/// The maximum amount of memory the indexer will use. It defaults to 2/3
|
|
|
|
/// of the available memory. It is recommended to use something like 80%-90%
|
|
|
|
/// of the available memory, no more.
|
2021-01-16 22:09:48 +08:00
|
|
|
///
|
2021-09-03 00:18:59 +08:00
|
|
|
/// In case the engine is unable to retrieve the available memory the engine will
|
|
|
|
/// try to use the memory it needs but without real limit, this can lead to
|
|
|
|
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
|
2021-08-24 17:46:37 +08:00
|
|
|
#[structopt(long, default_value)]
|
2021-09-03 00:18:59 +08:00
|
|
|
pub max_memory: MaxMemory,
|
2021-01-16 22:09:48 +08:00
|
|
|
|
|
|
|
/// Size of the linked hash map cache when indexing.
|
|
|
|
/// The bigger it is, the faster the indexing is but the more memory it takes.
|
|
|
|
#[structopt(long, default_value = "500")]
|
|
|
|
pub linked_hash_map_size: usize,
|
|
|
|
|
|
|
|
/// The name of the compression algorithm to use when compressing intermediate
|
2021-02-04 20:21:15 +08:00
|
|
|
/// Grenad chunks while indexing documents.
|
2021-01-16 22:09:48 +08:00
|
|
|
///
|
|
|
|
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
|
|
|
|
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
|
|
|
|
pub chunk_compression_type: CompressionType,
|
|
|
|
|
|
|
|
/// The level of compression of the chosen algorithm.
|
|
|
|
#[structopt(long, requires = "chunk-compression-type")]
|
|
|
|
pub chunk_compression_level: Option<u32>,
|
|
|
|
|
|
|
|
/// The number of bytes to remove from the begining of the chunks while reading/sorting
|
|
|
|
/// or merging them.
|
|
|
|
///
|
|
|
|
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
|
|
|
|
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
|
|
|
|
#[structopt(long, default_value = "4 GiB")]
|
|
|
|
pub chunk_fusing_shrink_size: Byte,
|
|
|
|
|
2021-02-04 20:21:15 +08:00
|
|
|
/// Enable the chunk fusing or not, this reduces the amount of disk space used.
|
2021-01-16 22:09:48 +08:00
|
|
|
#[structopt(long)]
|
|
|
|
pub enable_chunk_fusing: bool,
|
2020-12-23 00:13:50 +08:00
|
|
|
|
2021-01-16 22:09:48 +08:00
|
|
|
/// Number of parallel jobs for indexing, defaults to # of CPUs.
|
|
|
|
#[structopt(long)]
|
|
|
|
pub indexing_jobs: Option<usize>,
|
|
|
|
}
|
2021-01-30 02:14:23 +08:00
|
|
|
|
|
|
|
impl Default for IndexerOpts {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self {
|
2021-02-04 20:21:15 +08:00
|
|
|
log_every_n: 100_000,
|
2021-01-30 02:14:23 +08:00
|
|
|
max_nb_chunks: None,
|
2021-09-03 00:18:59 +08:00
|
|
|
max_memory: MaxMemory::default(),
|
2021-02-04 20:21:15 +08:00
|
|
|
linked_hash_map_size: 500,
|
2021-01-30 02:14:23 +08:00
|
|
|
chunk_compression_type: CompressionType::None,
|
|
|
|
chunk_compression_level: None,
|
2021-02-04 20:21:15 +08:00
|
|
|
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
|
2021-01-30 02:14:23 +08:00
|
|
|
enable_chunk_fusing: false,
|
|
|
|
indexing_jobs: None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-12 20:32:06 +08:00
|
|
|
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
|
|
|
|
2020-12-22 21:02:41 +08:00
|
|
|
#[derive(Debug, Clone, StructOpt)]
|
2020-12-12 20:32:06 +08:00
|
|
|
pub struct Opt {
|
|
|
|
/// The destination where the database must be created.
|
|
|
|
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
|
2020-12-23 00:13:50 +08:00
|
|
|
pub db_path: PathBuf,
|
2020-12-12 20:32:06 +08:00
|
|
|
|
|
|
|
/// The address on which the http server will listen.
|
|
|
|
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
|
|
|
|
pub http_addr: String,
|
|
|
|
|
|
|
|
/// The master key allowing you to do everything on the server.
|
|
|
|
#[structopt(long, env = "MEILI_MASTER_KEY")]
|
|
|
|
pub master_key: Option<String>,
|
|
|
|
|
|
|
|
/// This environment variable must be set to `production` if you are running in production.
|
|
|
|
/// If the server is running in development mode more logs will be displayed,
|
|
|
|
/// and the master key can be avoided which implies that there is no security on the updates routes.
|
|
|
|
/// This is useful to debug when integrating the engine with another service.
|
|
|
|
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
|
|
|
|
pub env: String,
|
|
|
|
|
|
|
|
/// Do not send analytics to Meili.
|
2021-06-16 23:12:49 +08:00
|
|
|
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
2020-12-12 20:32:06 +08:00
|
|
|
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
|
|
|
|
pub no_analytics: bool,
|
|
|
|
|
|
|
|
/// The maximum size, in bytes, of the main lmdb database directory
|
2021-06-17 01:50:15 +08:00
|
|
|
#[structopt(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
|
|
|
|
pub max_index_size: Byte,
|
2020-12-12 20:32:06 +08:00
|
|
|
|
|
|
|
/// The maximum size, in bytes, of the update lmdb database directory
|
2021-06-17 01:50:15 +08:00
|
|
|
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "100 GiB")]
|
2020-12-22 21:02:41 +08:00
|
|
|
pub max_udb_size: Byte,
|
2020-12-12 20:32:06 +08:00
|
|
|
|
|
|
|
/// The maximum size, in bytes, of accepted JSON payloads
|
2021-06-17 01:50:15 +08:00
|
|
|
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
|
2020-12-22 21:02:41 +08:00
|
|
|
pub http_payload_size_limit: Byte,
|
2020-12-12 20:32:06 +08:00
|
|
|
|
|
|
|
/// Read server certificates from CERTFILE.
|
|
|
|
/// This should contain PEM-format certificates
|
|
|
|
/// in the right order (the first certificate should
|
|
|
|
/// certify KEYFILE, the last should be a root CA).
|
|
|
|
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
|
|
|
|
pub ssl_cert_path: Option<PathBuf>,
|
|
|
|
|
|
|
|
/// Read private key from KEYFILE. This should be a RSA
|
|
|
|
/// private key or PKCS8-encoded private key, in PEM format.
|
|
|
|
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
|
|
|
|
pub ssl_key_path: Option<PathBuf>,
|
|
|
|
|
|
|
|
/// Enable client authentication, and accept certificates
|
|
|
|
/// signed by those roots provided in CERTFILE.
|
|
|
|
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
|
|
|
|
pub ssl_auth_path: Option<PathBuf>,
|
|
|
|
|
|
|
|
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
|
|
|
|
/// Optional
|
|
|
|
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
|
|
|
|
pub ssl_ocsp_path: Option<PathBuf>,
|
|
|
|
|
|
|
|
/// Send a fatal alert if the client does not complete client authentication.
|
|
|
|
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
|
|
|
|
pub ssl_require_auth: bool,
|
|
|
|
|
|
|
|
/// SSL support session resumption
|
|
|
|
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
|
|
|
|
pub ssl_resumption: bool,
|
|
|
|
|
|
|
|
/// SSL support tickets.
|
|
|
|
#[structopt(long, env = "MEILI_SSL_TICKETS")]
|
|
|
|
pub ssl_tickets: bool,
|
|
|
|
|
|
|
|
/// Defines the path of the snapshot file to import.
|
|
|
|
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
|
|
|
|
/// the given path. If this option is not specified no snapshot is imported.
|
|
|
|
#[structopt(long)]
|
|
|
|
pub import_snapshot: Option<PathBuf>,
|
|
|
|
|
|
|
|
/// The engine will ignore a missing snapshot and not return an error in such case.
|
|
|
|
#[structopt(long, requires = "import-snapshot")]
|
|
|
|
pub ignore_missing_snapshot: bool,
|
|
|
|
|
|
|
|
/// The engine will skip snapshot importation and not return an error in such case.
|
|
|
|
#[structopt(long, requires = "import-snapshot")]
|
|
|
|
pub ignore_snapshot_if_db_exists: bool,
|
|
|
|
|
|
|
|
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
|
|
|
|
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
|
|
|
|
pub snapshot_dir: PathBuf,
|
|
|
|
|
|
|
|
/// Activate snapshot scheduling.
|
|
|
|
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
|
|
|
|
pub schedule_snapshot: bool,
|
|
|
|
|
|
|
|
/// Defines time interval, in seconds, between each snapshot creation.
|
2021-03-17 19:01:56 +08:00
|
|
|
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
|
|
|
|
pub snapshot_interval_sec: u64,
|
2020-12-12 20:32:06 +08:00
|
|
|
|
|
|
|
/// Folder where dumps are created when the dump route is called.
|
|
|
|
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
|
|
|
pub dumps_dir: PathBuf,
|
|
|
|
|
2021-06-17 01:50:15 +08:00
|
|
|
/// Import a dump from the specified path, must be a `.dump` file.
|
2020-12-12 20:32:06 +08:00
|
|
|
#[structopt(long, conflicts_with = "import-snapshot")]
|
|
|
|
pub import_dump: Option<PathBuf>,
|
|
|
|
|
2021-06-23 17:07:19 +08:00
|
|
|
/// Set the log level
|
|
|
|
#[structopt(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
|
|
|
|
pub log_level: String,
|
|
|
|
|
2021-06-22 00:13:00 +08:00
|
|
|
#[structopt(skip)]
|
2020-12-23 00:13:50 +08:00
|
|
|
pub indexer_options: IndexerOpts,
|
2020-12-12 20:32:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Opt {
|
|
|
|
pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
|
|
|
|
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
|
|
|
|
let client_auth = match &self.ssl_auth_path {
|
|
|
|
Some(auth_path) => {
|
|
|
|
let roots = load_certs(auth_path.to_path_buf())?;
|
|
|
|
let mut client_auth_roots = RootCertStore::empty();
|
|
|
|
for root in roots {
|
|
|
|
client_auth_roots.add(&root).unwrap();
|
|
|
|
}
|
|
|
|
if self.ssl_require_auth {
|
|
|
|
AllowAnyAuthenticatedClient::new(client_auth_roots)
|
|
|
|
} else {
|
|
|
|
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None => NoClientAuth::new(),
|
|
|
|
};
|
|
|
|
|
|
|
|
let mut config = rustls::ServerConfig::new(client_auth);
|
|
|
|
config.key_log = Arc::new(rustls::KeyLogFile::new());
|
|
|
|
|
|
|
|
let certs = load_certs(cert_path.to_path_buf())?;
|
|
|
|
let privkey = load_private_key(key_path.to_path_buf())?;
|
|
|
|
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
|
|
|
|
config
|
|
|
|
.set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
|
|
|
|
.map_err(|_| "bad certificates/private key")?;
|
|
|
|
|
|
|
|
if self.ssl_resumption {
|
|
|
|
config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.ssl_tickets {
|
|
|
|
config.ticketer = rustls::Ticketer::new();
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(Some(config))
|
|
|
|
} else {
|
|
|
|
Ok(None)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-03 00:18:59 +08:00
|
|
|
/// A type used to detect the max memory available and use 2/3 of it.
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
|
|
pub struct MaxMemory(Option<Byte>);
|
|
|
|
|
|
|
|
impl FromStr for MaxMemory {
|
|
|
|
type Err = ByteError;
|
|
|
|
|
|
|
|
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
|
|
|
|
Byte::from_str(s).map(Some).map(MaxMemory)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for MaxMemory {
|
|
|
|
fn default() -> MaxMemory {
|
|
|
|
MaxMemory(
|
|
|
|
total_memory_bytes()
|
|
|
|
.map(|bytes| bytes * 2 / 3)
|
|
|
|
.map(Byte::from_bytes),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl fmt::Display for MaxMemory {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
match self.0 {
|
|
|
|
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
|
|
|
|
None => f.write_str("unknown"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Deref for MaxMemory {
|
|
|
|
type Target = Option<Byte>;
|
|
|
|
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
&self.0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the total amount of bytes available or `None` if this system isn't supported.
|
|
|
|
fn total_memory_bytes() -> Option<u64> {
|
|
|
|
if System::IS_SUPPORTED {
|
|
|
|
let memory_kind = RefreshKind::new().with_memory();
|
|
|
|
let mut system = System::new_with_specifics(memory_kind);
|
|
|
|
system.refresh_memory();
|
|
|
|
Some(system.total_memory() * 1024) // KiB into bytes
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-12 20:32:06 +08:00
|
|
|
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
|
|
|
|
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
|
|
|
|
let mut reader = BufReader::new(certfile);
|
|
|
|
Ok(certs(&mut reader).map_err(|_| "cannot read certificate file")?)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
|
|
|
|
let rsa_keys = {
|
|
|
|
let keyfile =
|
|
|
|
fs::File::open(filename.clone()).map_err(|_| "cannot open private key file")?;
|
|
|
|
let mut reader = BufReader::new(keyfile);
|
|
|
|
rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
|
|
|
|
};
|
|
|
|
|
|
|
|
let pkcs8_keys = {
|
|
|
|
let keyfile = fs::File::open(filename).map_err(|_| "cannot open private key file")?;
|
|
|
|
let mut reader = BufReader::new(keyfile);
|
|
|
|
pkcs8_private_keys(&mut reader)
|
|
|
|
.map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
|
|
|
|
};
|
|
|
|
|
|
|
|
// prefer to load pkcs8 keys
|
|
|
|
if !pkcs8_keys.is_empty() {
|
|
|
|
Ok(pkcs8_keys[0].clone())
|
|
|
|
} else {
|
|
|
|
assert!(!rsa_keys.is_empty());
|
|
|
|
Ok(rsa_keys[0].clone())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
|
|
|
|
let mut ret = Vec::new();
|
|
|
|
|
|
|
|
if let Some(ref name) = filename {
|
|
|
|
fs::File::open(name)
|
|
|
|
.map_err(|_| "cannot open ocsp file")?
|
|
|
|
.read_to_end(&mut ret)
|
|
|
|
.map_err(|_| "cannot read oscp file")?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(ret)
|
|
|
|
}
|