Refactor the way we send the CLI information + add analytics for the config file and SSL usage

This commit is contained in:
Tamo 2022-11-15 16:20:47 +01:00
parent a8991ccb64
commit 0fced6f270
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
2 changed files with 125 additions and 42 deletions

View File

@ -6,6 +6,7 @@ use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT; use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest; use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use meilisearch_auth::SearchRules; use meilisearch_auth::SearchRules;
@ -14,6 +15,7 @@ use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use segment::message::{Identify, Track, User}; use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient}; use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value}; use serde_json::{json, Value};
use sysinfo::{DiskExt, System, SystemExt}; use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -23,7 +25,7 @@ use uuid::Uuid;
use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::option::default_http_addr; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig};
use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::{create_all_stats, Stats}; use crate::routes::{create_all_stats, Stats};
use crate::search::{ use crate::search::{
@ -182,6 +184,119 @@ impl super::Analytics for SegmentAnalytics {
} }
} }
/// Serializable summary of the CLI options that is embedded under the `"infos"` key of the
/// analytics payload.
///
/// It is built from an `Opt` through `From<Opt>`. Options that hold a path or an address are
/// never stored as-is: they are reduced to a boolean telling whether the option was provided
/// or differs from its hard-coded default. The master key is not represented at all.
#[derive(Debug, Clone, Serialize)]
struct Infos {
/// Value of the `env` option, copied unchanged.
env: String,
/// `true` when the database path differs from `./data.ms`.
db_path: bool,
/// `true` when a dump to import was provided.
import_dump: bool,
/// `true` when the dumps directory differs from `dumps/`.
dumps_dir: bool,
ignore_missing_dump: bool,
ignore_dump_if_db_exists: bool,
/// `true` when a snapshot to import was provided.
import_snapshot: bool,
schedule_snapshot: bool,
/// `true` when the snapshot directory differs from `snapshots/`.
snapshot_dir: bool,
snapshot_interval_sec: u64,
ignore_missing_snapshot: bool,
ignore_snapshot_if_db_exists: bool,
/// `true` when the HTTP address differs from `default_http_addr()`.
http_addr: bool,
// The three size limits below are copied unchanged from the options.
max_index_size: Byte,
max_task_db_size: Byte,
http_payload_size_limit: Byte,
/// Taken from the scheduler options.
disable_auto_batching: bool,
log_level: String,
// The two indexing limits below are taken from the indexer options.
max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads,
/// `true` when a configuration file path was provided.
with_configuration_file: bool,
// For the four SSL paths below, `true` means the corresponding path was provided.
ssl_auth_path: bool,
ssl_cert_path: bool,
ssl_key_path: bool,
ssl_ocsp_path: bool,
// The three SSL flags below are copied unchanged from the options.
ssl_require_auth: bool,
ssl_resumption: bool,
ssl_tickets: bool,
}
impl From<Opt> for Infos {
    /// Builds the analytics view of the CLI options.
    ///
    /// Every option carrying a path, an address or a key is considered sensitive: it is
    /// either dropped or replaced by a boolean that only tells whether the option was used
    /// (or differs from its default). All remaining options are forwarded untouched.
    fn from(options: Opt) -> Self {
        // The options (and the nested indexer/scheduler structs) are destructured
        // exhaustively on purpose: adding a field to any of them then fails to compile
        // here until someone decides how it must be reported. Never add `..` to
        // these patterns.
        let Opt {
            env,
            log_level,
            master_key: _,
            db_path,
            http_addr,
            config_file_path,
            max_index_size,
            max_task_db_size,
            http_payload_size_limit,
            import_dump,
            dumps_dir,
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            import_snapshot,
            snapshot_dir,
            schedule_snapshot,
            snapshot_interval_sec,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            ssl_auth_path,
            ssl_cert_path,
            ssl_key_path,
            ssl_ocsp_path,
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
            indexer_options:
                IndexerOpts {
                    log_every_n: _,
                    max_nb_chunks: _,
                    max_indexing_memory,
                    max_indexing_threads,
                },
            scheduler_options: SchedulerConfig { disable_auto_batching },
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
                no_analytics: _,
        } = options;

        // Paths and addresses: only record whether they differ from the default value.
        let custom_db_path = db_path != PathBuf::from("./data.ms");
        let custom_dumps_dir = dumps_dir != PathBuf::from("dumps/");
        let custom_snapshot_dir = snapshot_dir != PathBuf::from("snapshots/");
        let custom_http_addr = http_addr != default_http_addr();

        // Optional paths: only record whether they were provided.
        let has_dump_import = import_dump.is_some();
        let has_snapshot_import = import_snapshot.is_some();
        let has_config_file = config_file_path.is_some();
        let has_ssl_auth = ssl_auth_path.is_some();
        let has_ssl_cert = ssl_cert_path.is_some();
        let has_ssl_key = ssl_key_path.is_some();
        let has_ssl_ocsp = ssl_ocsp_path.is_some();

        Self {
            env,
            db_path: custom_db_path,
            import_dump: has_dump_import,
            dumps_dir: custom_dumps_dir,
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            import_snapshot: has_snapshot_import,
            schedule_snapshot,
            snapshot_dir: custom_snapshot_dir,
            snapshot_interval_sec,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            http_addr: custom_http_addr,
            max_index_size,
            max_task_db_size,
            http_payload_size_limit,
            disable_auto_batching,
            log_level,
            max_indexing_memory,
            max_indexing_threads,
            with_configuration_file: has_config_file,
            ssl_auth_path: has_ssl_auth,
            ssl_cert_path: has_ssl_cert,
            ssl_key_path: has_ssl_key,
            ssl_ocsp_path: has_ssl_ocsp,
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
        }
    }
}
pub struct Segment { pub struct Segment {
inbox: Receiver<AnalyticsMsg>, inbox: Receiver<AnalyticsMsg>,
user: User, user: User,
@ -212,31 +327,6 @@ impl Segment {
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
}) })
}); });
// The infos are all cli option except every option containing sensitive information.
// We consider an information as sensible if it contains a path, an address or a key.
let infos = {
// First we see if any sensitive fields were used.
let db_path = opt.db_path != PathBuf::from("./data.ms");
let import_dump = opt.import_dump.is_some();
let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/");
let import_snapshot = opt.import_snapshot.is_some();
let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/");
let http_addr = opt.http_addr != default_http_addr();
let mut infos = serde_json::to_value(opt).unwrap();
// Then we overwrite all sensitive field with a boolean representing if
// the feature was used or not.
infos["db_path"] = json!(db_path);
infos["import_dump"] = json!(import_dump);
infos["dumps_dir"] = json!(dumps_dir);
infos["import_snapshot"] = json!(import_snapshot);
infos["snapshot_dir"] = json!(snapshots_dir);
infos["http_addr"] = json!(http_addr);
infos
};
let number_of_documents = let number_of_documents =
stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>(); stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();
@ -248,7 +338,7 @@ impl Segment {
"indexes_number": stats.indexes.len(), "indexes_number": stats.indexes.len(),
"documents_number": number_of_documents, "documents_number": number_of_documents,
}, },
"infos": infos, "infos": Infos::from(opt.clone()),
}) })
} }

View File

@ -69,7 +69,7 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
const DEFAULT_LOG_EVERY_N: usize = 100000; const DEFAULT_LOG_EVERY_N: usize = 100000;
#[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[derive(Debug, Clone, Parser, Deserialize)]
#[clap(version, next_display_order = None)] #[clap(version, next_display_order = None)]
#[serde(rename_all = "snake_case", deny_unknown_fields)] #[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Opt { pub struct Opt {
@ -84,7 +84,6 @@ pub struct Opt {
pub http_addr: String, pub http_addr: String,
/// Sets the instance's master key, automatically protecting all routes except `GET /health`. /// Sets the instance's master key, automatically protecting all routes except `GET /health`.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_MASTER_KEY)] #[clap(long, env = MEILI_MASTER_KEY)]
pub master_key: Option<String>, pub master_key: Option<String>,
@ -99,7 +98,7 @@ pub struct Opt {
/// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
/// at any time. /// at any time.
#[cfg(all(not(debug_assertions), feature = "analytics"))] #[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(skip_serializing, default)] // we can't send true #[serde(default)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)] #[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool, pub no_analytics: bool,
@ -121,39 +120,35 @@ pub struct Opt {
pub http_payload_size_limit: Byte, pub http_payload_size_limit: Byte,
/// Sets the server's SSL certificates. /// Sets the server's SSL certificates.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)] #[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)]
pub ssl_cert_path: Option<PathBuf>, pub ssl_cert_path: Option<PathBuf>,
/// Sets the server's SSL key files. /// Sets the server's SSL key files.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)] #[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)]
pub ssl_key_path: Option<PathBuf>, pub ssl_key_path: Option<PathBuf>,
/// Enables client authentication in the specified path. /// Enables client authentication in the specified path.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)] #[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)]
pub ssl_auth_path: Option<PathBuf>, pub ssl_auth_path: Option<PathBuf>,
/// Sets the server's OCSP file. *Optional* /// Sets the server's OCSP file. *Optional*
/// ///
/// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate. /// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate.
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)] #[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)]
pub ssl_ocsp_path: Option<PathBuf>, pub ssl_ocsp_path: Option<PathBuf>,
/// Makes SSL authentication mandatory. /// Makes SSL authentication mandatory.
#[serde(skip_serializing, default)] #[serde(default)]
#[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
pub ssl_require_auth: bool, pub ssl_require_auth: bool,
/// Activates SSL session resumption. /// Activates SSL session resumption.
#[serde(skip_serializing, default)] #[serde(default)]
#[clap(long, env = MEILI_SSL_RESUMPTION)] #[clap(long, env = MEILI_SSL_RESUMPTION)]
pub ssl_resumption: bool, pub ssl_resumption: bool,
/// Activates SSL tickets. /// Activates SSL tickets.
#[serde(skip_serializing, default)] #[serde(default)]
#[clap(long, env = MEILI_SSL_TICKETS)] #[clap(long, env = MEILI_SSL_TICKETS)]
pub ssl_tickets: bool, pub ssl_tickets: bool,
@ -251,7 +246,6 @@ pub struct Opt {
/// Set the path to a configuration file that should be used to setup the engine. /// Set the path to a configuration file that should be used to setup the engine.
/// Format must be TOML. /// Format must be TOML.
#[serde(skip_serializing)]
#[clap(long)] #[clap(long)]
pub config_file_path: Option<PathBuf>, pub config_file_path: Option<PathBuf>,
} }
@ -439,16 +433,15 @@ impl Opt {
} }
} }
#[derive(Debug, Clone, Parser, Deserialize, Serialize)] #[derive(Debug, Clone, Parser, Deserialize)]
pub struct IndexerOpts { pub struct IndexerOpts {
/// Sets the amount of documents to skip before printing /// Sets the amount of documents to skip before printing
/// a log regarding the indexing advancement. /// a log regarding the indexing advancement.
#[serde(skip_serializing, default = "default_log_every_n")] #[serde(default = "default_log_every_n")]
#[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
pub log_every_n: usize, pub log_every_n: usize,
/// Grenad max number of chunks in bytes. /// Grenad max number of chunks in bytes.
#[serde(skip_serializing)]
#[clap(long, hide = true)] #[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>, pub max_nb_chunks: Option<usize>,
@ -488,7 +481,7 @@ impl IndexerOpts {
} }
} }
#[derive(Debug, Clone, Parser, Default, Deserialize, Serialize)] #[derive(Debug, Clone, Parser, Default, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)] #[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SchedulerConfig { pub struct SchedulerConfig {
/// Deactivates auto-batching when provided. /// Deactivates auto-batching when provided.