diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 13dba7896..d5bc4cf0d 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -6,6 +6,7 @@ use std::time::{Duration, Instant}; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; +use byte_unit::Byte; use http::header::CONTENT_TYPE; use index_scheduler::IndexScheduler; use meilisearch_auth::SearchRules; @@ -14,6 +15,7 @@ use once_cell::sync::Lazy; use regex::Regex; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; +use serde::Serialize; use serde_json::{json, Value}; use sysinfo::{DiskExt, System, SystemExt}; use time::OffsetDateTime; @@ -23,7 +25,7 @@ use uuid::Uuid; use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; use crate::analytics::Analytics; -use crate::option::default_http_addr; +use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig}; use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ @@ -182,6 +184,124 @@ impl super::Analytics for SegmentAnalytics { } } +/// This structure represent the `infos` field we send in the analytics. +/// It's quite close to the `Opt` structure except all sensitive informations +/// have been simplified to a boolean. +/// It's send as-is in amplitude thus you should never update a name of the +/// struct without the approval of the PM. +#[derive(Debug, Clone, Serialize)] +struct Infos { + env: String, + db_path: bool, + import_dump: bool, + dumps_dir: bool, + ignore_missing_dump: bool, + ignore_dump_if_db_exists: bool, + import_snapshot: bool, + schedule_snapshot: bool, + snapshot_dir: bool, + snapshot_interval_sec: u64, + ignore_missing_snapshot: bool, + ignore_snapshot_if_db_exists: bool, + http_addr: bool, + max_index_size: Byte, + max_task_db_size: Byte, + http_payload_size_limit: Byte, + disable_auto_batching: bool, + log_level: String, + max_indexing_memory: MaxMemory, + max_indexing_threads: MaxThreads, + with_configuration_file: bool, + ssl_auth_path: bool, + ssl_cert_path: bool, + ssl_key_path: bool, + ssl_ocsp_path: bool, + ssl_require_auth: bool, + ssl_resumption: bool, + ssl_tickets: bool, +} + +impl From for Infos { + fn from(options: Opt) -> Self { + // We wants to decompose this whole struct by hand to be sure we don't forget + // to add analytics when we add a field in the Opt. + // Thus we must not insert `..` at the end. + let Opt { + db_path, + http_addr, + master_key: _, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + import_snapshot, + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + import_dump, + ignore_missing_dump, + ignore_dump_if_db_exists, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + config_file_path, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics: _, + } = options; + + let SchedulerConfig { disable_auto_batching } = scheduler_options; + let IndexerOpts { + log_every_n: _, + max_nb_chunks: _, + max_indexing_memory, + max_indexing_threads, + } = indexer_options; + + // We're going to override every sensible information. + // We consider information sensible if it contains a path, an address, or a key. + Self { + env, + db_path: db_path != PathBuf::from("./data.ms"), + import_dump: import_dump.is_some(), + dumps_dir: dumps_dir != PathBuf::from("dumps/"), + ignore_missing_dump, + ignore_dump_if_db_exists, + import_snapshot: import_snapshot.is_some(), + schedule_snapshot, + snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"), + snapshot_interval_sec, + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + http_addr: http_addr != default_http_addr(), + max_index_size, + max_task_db_size, + http_payload_size_limit, + disable_auto_batching, + log_level, + max_indexing_memory, + max_indexing_threads, + with_configuration_file: config_file_path.is_some(), + ssl_auth_path: ssl_auth_path.is_some(), + ssl_cert_path: ssl_cert_path.is_some(), + ssl_key_path: ssl_key_path.is_some(), + ssl_ocsp_path: ssl_ocsp_path.is_some(), + ssl_require_auth, + ssl_resumption, + ssl_tickets, + } + } +} + pub struct Segment { inbox: Receiver, user: User, @@ -212,31 +332,6 @@ impl Segment { "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), }) }); - // The infos are all cli option except every option containing sensitive information. - // We consider an information as sensible if it contains a path, an address or a key. - let infos = { - // First we see if any sensitive fields were used. - let db_path = opt.db_path != PathBuf::from("./data.ms"); - let import_dump = opt.import_dump.is_some(); - let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/"); - let import_snapshot = opt.import_snapshot.is_some(); - let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/"); - let http_addr = opt.http_addr != default_http_addr(); - - let mut infos = serde_json::to_value(opt).unwrap(); - - // Then we overwrite all sensitive field with a boolean representing if - // the feature was used or not. - infos["db_path"] = json!(db_path); - infos["import_dump"] = json!(import_dump); - infos["dumps_dir"] = json!(dumps_dir); - infos["import_snapshot"] = json!(import_snapshot); - infos["snapshot_dir"] = json!(snapshots_dir); - infos["http_addr"] = json!(http_addr); - - infos - }; - let number_of_documents = stats.indexes.values().map(|index| index.number_of_documents).collect::>(); @@ -248,7 +343,7 @@ impl Segment { "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, }, - "infos": infos, + "infos": Infos::from(opt.clone()), }) } diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 02ce67bbc..91608d0d2 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -69,7 +69,7 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; const DEFAULT_LOG_EVERY_N: usize = 100000; -#[derive(Debug, Clone, Parser, Serialize, Deserialize)] +#[derive(Debug, Clone, Parser, Deserialize)] #[clap(version, next_display_order = None)] #[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { @@ -84,7 +84,6 @@ pub struct Opt { pub http_addr: String, /// Sets the instance's master key, automatically protecting all routes except `GET /health`. - #[serde(skip_serializing)] #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, @@ -99,7 +98,7 @@ pub struct Opt { /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// at any time. #[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip_serializing, default)] // we can't send true + #[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, @@ -121,39 +120,35 @@ pub struct Opt { pub http_payload_size_limit: Byte, /// Sets the server's SSL certificates. - #[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)] pub ssl_cert_path: Option, /// Sets the server's SSL key files. - #[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)] pub ssl_key_path: Option, /// Enables client authentication in the specified path. - #[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)] pub ssl_auth_path: Option, /// Sets the server's OCSP file. *Optional* /// /// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate. - #[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)] pub ssl_ocsp_path: Option, /// Makes SSL authentication mandatory. - #[serde(skip_serializing, default)] + #[serde(default)] #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// Activates SSL session resumption. - #[serde(skip_serializing, default)] + #[serde(default)] #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// Activates SSL tickets. - #[serde(skip_serializing, default)] + #[serde(default)] #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, @@ -251,7 +246,6 @@ pub struct Opt { /// Set the path to a configuration file that should be used to setup the engine. /// Format must be TOML. - #[serde(skip_serializing)] #[clap(long)] pub config_file_path: Option, } @@ -439,16 +433,15 @@ impl Opt { } } -#[derive(Debug, Clone, Parser, Deserialize, Serialize)] +#[derive(Debug, Clone, Parser, Deserialize)] pub struct IndexerOpts { /// Sets the amount of documents to skip before printing /// a log regarding the indexing advancement. - #[serde(skip_serializing, default = "default_log_every_n")] + #[serde(default = "default_log_every_n")] #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. - #[serde(skip_serializing)] #[clap(long, hide = true)] pub max_nb_chunks: Option, @@ -488,7 +481,7 @@ impl IndexerOpts { } } -#[derive(Debug, Clone, Parser, Default, Deserialize, Serialize)] +#[derive(Debug, Clone, Parser, Default, Deserialize)] #[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct SchedulerConfig { /// Deactivates auto-batching when provided.