Merge #3056

3056: refactor the way we send the CLI information + add the analytics for the config file and SSL usage r=Kerollmops a=irevoire Partially fix #2955 Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-17 08:10:14 +08:00 · 2022-11-15 17:03:30 +00:00 · 2022-11-15 17:03:30 +00:00 · 51be75a264
commit 51be75a264
parent a8991ccb64 4953b62712
2 changed files with 130 additions and 42 deletions
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@ -6,6 +6,7 @@ use std::time::{Duration, Instant};

 use actix_web::http::header::USER_AGENT;
 use actix_web::HttpRequest;
+use byte_unit::Byte;
 use http::header::CONTENT_TYPE;
 use index_scheduler::IndexScheduler;
 use meilisearch_auth::SearchRules;
@ -14,6 +15,7 @@ use once_cell::sync::Lazy;
 use regex::Regex;
 use segment::message::{Identify, Track, User};
 use segment::{AutoBatcher, Batcher, HttpClient};
+use serde::Serialize;
 use serde_json::{json, Value};
 use sysinfo::{DiskExt, System, SystemExt};
 use time::OffsetDateTime;
@ -23,7 +25,7 @@ use uuid::Uuid;

 use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH};
 use crate::analytics::Analytics;
-use crate::option::default_http_addr;
+use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
@ -182,6 +184,124 @@ impl super::Analytics for SegmentAnalytics {
    }
 }

+/// This structure represents the `infos` field we send in the analytics.
+/// It's quite close to the `Opt` structure, except that all sensitive information
+/// has been reduced to a boolean.
+/// It's sent as-is to Amplitude, so you should never rename a field of this
+/// struct without the approval of the PM.
+#[derive(Debug, Clone, Serialize)]
+struct Infos {
+    env: String,
+    db_path: bool,
+    import_dump: bool,
+    dumps_dir: bool,
+    ignore_missing_dump: bool,
+    ignore_dump_if_db_exists: bool,
+    import_snapshot: bool,
+    schedule_snapshot: bool,
+    snapshot_dir: bool,
+    snapshot_interval_sec: u64,
+    ignore_missing_snapshot: bool,
+    ignore_snapshot_if_db_exists: bool,
+    http_addr: bool,
+    max_index_size: Byte,
+    max_task_db_size: Byte,
+    http_payload_size_limit: Byte,
+    disable_auto_batching: bool,
+    log_level: String,
+    max_indexing_memory: MaxMemory,
+    max_indexing_threads: MaxThreads,
+    with_configuration_file: bool,
+    ssl_auth_path: bool,
+    ssl_cert_path: bool,
+    ssl_key_path: bool,
+    ssl_ocsp_path: bool,
+    ssl_require_auth: bool,
+    ssl_resumption: bool,
+    ssl_tickets: bool,
+}
+
+impl From<Opt> for Infos {
+    fn from(options: Opt) -> Self {
+        // We want to destructure this whole struct by hand to be sure we don't forget
+        // to add analytics when we add a field to `Opt`.
+        // Thus we must not insert `..` at the end.
+        let Opt {
+            db_path,
+            http_addr,
+            master_key: _,
+            env,
+            max_index_size,
+            max_task_db_size,
+            http_payload_size_limit,
+            ssl_cert_path,
+            ssl_key_path,
+            ssl_auth_path,
+            ssl_ocsp_path,
+            ssl_require_auth,
+            ssl_resumption,
+            ssl_tickets,
+            import_snapshot,
+            ignore_missing_snapshot,
+            ignore_snapshot_if_db_exists,
+            snapshot_dir,
+            schedule_snapshot,
+            snapshot_interval_sec,
+            import_dump,
+            ignore_missing_dump,
+            ignore_dump_if_db_exists,
+            dumps_dir,
+            log_level,
+            indexer_options,
+            scheduler_options,
+            config_file_path,
+            #[cfg(all(not(debug_assertions), feature = "analytics"))]
+                no_analytics: _,
+        } = options;
+
+        let SchedulerConfig { disable_auto_batching } = scheduler_options;
+        let IndexerOpts {
+            log_every_n: _,
+            max_nb_chunks: _,
+            max_indexing_memory,
+            max_indexing_threads,
+        } = indexer_options;
+
+        // We're going to override every piece of sensitive information.
+        // We consider information sensitive if it contains a path, an address, or a key.
+        Self {
+            env,
+            db_path: db_path != PathBuf::from("./data.ms"),
+            import_dump: import_dump.is_some(),
+            dumps_dir: dumps_dir != PathBuf::from("dumps/"),
+            ignore_missing_dump,
+            ignore_dump_if_db_exists,
+            import_snapshot: import_snapshot.is_some(),
+            schedule_snapshot,
+            snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
+            snapshot_interval_sec,
+            ignore_missing_snapshot,
+            ignore_snapshot_if_db_exists,
+            http_addr: http_addr != default_http_addr(),
+            max_index_size,
+            max_task_db_size,
+            http_payload_size_limit,
+            disable_auto_batching,
+            log_level,
+            max_indexing_memory,
+            max_indexing_threads,
+            with_configuration_file: config_file_path.is_some(),
+            ssl_auth_path: ssl_auth_path.is_some(),
+            ssl_cert_path: ssl_cert_path.is_some(),
+            ssl_key_path: ssl_key_path.is_some(),
+            ssl_ocsp_path: ssl_ocsp_path.is_some(),
+            ssl_require_auth,
+            ssl_resumption,
+            ssl_tickets,
+        }
+    }
+}
+
 pub struct Segment {
    inbox: Receiver<AnalyticsMsg>,
    user: User,
@ -212,31 +332,6 @@ impl Segment {
                    "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
            })
        });
-        // The infos are all cli option except every option containing sensitive information.
-        // We consider an information as sensible if it contains a path, an address or a key.
-        let infos = {
-            // First we see if any sensitive fields were used.
-            let db_path = opt.db_path != PathBuf::from("./data.ms");
-            let import_dump = opt.import_dump.is_some();
-            let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/");
-            let import_snapshot = opt.import_snapshot.is_some();
-            let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/");
-            let http_addr = opt.http_addr != default_http_addr();
-
-            let mut infos = serde_json::to_value(opt).unwrap();
-
-            // Then we overwrite all sensitive field with a boolean representing if
-            // the feature was used or not.
-            infos["db_path"] = json!(db_path);
-            infos["import_dump"] = json!(import_dump);
-            infos["dumps_dir"] = json!(dumps_dir);
-            infos["import_snapshot"] = json!(import_snapshot);
-            infos["snapshot_dir"] = json!(snapshots_dir);
-            infos["http_addr"] = json!(http_addr);
-
-            infos
-        };
-
        let number_of_documents =
            stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();

@ -248,7 +343,7 @@ impl Segment {
                "indexes_number": stats.indexes.len(),
                "documents_number": number_of_documents,
            },
-            "infos": infos,
+            "infos": Infos::from(opt.clone()),
        })
    }

--- a/meilisearch-http/src/option.rs
+++ b/meilisearch-http/src/option.rs
@ -69,7 +69,7 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
 const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
 const DEFAULT_LOG_EVERY_N: usize = 100000;

-#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
+#[derive(Debug, Clone, Parser, Deserialize)]
 #[clap(version, next_display_order = None)]
 #[serde(rename_all = "snake_case", deny_unknown_fields)]
 pub struct Opt {
@ -84,7 +84,6 @@ pub struct Opt {
    pub http_addr: String,

    /// Sets the instance's master key, automatically protecting all routes except `GET /health`.
-    #[serde(skip_serializing)]
    #[clap(long, env = MEILI_MASTER_KEY)]
    pub master_key: Option<String>,

@ -99,7 +98,7 @@ pub struct Opt {
    /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
    /// at any time.
    #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    #[serde(skip_serializing, default)] // we can't send true
+    #[serde(default)] // we can't send true
    #[clap(long, env = MEILI_NO_ANALYTICS)]
    pub no_analytics: bool,

@ -121,39 +120,35 @@ pub struct Opt {
    pub http_payload_size_limit: Byte,

    /// Sets the server's SSL certificates.
-    #[serde(skip_serializing)]
    #[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)]
    pub ssl_cert_path: Option<PathBuf>,

    /// Sets the server's SSL key files.
-    #[serde(skip_serializing)]
    #[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)]
    pub ssl_key_path: Option<PathBuf>,

    /// Enables client authentication in the specified path.
-    #[serde(skip_serializing)]
    #[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)]
    pub ssl_auth_path: Option<PathBuf>,

    /// Sets the server's OCSP file. *Optional*
    ///
    /// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate.
-    #[serde(skip_serializing)]
    #[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)]
    pub ssl_ocsp_path: Option<PathBuf>,

    /// Makes SSL authentication mandatory.
-    #[serde(skip_serializing, default)]
+    #[serde(default)]
    #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
    pub ssl_require_auth: bool,

    /// Activates SSL session resumption.
-    #[serde(skip_serializing, default)]
+    #[serde(default)]
    #[clap(long, env = MEILI_SSL_RESUMPTION)]
    pub ssl_resumption: bool,

    /// Activates SSL tickets.
-    #[serde(skip_serializing, default)]
+    #[serde(default)]
    #[clap(long, env = MEILI_SSL_TICKETS)]
    pub ssl_tickets: bool,

@ -251,7 +246,6 @@ pub struct Opt {

    /// Set the path to a configuration file that should be used to setup the engine.
    /// Format must be TOML.
-    #[serde(skip_serializing)]
    #[clap(long)]
    pub config_file_path: Option<PathBuf>,
 }
@ -439,16 +433,15 @@ impl Opt {
    }
 }

-#[derive(Debug, Clone, Parser, Deserialize, Serialize)]
+#[derive(Debug, Clone, Parser, Deserialize)]
 pub struct IndexerOpts {
    /// Sets the amount of documents to skip before printing
    /// a log regarding the indexing advancement.
-    #[serde(skip_serializing, default = "default_log_every_n")]
+    #[serde(default = "default_log_every_n")]
    #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
    pub log_every_n: usize,

    /// Grenad max number of chunks in bytes.
-    #[serde(skip_serializing)]
    #[clap(long, hide = true)]
    pub max_nb_chunks: Option<usize>,

@ -488,7 +481,7 @@ impl IndexerOpts {
    }
 }

-#[derive(Debug, Clone, Parser, Default, Deserialize, Serialize)]
+#[derive(Debug, Clone, Parser, Default, Deserialize)]
 #[serde(rename_all = "snake_case", deny_unknown_fields)]
 pub struct SchedulerConfig {
    /// Deactivates auto-batching when provided.