From f759ec7fad18ff0e583ba36b2568f3254fb70f46 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 11:23:58 +0200 Subject: [PATCH] Expose a flag to enable the MDB_WRITEMAP flag --- index-scheduler/src/index_mapper/index_map.rs | 38 +++++++++++++++---- index-scheduler/src/index_mapper/mod.rs | 12 +++++- index-scheduler/src/lib.rs | 8 ++++ .../src/analytics/segment_analytics.rs | 3 ++ meilisearch/src/lib.rs | 1 + meilisearch/src/main.rs | 5 +++ meilisearch/src/option.rs | 21 ++++++++++ milli/src/index.rs | 2 +- 8 files changed, 80 insertions(+), 10 deletions(-) diff --git a/index-scheduler/src/index_mapper/index_map.rs b/index-scheduler/src/index_mapper/index_map.rs index d140d4944..9bed4fe5d 100644 --- a/index-scheduler/src/index_mapper/index_map.rs +++ b/index-scheduler/src/index_mapper/index_map.rs @@ -5,6 +5,7 @@ use std::collections::BTreeMap; use std::path::Path; use std::time::Duration; +use meilisearch_types::heed::flags::Flags; use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions}; use meilisearch_types::milli::Index; use time::OffsetDateTime; @@ -53,6 +54,7 @@ pub struct IndexMap { pub struct ClosingIndex { uuid: Uuid, closing_event: EnvClosingEvent, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -68,6 +70,7 @@ impl ClosingIndex { pub fn wait_timeout(self, timeout: Duration) -> Option { self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex { uuid: self.uuid, + enable_mdb_writemap: self.enable_mdb_writemap, map_size: self.map_size, generation: self.generation, }) @@ -76,6 +79,7 @@ impl ClosingIndex { pub struct ReopenableIndex { uuid: Uuid, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -103,7 +107,7 @@ impl ReopenableIndex { return Ok(()); } map.unavailable.remove(&self.uuid); - map.create(&self.uuid, path, None, self.map_size)?; + map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?; } Ok(()) } @@ -170,16 +174,17 @@ impl IndexMap { uuid: &Uuid, path: 
&Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result { if !matches!(self.get_unavailable(uuid), Missing) { panic!("Attempt to open an index that was unavailable"); } - let index = create_or_open_index(path, date, map_size)?; + let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?; match self.available.insert(*uuid, index.clone()) { InsertionOutcome::InsertedNew => (), InsertionOutcome::Evicted(evicted_uuid, evicted_index) => { - self.close(evicted_uuid, evicted_index, 0); + self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0); } InsertionOutcome::Replaced(_) => { panic!("Attempt to open an index that was already opened") @@ -212,17 +217,30 @@ impl IndexMap { /// | Closing | Closing | /// | Available | Closing | /// - pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) { + pub fn close_for_resize( + &mut self, + uuid: &Uuid, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let Some(index) = self.available.remove(uuid) else { return; }; - self.close(*uuid, index, map_size_growth); + self.close(*uuid, index, enable_mdb_writemap, map_size_growth); } - fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) { + fn close( + &mut self, + uuid: Uuid, + index: Index, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth; let closing_event = index.prepare_for_closing(); let generation = self.next_generation(); - self.unavailable - .insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation })); + self.unavailable.insert( + uuid, + Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }), + ); } /// Attempts to delete and index. 
@@ -282,11 +300,15 @@ impl IndexMap { fn create_or_open_index( path: &Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result { let mut options = EnvOpenOptions::new(); options.map_size(clamp_to_page_size(map_size)); options.max_readers(1024); + if enable_mdb_writemap { + unsafe { options.flag(Flags::MdbWriteMap) }; + } if let Some((created, updated)) = date { Ok(Index::new_with_creation_dates(options, path, created, updated)?) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 7d373c528..5160ebd63 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -66,6 +66,8 @@ pub struct IndexMapper { index_base_map_size: usize, /// The quantity by which the map size of an index is incremented upon reopening, in bytes. index_growth_amount: usize, + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. + enable_mdb_writemap: bool, pub indexer_config: Arc, } @@ -123,6 +125,7 @@ impl IndexMapper { index_base_map_size: usize, index_growth_amount: usize, index_count: usize, + enable_mdb_writemap: bool, indexer_config: IndexerConfig, ) -> Result { let mut wtxn = env.write_txn()?; @@ -137,6 +140,7 @@ impl IndexMapper { base_path, index_base_map_size, index_growth_amount, + enable_mdb_writemap, indexer_config: Arc::new(indexer_config), }) } @@ -167,6 +171,7 @@ impl IndexMapper { &uuid, &index_path, date, + self.enable_mdb_writemap, self.index_base_map_size, )?; @@ -278,7 +283,11 @@ impl IndexMapper { .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; // We remove the index from the in-memory index map. 
- self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount); + self.index_map.write().unwrap().close_for_resize( + &uuid, + self.enable_mdb_writemap, + self.index_growth_amount, + ); Ok(()) } @@ -343,6 +352,7 @@ impl IndexMapper { &uuid, &index_path, None, + self.enable_mdb_writemap, self.index_base_map_size, )?; } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index b3607e85e..607a4a407 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions { pub task_db_size: usize, /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index. pub index_base_map_size: usize, + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. + pub enable_mdb_writemap: bool, /// The size, in bytes, by which the map size of an index is increased when it resized due to being full. pub index_growth_amount: usize, /// The number of indexes that can be concurrently opened in memory. @@ -374,6 +376,10 @@ impl IndexScheduler { std::fs::create_dir_all(&options.indexes_path)?; std::fs::create_dir_all(&options.dumps_path)?; + if cfg!(windows) && options.enable_mdb_writemap { + panic!("Windows doesn't support the MDB_WRITEMAP LMDB option"); + } + let task_db_size = clamp_to_page_size(options.task_db_size); let budget = if options.indexer_config.skip_index_budget { IndexBudget { @@ -426,6 +432,7 @@ impl IndexScheduler { budget.map_size, options.index_growth_amount, budget.index_count, + options.enable_mdb_writemap, options.indexer_config, )?, env, @@ -1482,6 +1489,7 @@ mod tests { dumps_path: tempdir.path().join("dumps"), task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. 
+ enable_mdb_writemap: false, index_growth_amount: 1000 * 1000, // 1 MB index_count: 5, indexer_config, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3e40c09e8..9b465b8d8 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -225,6 +225,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_reduce_indexing_memory_usage: bool, db_path: bool, import_dump: bool, dump_dir: bool, @@ -258,6 +259,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + experimental_reduce_indexing_memory_usage, http_addr, master_key: _, env, @@ -300,6 +302,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), dump_dir: dump_dir != PathBuf::from("dumps/"), diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 67d8bbd5c..bee53f6f8 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -232,6 +232,7 @@ fn open_or_create_database_unchecked( dumps_path: opt.dump_dir.clone(), task_db_size: opt.max_task_db_size.get_bytes() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize, + enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, max_number_of_tasks: 1_000_000, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 2ab37488c..1b5e918dc 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + anyhow::ensure!( + !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), + "The 
`experimental-reduce-indexing-memory-usage` flag is not supported on Windows" + ); + setup(&opt)?; match (opt.env.as_ref(), &opt.master_key) { diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 8e6ca9006..83fbeb333 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; +const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = + "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -293,6 +295,20 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + /// Experimentally reduces the amount of RAM used by the engine when indexing documents. + /// + /// You must not use this flag in production. It is experimental and can corrupt the database + /// or be removed in future versions. It can also be stabilized or directly integrated + /// into the engine later. + /// + /// This flag enables the MDB_WRITEMAP option of LMDB, making the internal key-value store + /// use much less RAM than usual. Unfortunately, it can reduce the write speed of it and therefore + /// slow down the engine. You can read more and tell us about your experience on the dedicated + /// discussion: . 
+ #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] + #[serde(default)] + pub experimental_reduce_indexing_memory_usage: bool, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -385,6 +401,7 @@ impl Opt { #[cfg(all(not(debug_assertions), feature = "analytics"))] no_analytics, experimental_enable_metrics: enable_metrics_route, + experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -426,6 +443,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, enable_metrics_route.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, + reduce_indexing_memory_usage.to_string(), + ); indexer_options.export_to_env(); } diff --git a/milli/src/index.rs b/milli/src/index.rs index bc14b7195..9ea7b628c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -167,7 +167,7 @@ impl Index { use db_name::*; options.max_dbs(23); - unsafe { options.flag(Flags::MdbAlwaysFreePages).flag(Flags::MdbWriteMap) }; + unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; let mut wtxn = env.write_txn()?;