Expose a flag to enable the MDB_WRITEMAP flag

This commit is contained in:
Kerollmops 2023-05-15 11:23:58 +02:00
parent c4a40e7110
commit f759ec7fad
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
8 changed files with 80 additions and 10 deletions

View File

@ -5,6 +5,7 @@ use std::collections::BTreeMap;
use std::path::Path; use std::path::Path;
use std::time::Duration; use std::time::Duration;
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions}; use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index; use meilisearch_types::milli::Index;
use time::OffsetDateTime; use time::OffsetDateTime;
@ -53,6 +54,7 @@ pub struct IndexMap {
pub struct ClosingIndex { pub struct ClosingIndex {
uuid: Uuid, uuid: Uuid,
closing_event: EnvClosingEvent, closing_event: EnvClosingEvent,
enable_mdb_writemap: bool,
map_size: usize, map_size: usize,
generation: usize, generation: usize,
} }
@ -68,6 +70,7 @@ impl ClosingIndex {
pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> { pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex { self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
uuid: self.uuid, uuid: self.uuid,
enable_mdb_writemap: self.enable_mdb_writemap,
map_size: self.map_size, map_size: self.map_size,
generation: self.generation, generation: self.generation,
}) })
@ -76,6 +79,7 @@ impl ClosingIndex {
pub struct ReopenableIndex { pub struct ReopenableIndex {
uuid: Uuid, uuid: Uuid,
enable_mdb_writemap: bool,
map_size: usize, map_size: usize,
generation: usize, generation: usize,
} }
@ -103,7 +107,7 @@ impl ReopenableIndex {
return Ok(()); return Ok(());
} }
map.unavailable.remove(&self.uuid); map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.map_size)?; map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
} }
Ok(()) Ok(())
} }
@ -170,16 +174,17 @@ impl IndexMap {
uuid: &Uuid, uuid: &Uuid,
path: &Path, path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>, date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize, map_size: usize,
) -> Result<Index> { ) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) { if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable"); panic!("Attempt to open an index that was unavailable");
} }
let index = create_or_open_index(path, date, map_size)?; let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
match self.available.insert(*uuid, index.clone()) { match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (), InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => { InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
self.close(evicted_uuid, evicted_index, 0); self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0);
} }
InsertionOutcome::Replaced(_) => { InsertionOutcome::Replaced(_) => {
panic!("Attempt to open an index that was already opened") panic!("Attempt to open an index that was already opened")
@ -212,17 +217,30 @@ impl IndexMap {
/// | Closing | Closing | /// | Closing | Closing |
/// | Available | Closing | /// | Available | Closing |
/// ///
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) { pub fn close_for_resize(
&mut self,
uuid: &Uuid,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let Some(index) = self.available.remove(uuid) else { return; }; let Some(index) = self.available.remove(uuid) else { return; };
self.close(*uuid, index, map_size_growth); self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
} }
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) { fn close(
&mut self,
uuid: Uuid,
index: Index,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth; let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing(); let closing_event = index.prepare_for_closing();
let generation = self.next_generation(); let generation = self.next_generation();
self.unavailable self.unavailable.insert(
.insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation })); uuid,
Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }),
);
} }
/// Attempts to delete an index. /// Attempts to delete an index.
@ -282,11 +300,15 @@ impl IndexMap {
fn create_or_open_index( fn create_or_open_index(
path: &Path, path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>, date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize, map_size: usize,
) -> Result<Index> { ) -> Result<Index> {
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size)); options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024); options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date { if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?) Ok(Index::new_with_creation_dates(options, path, created, updated)?)

View File

@ -66,6 +66,8 @@ pub struct IndexMapper {
index_base_map_size: usize, index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes. /// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize, index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>, pub indexer_config: Arc<IndexerConfig>,
} }
@ -123,6 +125,7 @@ impl IndexMapper {
index_base_map_size: usize, index_base_map_size: usize,
index_growth_amount: usize, index_growth_amount: usize,
index_count: usize, index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig, indexer_config: IndexerConfig,
) -> Result<Self> { ) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
@ -137,6 +140,7 @@ impl IndexMapper {
base_path, base_path,
index_base_map_size, index_base_map_size,
index_growth_amount, index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config), indexer_config: Arc::new(indexer_config),
}) })
} }
@ -167,6 +171,7 @@ impl IndexMapper {
&uuid, &uuid,
&index_path, &index_path,
date, date,
self.enable_mdb_writemap,
self.index_base_map_size, self.index_base_map_size,
)?; )?;
@ -278,7 +283,11 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?; .ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map. // We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount); self.index_map.write().unwrap().close_for_resize(
&uuid,
self.enable_mdb_writemap,
self.index_growth_amount,
);
Ok(()) Ok(())
} }
@ -343,6 +352,7 @@ impl IndexMapper {
&uuid, &uuid,
&index_path, &index_path,
None, None,
self.enable_mdb_writemap,
self.index_base_map_size, self.index_base_map_size,
)?; )?;
} }

View File

@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions {
pub task_db_size: usize, pub task_db_size: usize,
/// The size, in bytes, with which each meilisearch index is opened the first time. /// The size, in bytes, with which each meilisearch index is opened the first time.
pub index_base_map_size: usize, pub index_base_map_size: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
pub enable_mdb_writemap: bool,
/// The size, in bytes, by which the map size of an index is increased when it is resized due to being full. /// The size, in bytes, by which the map size of an index is increased when it is resized due to being full.
pub index_growth_amount: usize, pub index_growth_amount: usize,
/// The number of indexes that can be concurrently opened in memory. /// The number of indexes that can be concurrently opened in memory.
@ -374,6 +376,10 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.indexes_path)?; std::fs::create_dir_all(&options.indexes_path)?;
std::fs::create_dir_all(&options.dumps_path)?; std::fs::create_dir_all(&options.dumps_path)?;
if cfg!(windows) && options.enable_mdb_writemap {
panic!("Windows doesn't support the MDB_WRITEMAP LMDB option");
}
let task_db_size = clamp_to_page_size(options.task_db_size); let task_db_size = clamp_to_page_size(options.task_db_size);
let budget = if options.indexer_config.skip_index_budget { let budget = if options.indexer_config.skip_index_budget {
IndexBudget { IndexBudget {
@ -426,6 +432,7 @@ impl IndexScheduler {
budget.map_size, budget.map_size,
options.index_growth_amount, options.index_growth_amount,
budget.index_count, budget.index_count,
options.enable_mdb_writemap,
options.indexer_config, options.indexer_config,
)?, )?,
env, env,
@ -1482,6 +1489,7 @@ mod tests {
dumps_path: tempdir.path().join("dumps"), dumps_path: tempdir.path().join("dumps"),
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000, // 1 MB index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5, index_count: 5,
indexer_config, indexer_config,

View File

@ -225,6 +225,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos { struct Infos {
env: String, env: String,
experimental_enable_metrics: bool, experimental_enable_metrics: bool,
experimental_reduce_indexing_memory_usage: bool,
db_path: bool, db_path: bool,
import_dump: bool, import_dump: bool,
dump_dir: bool, dump_dir: bool,
@ -258,6 +259,7 @@ impl From<Opt> for Infos {
let Opt { let Opt {
db_path, db_path,
experimental_enable_metrics, experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
http_addr, http_addr,
master_key: _, master_key: _,
env, env,
@ -300,6 +302,7 @@ impl From<Opt> for Infos {
Self { Self {
env, env,
experimental_enable_metrics, experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"), dump_dir: dump_dir != PathBuf::from("dumps/"),

View File

@ -232,6 +232,7 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(), dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize, task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?, indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true, autobatching_enabled: true,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,

View File

@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?; let (opt, config_read_from) = Opt::try_build()?;
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
setup(&opt)?; setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) { match (opt.env.as_ref(), &opt.master_key) {

View File

@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_DB_PATH: &str = "./data.ms";
@ -293,6 +295,20 @@ pub struct Opt {
#[serde(default)] #[serde(default)]
pub experimental_enable_metrics: bool, pub experimental_enable_metrics: bool,
/// Experimentally reduces the amount of RAM used by the engine when indexing documents.
///
/// You must not use this flag in production. It is experimental and can corrupt the database
/// or be removed in future versions. It can also be stabilized or directly integrated
/// into the engine later.
///
/// This flag enables the MDB_WRITEMAP option of LMDB, making the internal key-value store
/// use much less RAM than usual. Unfortunately, it can reduce the write speed of it and therefore
/// slow down the engine. You can read more and tell us about your experience on the dedicated
/// discussion: <https://github.com/meilisearch/product/discussions/652>.
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool,
#[serde(flatten)] #[serde(flatten)]
#[clap(flatten)] #[clap(flatten)]
pub indexer_options: IndexerOpts, pub indexer_options: IndexerOpts,
@ -385,6 +401,7 @@ impl Opt {
#[cfg(all(not(debug_assertions), feature = "analytics"))] #[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics, no_analytics,
experimental_enable_metrics: enable_metrics_route, experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
} = self; } = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -426,6 +443,10 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS, MEILI_EXPERIMENTAL_ENABLE_METRICS,
enable_metrics_route.to_string(), enable_metrics_route.to_string(),
); );
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
reduce_indexing_memory_usage.to_string(),
);
indexer_options.export_to_env(); indexer_options.export_to_env();
} }

View File

@ -167,7 +167,7 @@ impl Index {
use db_name::*; use db_name::*;
options.max_dbs(23); options.max_dbs(23);
unsafe { options.flag(Flags::MdbAlwaysFreePages).flag(Flags::MdbWriteMap) }; unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?; let env = options.open(path)?;
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;