From c4a40e711066c88ca0c61579284d9ff0c463e8af Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 10:15:33 +0200 Subject: [PATCH] Use the writemap flag to reduce the memory usage --- Cargo.lock | 6 +-- index-scheduler/src/index_mapper/mod.rs | 9 +++- index-scheduler/src/lib.rs | 27 +++++++---- meilisearch-auth/src/store.rs | 6 ++- milli/Cargo.toml | 2 +- milli/src/index.rs | 61 +++++++++++++++---------- milli/src/update/facet/mod.rs | 4 +- 7 files changed, 74 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f192b6d1..87298f665 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1794,7 +1794,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" version = "0.12.5" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" dependencies = [ "byteorder", "heed-traits", @@ -1811,12 +1811,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" dependencies = [ "bincode", "heed-traits", diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 2bf6f46ad..7d373c528 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -125,10 +125,15 @@ impl IndexMapper { index_count: usize, indexer_config: IndexerConfig, ) -> Result { + let mut wtxn = env.write_txn()?; + let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; + let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; + wtxn.commit()?; + Ok(Self { index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), - index_mapping: env.create_database(Some(INDEX_MAPPING))?, - index_stats: env.create_database(Some(INDEX_STATS))?, + index_mapping, + index_stats, base_path, index_base_map_size, index_growth_amount, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index af20ba1ae..b3607e85e 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -396,19 +396,30 @@ impl IndexScheduler { .open(options.tasks_path)?; let file_store = FileStore::new(&options.update_file_path)?; + let mut wtxn = env.write_txn()?; + let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?; + let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?; + let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?; + let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?; + let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?; + let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?; + let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?; + let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; + wtxn.commit()?; + // allow unreachable_code to get rids of the warning in the case of a test build. let this = Self { must_stop_processing: MustStopProcessing::default(), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), file_store, - all_tasks: env.create_database(Some(db_name::ALL_TASKS))?, - status: env.create_database(Some(db_name::STATUS))?, - kind: env.create_database(Some(db_name::KIND))?, - index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?, - canceled_by: env.create_database(Some(db_name::CANCELED_BY))?, - enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?, - started_at: env.create_database(Some(db_name::STARTED_AT))?, - finished_at: env.create_database(Some(db_name::FINISHED_AT))?, + all_tasks, + status, + kind, + index_tasks, + canceled_by, + enqueued_at, + started_at, + finished_at, index_mapper: IndexMapper::new( &env, options.indexes_path, diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 5c2776154..3e4acc88e 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -55,9 +55,11 @@ impl HeedAuthStore { let path = path.as_ref().join(AUTH_DB_PATH); create_dir_all(&path)?; let env = Arc::new(open_auth_store_env(path.as_ref())?); - let keys = env.create_database(Some(KEY_DB_NAME))?; + let mut wtxn = env.write_txn()?; + let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let action_keyid_index_expiration = - env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + wtxn.commit()?; Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index de0f4e31d..bb5b505d1 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -26,7 +26,7 @@ fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] } -heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] } +heed = { git = "https://github.com/meilisearch/heed", branch = "create-db-no-sub-txn", default-features = false, features = ["lmdb", "sync-read-txn"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } memmap2 = "0.5.10" diff --git a/milli/src/index.rs b/milli/src/index.rs index ad53e79ea..bc14b7195 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -167,36 +167,49 @@ impl Index { use db_name::*; options.max_dbs(23); - unsafe { options.flag(Flags::MdbAlwaysFreePages) }; + unsafe { options.flag(Flags::MdbAlwaysFreePages).flag(Flags::MdbWriteMap) }; let env = options.open(path)?; - let main = env.create_poly_database(Some(MAIN))?; - let word_docids = env.create_database(Some(WORD_DOCIDS))?; - let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?; - let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?; - let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?; - let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; - let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; - let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?; + let mut wtxn = env.write_txn()?; + let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; + let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; + let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?; + let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?; + let exact_word_prefix_docids = + env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?; + let docid_word_positions = env.create_database(&mut wtxn, Some(DOCID_WORD_POSITIONS))?; + let word_pair_proximity_docids = + env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?; + let script_language_docids = + env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?; let word_prefix_pair_proximity_docids = - env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; + env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; let prefix_word_pair_proximity_docids = - env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?; - let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?; - let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?; - let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?; - let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?; - let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?; - let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; - let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; - let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; - let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; - let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?; + env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?; + let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?; + let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?; + let field_id_word_count_docids = + env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?; + let word_prefix_position_docids = + env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?; + let word_prefix_fid_docids = + env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?; + let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; + let facet_id_string_docids = + env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_exists_docids = + env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; + let facet_id_is_null_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?; + let facet_id_is_empty_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?; - let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; + let field_id_docid_facet_f64s = + env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?; let field_id_docid_facet_strings = - env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; - let documents = env.create_database(Some(DOCUMENTS))?; + env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?; + let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; + wtxn.commit()?; Index::set_creation_dates(&env, main, created_at, updated_at)?; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 39a3ef437..2fd748d4d 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -261,7 +261,9 @@ pub(crate) mod test_helpers { let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); let env = options.open(tempdir.path()).unwrap(); - let content = env.create_database(None).unwrap(); + let mut wtxn = env.write_txn().unwrap(); + let content = env.create_database(&mut wtxn, None).unwrap(); + wtxn.commit().unwrap(); FacetIndex { content,