From e6295c9c5f02df14134322b831c7cd329578a4f1 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 13 Jan 2025 16:36:37 +0100 Subject: [PATCH 1/4] Introduce a meilitool subcommand to compact an index --- crates/meilitool/src/main.rs | 97 +++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 599bc3274..29c111013 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -1,6 +1,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::BufWriter; use std::path::PathBuf; +use std::time::Instant; use anyhow::Context; use clap::{Parser, Subcommand}; @@ -8,7 +9,9 @@ use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; use meilisearch_auth::AuthController; use meilisearch_types::heed::types::{SerdeJson, Str}; -use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; +use meilisearch_types::heed::{ + CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, +}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::tasks::{Status, Task}; @@ -78,6 +81,27 @@ enum Command { #[arg(long)] target_version: String, }, + + /// Compact the index by using LMDB. + /// + /// You must run this command while Meilisearch is off. The reason is that Meilisearch keeps the + /// indexes open and this compaction operation writes into another file. Meilisearch will not + /// switch to the new file. + /// + /// **Another possibility** is to keep Meilisearch running to serve search requests, run the + /// compaction and once done, close and immediately reopen Meilisearch. This way Meilisearch + /// will reopen the data.mdb file when rebooting and see the newly compacted file, ignoring + /// the previous non-compacted data. 
+ /// + /// Note that the compaction will open the index, copy and compact the index into another file + /// **on the same disk as the index** and replace the previous index with the newly compacted + /// one. Which means that the disk must have enough room for at most two time the index size. + /// + /// To make sure not to loose any data, this tool takes a mutable transaction on the index + /// before running the copy and compaction. This way the current indexation must finish before + /// the compaction operation can start. Once the compaction is done, the big index is replaced + /// by the compacted one and the mutable transaction is released. + CompactIndex { index_name: String }, } fn main() -> anyhow::Result<()> { @@ -94,6 +118,7 @@ fn main() -> anyhow::Result<()> { let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() } + Command::CompactIndex { index_name } => compact_index(db_path, &index_name), } } @@ -352,3 +377,73 @@ fn export_a_dump( Ok(()) } + +fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let rtxn = env.read_txn()?; + let index_mapping: Database<Str, UuidCodec> = + try_opening_database(&env, &rtxn, "index-mapping")?; + + for result in index_mapping.iter(&rtxn)? 
{ + let (uid, uuid) = result?; + + if uid != index_name { + eprintln!("Found index {uid} and skipping it"); + continue; + } else { + eprintln!("Found index {uid} 🎉"); + } + + let index_path = db_path.join("indexes").join(uuid.to_string()); + let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| { + format!("While trying to open the index at path {:?}", index_path.display()) + })?; + + eprintln!("Awaiting for a mutable transaction..."); + let _wtxn = index.write_txn().context("While awaiting for a write transaction")?; + + // The compacted copy is written into the index directory, next to the original data file. + let non_compacted_index_file_path = index_path.join("data.mdb"); + let compacted_index_file_path = index_path.join("data.mdb.cpy"); + + eprintln!("Compacting the index..."); + let before_compaction = Instant::now(); + let new_file = index + .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) + .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; + + let after_size = new_file.metadata()?.len(); + let before_size = std::fs::metadata(&non_compacted_index_file_path) + .with_context(|| { + format!( + "While retrieving the metadata of {}", + non_compacted_index_file_path.display(), + ) + })? + .len(); + + let reduction = before_size as f64 / after_size as f64; + println!("Compaction successful. 
Took around {:.2?}", before_compaction.elapsed()); + eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)"); + + eprintln!("Replacing the non-compacted index by the compacted one..."); + std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context( + || { + format!( + "While renaming {} into {}", + compacted_index_file_path.display(), + non_compacted_index_file_path.display(), + ) + }, + )?; + + drop(new_file); + + println!("Everything's done 🎉"); + } + + Ok(()) +} From c72f114b335c8056f286245b024b129fbdb2dd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 Jan 2025 11:07:09 +0100 Subject: [PATCH 2/4] Fix english in the comments Co-authored-by: Louis Dureuil --- crates/meilitool/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 29c111013..4a630a86d 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -95,9 +95,9 @@ enum Command { /// /// Note that the compaction will open the index, copy and compact the index into another file /// **on the same disk as the index** and replace the previous index with the newly compacted - /// one. Which means that the disk must have enough room for at most two time the index size. + /// one. This means that the disk must have enough room for at most two times the index size. /// - /// To make sure not to loose any data, this tool takes a mutable transaction on the index + /// To make sure not to lose any data, this tool takes a mutable transaction on the index /// before running the copy and compaction. This way the current indexation must finish before /// the compaction operation can start. Once the compaction is done, the big index is replaced /// by the compacted one and the mutable transaction is released. 
From 71bb24f17e3571916cf6e2558c03987e2bb875bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 Jan 2025 11:07:43 +0100 Subject: [PATCH 3/4] Throw an error when the index is not found Co-authored-by: Louis Dureuil --- crates/meilitool/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 4a630a86d..6e023ea52 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -443,7 +443,8 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { drop(new_file); println!("Everything's done 🎉"); + return Ok(()) } - Ok(()) + anyhow::bail!("Target index {index_name} not found!") } From 62ced0e3f18164f5efa6aa4f0f984fd9059cc010 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 30 Jan 2025 11:09:54 +0100 Subject: [PATCH 4/4] Make cargo fmt happy --- crates/meilitool/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 6e023ea52..4bd2c6c96 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -443,7 +443,7 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { drop(new_file); println!("Everything's done 🎉"); - return Ok(()) + return Ok(()); } anyhow::bail!("Target index {index_name} not found!")