From c83c1a3c51092ef4cc318852748ee0804ca04c79 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 11 Feb 2025 18:01:53 +0100 Subject: [PATCH 1/4] Introduce the Hair Dryer meilitool subcommand --- crates/meilitool/src/main.rs | 91 +++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 9b3e11ff0..14f3af30d 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -1,15 +1,16 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::{BufWriter, Write as _}; use std::path::PathBuf; +use std::ptr; use std::time::Instant; use anyhow::{bail, Context}; -use clap::{Parser, Subcommand}; +use clap::{Parser, Subcommand, ValueEnum}; use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; use meilisearch_auth::AuthController; use meilisearch_types::batches::Batch; -use meilisearch_types::heed::types::{SerdeJson, Str}; +use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; use meilisearch_types::heed::{ CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, }; @@ -124,6 +125,25 @@ enum Command { /// the compaction operation can start. Once the compaction is done, the big index is replaced /// by the compacted one and the mutable transaction is released. CompactIndex { index_name: String }, + + /// Uses the hair dryer to make the dedicated pages hot in cache + /// + /// To make the index faster we must make sure it is hot in the DB cache: that's the curse of + /// memory-mapping but also its strength. This command is designed to make a specific part of + /// the index hot in cache. + HairDryer { + #[arg(long, value_delimiter = ',')] + index_name: Vec<String>, + + #[arg(long, value_delimiter = ',')] + index_part: Vec<IndexPart>, + }, +} + +#[derive(Clone, ValueEnum)] +enum IndexPart { + /// Will make the arroy index hot. 
+ Arroy, } fn main() -> anyhow::Result<()> { @@ -144,6 +164,9 @@ fn main() -> anyhow::Result<()> { OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() } Command::CompactIndex { index_name } => compact_index(db_path, &index_name), + Command::HairDryer { index_name, index_part } => { + hair_dryer(db_path, &index_name, &index_part) + } } } @@ -587,3 +610,67 @@ fn export_documents( Ok(()) } + +fn hair_dryer( + db_path: PathBuf, + index_names: &[String], + index_parts: &[IndexPart], +) -> anyhow::Result<()> { + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let rtxn = env.read_txn()?; + let index_mapping: Database = + try_opening_database(&env, &rtxn, "index-mapping")?; + + for result in index_mapping.iter(&rtxn)? { + let (uid, uuid) = result?; + if index_names.iter().any(|i| i == uid) { + let index_path = db_path.join("indexes").join(uuid.to_string()); + let index = + Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { + format!("While trying to open the index at path {:?}", index_path.display()) + })?; + + let rtxn = index.read_txn()?; + for part in index_parts { + match part { + IndexPart::Arroy => { + let mut count = 0; + let total = index.vector_arroy.len(&rtxn)?; + eprintln!("Hair drying arroy for {uid}..."); + for (i, result) in index + .vector_arroy + .remap_types::<Bytes, Bytes>() + .iter(&rtxn)? + .enumerate() + { + let (key, value) = result?; + count += key.len() + value.len(); + + unsafe { + // All of this just to avoid compiler optimizations 🤞 + // We must read all the bytes to make the pages hot in cache. 
+ // + ptr::read_volatile(&key[0]); + ptr::read_volatile(&key[key.len() - 1]); + ptr::read_volatile(&value[0]); + ptr::read_volatile(&value[value.len() - 1]); + } + + if i % 10_000 == 0 { + eprintln!("Visited {i}/{total} keys") + } + } + eprintln!("Done hair drying a total of at least {count} bytes."); + } + } + } + } else { + eprintln!("Found index {uid} but it's not the right index..."); + } + } + + Ok(()) +} From 5dab435d13ee717f7e79e2f7ed584f24240c45da Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 11 Feb 2025 18:14:48 +0100 Subject: [PATCH 2/4] Add more logs about read txns --- crates/meilitool/src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 14f3af30d..6881a15ec 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -620,6 +620,8 @@ fn hair_dryer( let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + eprintln!("Trying to get a read transaction on the index scheduler..."); + let rtxn = env.read_txn()?; let index_mapping: Database = try_opening_database(&env, &rtxn, "index-mapping")?; @@ -633,6 +635,8 @@ fn hair_dryer( format!("While trying to open the index at path {:?}", index_path.display()) })?; + eprintln!("Trying to get a read transaction on the {uid} index..."); + let rtxn = index.read_txn()?; for part in index_parts { match part { From 246ad3b06eca400c1b4554dbe7c998bd91d304a2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 12 Feb 2025 09:55:03 +0100 Subject: [PATCH 3/4] Display a progress percentage --- crates/meilitool/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 6881a15ec..8749d006a 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -664,7 +664,8 @@ fn hair_dryer( } if i % 10_000 == 0 { - 
eprintln!("Visited {i}/{total} keys") + let perc = (i as f64) / (total as f64) * 100.0; + eprintln!("Visited {i}/{total} ({perc:.2}%) keys") } } eprintln!("Done hair drying a total of at least {count} bytes."); From 803a699b157d4bd87b29c5276dfbd79fbae13b68 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 12 Feb 2025 10:46:36 +0100 Subject: [PATCH 4/4] Remove unsafes --- crates/meilitool/src/main.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 8749d006a..8a8b774b8 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -1,7 +1,6 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::{BufWriter, Write as _}; use std::path::PathBuf; -use std::ptr; use std::time::Instant; use anyhow::{bail, Context}; @@ -651,17 +650,12 @@ fn hair_dryer( .enumerate() { let (key, value) = result?; - count += key.len() + value.len(); - unsafe { - // All of this just to avoid compiler optimizations 🤞 - // We must read all the bytes to make the pages hot in cache. - // - ptr::read_volatile(&key[0]); - ptr::read_volatile(&key[key.len() - 1]); - ptr::read_volatile(&value[0]); - ptr::read_volatile(&value[value.len() - 1]); - } + // All of this just to avoid compiler optimizations 🤞 + // We must read all the bytes to make the pages hot in cache. + // + count += std::hint::black_box(key.iter().fold(0, |acc, _| acc + 1)); + count += std::hint::black_box(value.iter().fold(0, |acc, _| acc + 1)); if i % 10_000 == 0 { let perc = (i as f64) / (total as f64) * 100.0;