diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs
index 35b747ccf..1a7de7691 100644
--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@@ -46,6 +46,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
                 .route(web::delete().to(SeqHandler(delete_index))),
         )
         .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
+        .service(
+            web::resource("/advanced-stats")
+                .route(web::get().to(SeqHandler(get_advanced_index_stats))),
+        )
         .service(web::scope("/documents").configure(documents::configure))
         .service(web::scope("/search").configure(search::configure))
         .service(web::scope("/facet-search").configure(facet_search::configure))
@@ -278,3 +282,16 @@ pub async fn get_index_stats(
     debug!(returns = ?stats, "Get index stats");
     Ok(HttpResponse::Ok().json(stats))
 }
+
+pub async fn get_advanced_index_stats(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+) -> Result<HttpResponse, ResponseError> {
+    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
+    let index = index_scheduler.index(&index_uid)?;
+    let rtxn = index.read_txn()?;
+    let advanced_stats = index.advanced_stats(&rtxn)?;
+
+    debug!(returns = ?advanced_stats, "Get advanced index stats");
+    Ok(HttpResponse::Ok().json(advanced_stats))
+}
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 3c502d541..ec6f489ed 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -9,6 +9,7 @@ use heed::types::*;
 use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
 use roaring::RoaringBitmap;
 use rstar::RTree;
+use serde::Serialize;
 use time::OffsetDateTime;
 
 use crate::documents::PrimaryKey;
@@ -324,6 +325,87 @@ impl Index {
         self.env.info().map_size
     }
 
+    pub fn advanced_stats(&self, rtxn: &heed::RoTxn) -> Result<AdvancedStats> {
+        use db_name::*;
+
+        let mut dbs = BTreeMap::new();
+        dbs.insert(WORD_DOCIDS, advanced_database_stats(rtxn, self.word_docids)?);
+        dbs.insert(
+            WORD_PAIR_PROXIMITY_DOCIDS,
+            advanced_database_stats(rtxn, self.word_pair_proximity_docids)?,
+        );
+        dbs.insert(WORD_PREFIX_DOCIDS, advanced_database_stats(rtxn, self.word_prefix_docids)?);
+        dbs.insert(WORD_FIELD_ID_DOCIDS, advanced_database_stats(rtxn, self.word_fid_docids)?);
+        dbs.insert(WORD_POSITION_DOCIDS, advanced_database_stats(rtxn, self.word_position_docids)?);
+        dbs.insert(DOCUMENTS, advanced_database_stats_no_bitmap(rtxn, self.documents)?);
+
+        fn advanced_database_stats<KC>(
+            rtxn: &heed::RoTxn,
+            db: Database<KC, CboRoaringBitmapCodec>,
+        ) -> Result<AdvancedDatabaseStats> {
+            let db = db.remap_key_type::<Bytes>().lazily_decode_data();
+
+            let mut entries_count = 0;
+            let mut total_bitmap_size = 0;
+            let mut total_bitmap_len = 0;
+            let mut total_key_size = 0;
+
+            for result in db.iter(rtxn)? {
+                let (bytes_key, lazy_value) = result?;
+                entries_count += 1;
+                total_bitmap_size += lazy_value.remap::<Bytes>().decode().unwrap().len();
+                let bitmap = lazy_value.decode().map_err(heed::Error::Decoding)?;
+                total_bitmap_len += bitmap.len();
+                total_key_size += bytes_key.len();
+            }
+
+            Ok(AdvancedDatabaseStats {
+                entries_count,
+                average_bitmap_len: Some(total_bitmap_len as f64 / entries_count as f64),
+                median_bitmap_len: None,
+                average_value_size: Some(total_bitmap_size as f64 / entries_count as f64),
+                median_value_size: None,
+                average_key_size: Some(total_key_size as f64 / entries_count as f64),
+                median_key_size: None,
+            })
+        }
+
+        fn advanced_database_stats_no_bitmap<KC, DC>(
+            rtxn: &heed::RoTxn,
+            db: Database<KC, DC>,
+        ) -> Result<AdvancedDatabaseStats> {
+            let db = db.remap_types::<Bytes, Bytes>();
+
+            let mut entries_count = 0;
+            let mut total_value_size = 0;
+            let mut total_key_size = 0;
+
+            for result in db.iter(rtxn)? {
+                let (bytes_key, bytes_value) = result?;
+                entries_count += 1;
+                total_value_size += bytes_value.len();
+                total_key_size += bytes_key.len();
+            }
+
+            Ok(AdvancedDatabaseStats {
+                entries_count,
+                average_bitmap_len: None,
+                median_bitmap_len: None,
+                average_value_size: Some(total_value_size as f64 / entries_count as f64),
+                median_value_size: None,
+                average_key_size: Some(total_key_size as f64 / entries_count as f64),
+                median_key_size: None,
+            })
+        }
+
+        Ok(AdvancedStats {
+            map_size: self.map_size(),
+            non_free_pages_size: self.used_size()?,
+            on_disk_size: self.on_disk_size()?,
+            databases: dbs,
+        })
+    }
+
     pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
         self.env.copy_to_file(path, option).map_err(Into::into)
     }
@@ -1662,6 +1744,36 @@ impl Index {
     }
 }
 
+#[derive(Clone, Debug, Serialize)]
+pub struct AdvancedStats {
+    /// The size of the data memory map.
+    map_size: usize,
+    /// The size used by all the databases in the environment, without the free pages.
+    non_free_pages_size: u64,
+    /// The size of the data file on disk.
+    on_disk_size: u64,
+    /// The advanced stats of each database.
+    databases: BTreeMap<&'static str, AdvancedDatabaseStats>,
+}
+
+#[derive(Clone, Debug, Serialize)]
+pub struct AdvancedDatabaseStats {
+    /// The number of entries in this database.
+    entries_count: usize,
+    /// The average number of entries in the bitmaps of this database.
+    average_bitmap_len: Option<f64>,
+    /// The median number of entries in the bitmaps of this database.
+    median_bitmap_len: Option<f64>,
+    /// The average size of the values of this database.
+    average_value_size: Option<f64>,
+    /// The median size of the values of this database.
+    median_value_size: Option<f64>,
+    /// The average size of the keys of this database.
+    average_key_size: Option<f64>,
+    /// The median size of the keys of this database.
+    median_key_size: Option<f64>,
+}
+
 #[cfg(test)]
 pub(crate) mod tests {
     use std::collections::HashSet;
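Note that the patch fills in only the averages; every `median_*` field is left as `None`. A minimal sketch of how a median could be computed from per-entry sizes collected during the same iteration is shown below; the `median` helper is hypothetical and not part of the diff.

```rust
/// Hypothetical helper (not part of the diff): median of a list of collected
/// sizes, e.g. key sizes, value sizes, or bitmap lengths gathered while
/// iterating a database. Sorts a copy of the values and picks the middle one,
/// averaging the two middle elements when the count is even.
fn median(mut values: Vec<u64>) -> Option<f64> {
    if values.is_empty() {
        return None;
    }
    values.sort_unstable();
    let mid = values.len() / 2;
    if values.len() % 2 == 0 {
        Some((values[mid - 1] + values[mid]) as f64 / 2.0)
    } else {
        Some(values[mid] as f64)
    }
}
```

Collecting every key and value size into a `Vec` costs O(n) extra memory per database, so a non-prototype version might prefer `select_nth_unstable` or a streaming approximation instead of a full sort.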