Expose a route to show advanced index stats

This commit is contained in:
Clément Renault 2024-06-03 18:39:38 +02:00
parent d6bd88ce4f
commit af5b1a88a4
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
2 changed files with 129 additions and 0 deletions

View File

@ -46,6 +46,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.route(web::delete().to(SeqHandler(delete_index))), .route(web::delete().to(SeqHandler(delete_index))),
) )
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats)))) .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(
web::resource("/advanced-stats")
.route(web::get().to(SeqHandler(get_advanced_index_stats))),
)
.service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure)) .service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure)) .service(web::scope("/facet-search").configure(facet_search::configure))
@ -278,3 +282,16 @@ pub async fn get_index_stats(
debug!(returns = ?stats, "Get index stats"); debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats)) Ok(HttpResponse::Ok().json(stats))
} }
/// HTTP handler returning the advanced statistics of a single index.
///
/// The caller must be authorized for the `actions::STATS_GET` action. The
/// index uid is taken from the request path and validated, the index is
/// looked up through the scheduler, and the statistics are computed inside a
/// read transaction before being sent back as a JSON body.
///
/// # Errors
///
/// Fails when the uid cannot be converted into an `IndexUid`, when the
/// scheduler cannot provide the index, when the read transaction cannot be
/// opened, or when computing the statistics fails.
pub async fn get_advanced_index_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    // Validate the raw path segment before touching the scheduler.
    let uid = IndexUid::try_from(index_uid.into_inner())?;
    let index = index_scheduler.index(&uid)?;

    // The statistics are gathered through a read-only transaction.
    let read_txn = index.read_txn()?;
    let stats = index.advanced_stats(&read_txn)?;

    debug!(returns = ?stats, "Get advanced index stats");
    let response = HttpResponse::Ok().json(stats);
    Ok(response)
}

View File

@ -9,6 +9,7 @@ use heed::types::*;
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
use serde::Serialize;
use time::OffsetDateTime; use time::OffsetDateTime;
use crate::documents::PrimaryKey; use crate::documents::PrimaryKey;
@ -324,6 +325,87 @@ impl Index {
self.env.info().map_size self.env.info().map_size
} }
/// Computes detailed statistics about the environment and a selection of the
/// databases of this index.
///
/// Every entry of each inspected database is visited through `rtxn`, and
/// bitmap values are decoded to count their elements, so the cost grows with
/// the size of the index.
///
/// Averages are `None` for a database without any entry. Medians are not
/// computed and are always `None`.
///
/// # Errors
///
/// Returns an error when iterating over a database fails, when a value
/// cannot be decoded, or when the environment sizes cannot be retrieved.
pub fn advanced_stats(&self, rtxn: &heed::RoTxn) -> Result<AdvancedStats> {
    use db_name::*;

    /// Mean of `total` over `count` entries, `None` when there is no entry
    /// (avoids producing a NaN from a `0 / 0` division).
    fn average(total: f64, count: usize) -> Option<f64> {
        (count != 0).then(|| total / count as f64)
    }

    /// Scans a database whose values are roaring bitmaps and gathers the key
    /// sizes, the raw value sizes and the number of elements in each bitmap.
    fn advanced_database_stats<KC>(
        rtxn: &heed::RoTxn,
        db: Database<KC, CboRoaringBitmapCodec>,
    ) -> Result<AdvancedDatabaseStats> {
        // Keys are read as raw bytes; values are decoded on demand so that
        // both the encoded size and the decoded bitmap can be inspected.
        let db = db.remap_key_type::<Bytes>().lazily_decode_data();
        let mut entries_count = 0;
        let mut total_bitmap_size = 0;
        let mut total_bitmap_len = 0;
        let mut total_key_size = 0;

        for result in db.iter(rtxn)? {
            let (bytes_key, lazy_value) = result?;
            entries_count += 1;
            // Propagate decoding errors instead of panicking.
            let raw_value =
                lazy_value.remap::<Bytes>().decode().map_err(heed::Error::Decoding)?;
            total_bitmap_size += raw_value.len();
            let bitmap = lazy_value.decode().map_err(heed::Error::Decoding)?;
            total_bitmap_len += bitmap.len();
            total_key_size += bytes_key.len();
        }

        Ok(AdvancedDatabaseStats {
            entries_count,
            average_bitmap_len: average(total_bitmap_len as f64, entries_count),
            median_bitmap_len: None,
            average_value_size: average(total_bitmap_size as f64, entries_count),
            median_value_size: None,
            average_key_size: average(total_key_size as f64, entries_count),
            median_key_size: None,
        })
    }

    /// Scans a database whose values are not bitmaps and gathers the key and
    /// value sizes only.
    fn advanced_database_stats_no_bitmap<KC, DC>(
        rtxn: &heed::RoTxn,
        db: Database<KC, DC>,
    ) -> Result<AdvancedDatabaseStats> {
        // Only sizes matter here, so both keys and values are read as bytes.
        let db = db.remap_types::<Bytes, Bytes>();
        let mut entries_count = 0;
        let mut total_value_size = 0;
        let mut total_key_size = 0;

        for result in db.iter(rtxn)? {
            let (bytes_key, bytes_value) = result?;
            entries_count += 1;
            total_value_size += bytes_value.len();
            total_key_size += bytes_key.len();
        }

        Ok(AdvancedDatabaseStats {
            entries_count,
            average_bitmap_len: None,
            median_bitmap_len: None,
            average_value_size: average(total_value_size as f64, entries_count),
            median_value_size: None,
            average_key_size: average(total_key_size as f64, entries_count),
            median_key_size: None,
        })
    }

    let mut dbs = BTreeMap::new();
    dbs.insert(WORD_DOCIDS, advanced_database_stats(rtxn, self.word_docids)?);
    dbs.insert(
        WORD_PAIR_PROXIMITY_DOCIDS,
        advanced_database_stats(rtxn, self.word_pair_proximity_docids)?,
    );
    dbs.insert(WORD_PREFIX_DOCIDS, advanced_database_stats(rtxn, self.word_prefix_docids)?);
    dbs.insert(WORD_FIELD_ID_DOCIDS, advanced_database_stats(rtxn, self.word_fid_docids)?);
    dbs.insert(WORD_POSITION_DOCIDS, advanced_database_stats(rtxn, self.word_position_docids)?);
    dbs.insert(DOCUMENTS, advanced_database_stats_no_bitmap(rtxn, self.documents)?);

    Ok(AdvancedStats {
        map_size: self.map_size(),
        // Ask the environment for the space used without the free pages;
        // this field must not duplicate `on_disk_size`.
        non_free_pages_size: self.env.non_free_pages_size()?,
        on_disk_size: self.on_disk_size()?,
        databases: dbs,
    })
}
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> { pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_file(path, option).map_err(Into::into) self.env.copy_to_file(path, option).map_err(Into::into)
} }
@ -1662,6 +1744,36 @@ impl Index {
} }
} }
/// Detailed statistics about an index, as produced by `Index::advanced_stats`.
///
/// Holds sizes describing the whole environment plus per-database statistics
/// keyed by database name.
#[derive(Clone, Debug, Serialize)]
pub struct AdvancedStats {
/// The size of the data memory map.
map_size: usize,
/// The size used by all the databases in the environment, without the free pages.
non_free_pages_size: u64,
/// The size of the data file on disk.
on_disk_size: u64,
/// The advanced stats of each inspected database, keyed by database name.
databases: BTreeMap<&'static str, AdvancedDatabaseStats>,
}
/// Detailed statistics about a single database of an index.
///
/// Sizes are byte lengths of the raw keys and values. Fields are `None` when
/// the statistic is not computed for the database.
#[derive(Clone, Debug, Serialize)]
pub struct AdvancedDatabaseStats {
/// The number of entries in this database.
entries_count: usize,
/// The average number of entries in the bitmaps of this database.
/// `None` for databases whose values are not bitmaps.
average_bitmap_len: Option<f64>,
/// The median number of entries in the bitmaps of this database.
median_bitmap_len: Option<f64>,
/// The average size of the values of this database.
average_value_size: Option<f64>,
/// The median size of the values of this database.
median_value_size: Option<f64>,
/// The average size of the keys of this database.
average_key_size: Option<f64>,
/// The median size of the keys of this database.
median_key_size: Option<f64>,
}
#[cfg(test)] #[cfg(test)]
pub(crate) mod tests { pub(crate) mod tests {
use std::collections::HashSet; use std::collections::HashSet;