Clean up the stats and size of database infos subcommands

This commit is contained in:
Kerollmops 2020-10-01 11:23:37 +02:00 committed by Clément Renault
parent d0c73564b1
commit 079742b4d3
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -365,7 +365,7 @@ fn size_of_database(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Re
DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(), DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(),
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(), WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(),
DOCUMENTS_DB_NAME => index.documents.as_polymorph(), DOCUMENTS_DB_NAME => index.documents.as_polymorph(),
otherwise => anyhow::bail!("unknown database {:?}", otherwise), unknown => anyhow::bail!("unknown database {:?}", unknown),
}; };
let mut key_size: u64 = 0; let mut key_size: u64 = 0;
@ -376,22 +376,29 @@ fn size_of_database(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Re
val_size += v.len() as u64; val_size += v.len() as u64;
} }
eprintln!("The {} database weigh {} bytes in terms of keys and {} bytes in terms of values.", println!("The {} database weigh:", name);
name, key_size, val_size, println!("\ttotal key size: {} bytes", key_size);
); println!("\ttotal val size: {} bytes", val_size);
println!("\ttotal size: {} bytes", key_size + val_size);
Ok(()) Ok(())
} }
fn word_pair_proximity_stats(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn word_pair_proximity_stats(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore; use heed::types::ByteSlice;
use heed::{Error, BytesDecode};
use milli::CboRoaringBitmapCodec; use milli::CboRoaringBitmapCodec;
let mut key_size = 0u64;
let mut val_size = 0u64;
let mut values_length = Vec::new(); let mut values_length = Vec::new();
let db = index.word_pair_proximity_docids.as_polymorph(); let db = index.word_pair_proximity_docids.as_polymorph();
for result in db.iter::<_, DecodeIgnore, CboRoaringBitmapCodec>(rtxn)? { for result in db.iter::<_, ByteSlice, ByteSlice>(rtxn)? {
let ((), val) = result?; let (key, val) = result?;
key_size += key.len() as u64;
val_size += val.len() as u64;
let val = CboRoaringBitmapCodec::bytes_decode(val).ok_or(Error::Decoding)?;
values_length.push(val.len() as u32); values_length.push(val.len() as u32);
} }
@ -408,7 +415,7 @@ fn word_pair_proximity_stats(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Resul
let count = values_length.len(); let count = values_length.len();
let sum = values_length.iter().map(|l| *l as u64).sum::<u64>(); let sum = values_length.iter().map(|l| *l as u64).sum::<u64>();
println!("words pairs proximities stats on the lengths"); println!("word-pair-proximity-docids stats on the lengths");
println!("\tnumber of proximity pairs: {}", count); println!("\tnumber of proximity pairs: {}", count);
println!("\tfirst quartile: {}", first_quartile); println!("\tfirst quartile: {}", first_quartile);
println!("\tmedian: {}", median); println!("\tmedian: {}", median);
@ -419,6 +426,10 @@ fn word_pair_proximity_stats(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Resul
println!("\tminimum: {}", minimum); println!("\tminimum: {}", minimum);
println!("\tmaximum: {}", maximum); println!("\tmaximum: {}", maximum);
println!("\taverage: {}", sum as f64 / count as f64); println!("\taverage: {}", sum as f64 / count as f64);
println!();
println!("\ttotal key size: {} bytes", key_size);
println!("\ttotal val size: {} bytes", val_size);
println!("\ttotal size: {} bytes", key_size + val_size);
Ok(()) Ok(())
} }