From ba2eb0d7ad3177f1a9032dc5e7da2ae413942318 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 26 Aug 2020 14:36:22 +0200 Subject: [PATCH] Take the words-fst into account when retrieving the biggest values --- src/bin/infos.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bin/infos.rs b/src/bin/infos.rs index ac7695778..270892e90 100644 --- a/src/bin/infos.rs +++ b/src/bin/infos.rs @@ -185,6 +185,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho use std::convert::TryInto; use heed::types::{Str, ByteSlice}; + let main_name = "main"; let word_positions_name = "word_positions"; let word_position_docids_name = "word_position_docids"; let word_attribute_docids_name = "word_attribute_docids"; @@ -192,6 +193,11 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho let mut heap = BinaryHeap::with_capacity(limit + 1); if limit > 0 { + if let Some(fst) = index.fst(rtxn)? { + heap.push(Reverse((fst.as_fst().as_bytes().len(), format!("words-fst"), main_name))); + if heap.len() > limit { heap.pop(); } + } + for result in index.word_positions.as_polymorph().iter::<_, Str, ByteSlice>(rtxn)? { let (word, value) = result?; heap.push(Reverse((value.len(), word.to_string(), word_positions_name))); @@ -223,7 +229,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho let stdout = io::stdout(); let mut wtr = csv::Writer::from_writer(stdout.lock()); - wtr.write_record(&["size", "key_name", "database_name"])?; + wtr.write_record(&["database_name", "key_name", "size"])?; for Reverse((size, key_name, database_name)) in heap.into_sorted_vec() { wtr.write_record(&[database_name.to_string(), key_name, size.to_string()])?;