mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Compute the biggest values of the words_level_positions_docids
This commit is contained in:
parent
f713828406
commit
8bd4f5d93e
@ -346,6 +346,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
||||
let docid_word_positions_name = "docid_word_positions";
|
||||
let word_prefix_pair_proximity_docids_name = "word_prefix_pair_proximity_docids";
|
||||
let word_pair_proximity_docids_name = "word_pair_proximity_docids";
|
||||
let word_level_position_docids_name = "word_level_position_docids";
|
||||
let facet_field_id_value_docids_name = "facet_field_id_value_docids";
|
||||
let documents_name = "documents";
|
||||
|
||||
@ -402,6 +403,13 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
||||
if heap.len() > limit { heap.pop(); }
|
||||
}
|
||||
|
||||
for result in word_level_position_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
||||
let ((word, level, left, right), value) = result?;
|
||||
let key = format!("{} {} {:?}", word, level, left..=right);
|
||||
heap.push(Reverse((value.len(), key, word_level_position_docids_name)));
|
||||
if heap.len() > limit { heap.pop(); }
|
||||
}
|
||||
|
||||
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
for (field_id, field_type) in faceted_fields {
|
||||
@ -549,7 +557,7 @@ fn words_level_positions_docids(
|
||||
{
|
||||
let stdout = io::stdout();
|
||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||
wtr.write_record(&["word", "level", "position_range", "documents_count", "documents_ids"])?;
|
||||
wtr.write_record(&["word", "level", "positions", "documents_count", "documents_ids"])?;
|
||||
|
||||
for word in words.iter().map(AsRef::as_ref) {
|
||||
let range = {
|
||||
@ -561,14 +569,18 @@ fn words_level_positions_docids(
|
||||
let ((w, level, left, right), docids) = result?;
|
||||
if word != w { break }
|
||||
|
||||
let level = level.to_string();
|
||||
let count = docids.len().to_string();
|
||||
let docids = if debug {
|
||||
format!("{:?}", docids)
|
||||
} else {
|
||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||
};
|
||||
let position_range = format!("{:?}", left..=right);
|
||||
let position_range = if level == 0 {
|
||||
format!("{:?}", left)
|
||||
} else {
|
||||
format!("{:?}", left..=right)
|
||||
};
|
||||
let level = level.to_string();
|
||||
wtr.write_record(&[w, &level, &position_range, &count, &docids])?;
|
||||
}
|
||||
}
|
||||
|
@ -104,16 +104,16 @@ fn compute_positions_levels(
|
||||
for result in words_db.iter(rtxn)? {
|
||||
let (word, ()) = result?;
|
||||
|
||||
let first_level_size = words_positions_db.remap_data_type::<DecodeIgnore>()
|
||||
.prefix_iter(rtxn, &(word, 0, u32::min_value(), u32::min_value()))?
|
||||
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
||||
|
||||
let level_0_range = {
|
||||
let left = (word, 0, u32::min_value(), u32::min_value());
|
||||
let right = (word, 0, u32::max_value(), u32::max_value());
|
||||
left..=right
|
||||
};
|
||||
|
||||
let first_level_size = words_positions_db.remap_data_type::<DecodeIgnore>()
|
||||
.range(rtxn, &level_0_range)?
|
||||
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
||||
|
||||
// Groups sizes are always a power of the original level_group_size and therefore a group
|
||||
// always maps groups of the previous level and never splits previous levels groups in half.
|
||||
let group_size_iter = (1u8..)
|
||||
@ -132,7 +132,7 @@ fn compute_positions_levels(
|
||||
let mut group_docids = RoaringBitmap::new();
|
||||
|
||||
for (i, result) in words_positions_db.range(rtxn, &level_0_range)?.enumerate() {
|
||||
let ((_field_id, _level, value, _right), docids) = result?;
|
||||
let ((_word, _level, value, _right), docids) = result?;
|
||||
|
||||
if i == 0 {
|
||||
left = value;
|
||||
|
Loading…
Reference in New Issue
Block a user