mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Measure the SmallVec efficacy
This commit is contained in:
parent
4ce5d3d66d
commit
7f148c127c
@ -15,6 +15,8 @@ pub struct CboCachedSorter<MF> {
|
|||||||
sorter: Sorter<MF>,
|
sorter: Sorter<MF>,
|
||||||
deladd_buffer: Vec<u8>,
|
deladd_buffer: Vec<u8>,
|
||||||
cbo_buffer: Vec<u8>,
|
cbo_buffer: Vec<u8>,
|
||||||
|
total_insertions: usize,
|
||||||
|
fitted_in_key: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<MF> CboCachedSorter<MF> {
|
impl<MF> CboCachedSorter<MF> {
|
||||||
@ -24,6 +26,8 @@ impl<MF> CboCachedSorter<MF> {
|
|||||||
sorter,
|
sorter,
|
||||||
deladd_buffer: Vec::new(),
|
deladd_buffer: Vec::new(),
|
||||||
cbo_buffer: Vec::new(),
|
cbo_buffer: Vec::new(),
|
||||||
|
total_insertions: 0,
|
||||||
|
fitted_in_key: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -35,6 +39,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
del.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
del.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
self.total_insertions += 1;
|
||||||
|
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||||
let value = DelAddRoaringBitmap::new_del_u32(n);
|
let value = DelAddRoaringBitmap::new_del_u32(n);
|
||||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
@ -55,6 +61,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
del.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
del.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
self.total_insertions += 1;
|
||||||
|
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||||
let value = DelAddRoaringBitmap::new_del(bitmap);
|
let value = DelAddRoaringBitmap::new_del(bitmap);
|
||||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
@ -71,6 +79,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
self.total_insertions += 1;
|
||||||
|
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||||
let value = DelAddRoaringBitmap::new_add_u32(n);
|
let value = DelAddRoaringBitmap::new_add_u32(n);
|
||||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
@ -91,6 +101,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
add.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
add.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
self.total_insertions += 1;
|
||||||
|
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||||
let value = DelAddRoaringBitmap::new_add(bitmap);
|
let value = DelAddRoaringBitmap::new_add(bitmap);
|
||||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
@ -108,6 +120,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
self.total_insertions += 1;
|
||||||
|
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||||
let value = DelAddRoaringBitmap::new_del_add_u32(n);
|
let value = DelAddRoaringBitmap::new_del_add_u32(n);
|
||||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
@ -161,14 +175,22 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
|||||||
for (key, deladd) in mem::replace(&mut self.cache, default_arc) {
|
for (key, deladd) in mem::replace(&mut self.cache, default_arc) {
|
||||||
self.write_entry(key, deladd)?;
|
self.write_entry(key, deladd)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
"LruCache stats: {} <= 20 bytes ({}%) on a total of {} insertions",
|
||||||
|
self.fitted_in_key,
|
||||||
|
(self.fitted_in_key as f32 / self.total_insertions as f32) * 100.0,
|
||||||
|
self.total_insertions,
|
||||||
|
);
|
||||||
|
|
||||||
Ok(self.sorter)
|
Ok(self.sorter)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct DelAddRoaringBitmap {
|
pub struct DelAddRoaringBitmap {
|
||||||
pub del: Option<PushOptimizedBitmap>,
|
pub(crate) del: Option<PushOptimizedBitmap>,
|
||||||
pub add: Option<PushOptimizedBitmap>,
|
pub(crate) add: Option<PushOptimizedBitmap>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DelAddRoaringBitmap {
|
impl DelAddRoaringBitmap {
|
||||||
|
@ -1,26 +1,21 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::{borrow::Cow, fs::File, num::NonZero};
|
use std::fs::File;
|
||||||
|
use std::num::NonZero;
|
||||||
|
|
||||||
use grenad::Merger;
|
use grenad::{Merger, MergerBuilder};
|
||||||
use grenad::MergerBuilder;
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rayon::iter::IntoParallelIterator;
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
use rayon::iter::ParallelIterator;
|
|
||||||
|
|
||||||
use super::{
|
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||||
tokenize_document::{tokenizer_builder, DocumentTokenizer},
|
use super::SearchableExtractor;
|
||||||
SearchableExtractor,
|
use crate::update::new::extract::cache::CboCachedSorter;
|
||||||
};
|
|
||||||
use crate::update::new::extract::perm_json_p::contained_in;
|
use crate::update::new::extract::perm_json_p::contained_in;
|
||||||
use crate::DocumentId;
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{
|
use crate::{
|
||||||
bucketed_position,
|
bucketed_position, DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result,
|
||||||
update::{
|
MAX_POSITION_PER_ATTRIBUTE,
|
||||||
create_sorter,
|
|
||||||
new::{extract::cache::CboCachedSorter, DocumentChange, ItemsPool},
|
|
||||||
GrenadParameters, MergeDeladdCboRoaringBitmaps,
|
|
||||||
},
|
|
||||||
FieldId, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const MAX_COUNTED_WORDS: usize = 30;
|
const MAX_COUNTED_WORDS: usize = 30;
|
||||||
@ -565,7 +560,7 @@ impl WordDocidsExtractors {
|
|||||||
cached_sorter: &mut WordDocidsCachedSorters,
|
cached_sorter: &mut WordDocidsCachedSorters,
|
||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let exact_attributes = index.exact_attributes(&rtxn)?;
|
let exact_attributes = index.exact_attributes(rtxn)?;
|
||||||
let is_exact_attribute =
|
let is_exact_attribute =
|
||||||
|fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
|
|fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -59,7 +59,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
let document = inner.current(rtxn, index)?.unwrap();
|
let document = inner.current(rtxn, index)?.unwrap();
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
&document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut word_positions,
|
&mut word_positions,
|
||||||
|
@ -92,24 +92,24 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
// if the current field is searchable or contains a searchable attribute
|
||||||
if select_field(&field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
||||||
// parse json.
|
// parse json.
|
||||||
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
||||||
Value::Object(object) => seek_leaf_values_in_object(
|
Value::Object(object) => seek_leaf_values_in_object(
|
||||||
&object,
|
&object,
|
||||||
self.attribute_to_extract,
|
self.attribute_to_extract,
|
||||||
self.attribute_to_skip,
|
self.attribute_to_skip,
|
||||||
&field_name,
|
field_name,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
Value::Array(array) => seek_leaf_values_in_array(
|
Value::Array(array) => seek_leaf_values_in_array(
|
||||||
&array,
|
&array,
|
||||||
self.attribute_to_extract,
|
self.attribute_to_extract,
|
||||||
self.attribute_to_skip,
|
self.attribute_to_skip,
|
||||||
&field_name,
|
field_name,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
value => tokenize_field(&field_name, &value)?,
|
value => tokenize_field(field_name, &value)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user