mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
fix clippy too many arguments
This commit is contained in:
parent
b4b859ec8c
commit
5deea631ea
@ -45,8 +45,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
let mut buffers = Buffers::default();
|
||||||
let mut field_buffer = String::new();
|
|
||||||
let mut tokenizer_builder = TokenizerBuilder::new();
|
let mut tokenizer_builder = TokenizerBuilder::new();
|
||||||
if let Some(stop_words) = stop_words {
|
if let Some(stop_words) = stop_words {
|
||||||
tokenizer_builder.stop_words(stop_words);
|
tokenizer_builder.stop_words(stop_words);
|
||||||
@ -62,8 +61,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
let obkv = KvReader::<FieldId>::new(value);
|
let obkv = KvReader::<FieldId>::new(value);
|
||||||
|
|
||||||
documents_ids.push(document_id);
|
documents_ids.push(document_id);
|
||||||
key_buffer.clear();
|
buffers.key_buffer.clear();
|
||||||
key_buffer.extend_from_slice(&document_id.to_be_bytes());
|
buffers.key_buffer.extend_from_slice(&document_id.to_be_bytes());
|
||||||
|
|
||||||
let mut script_language_word_count = HashMap::new();
|
let mut script_language_word_count = HashMap::new();
|
||||||
|
|
||||||
@ -72,8 +71,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
searchable_fields,
|
searchable_fields,
|
||||||
&tokenizer,
|
&tokenizer,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
&mut key_buffer,
|
&mut buffers,
|
||||||
&mut field_buffer,
|
|
||||||
&mut script_language_word_count,
|
&mut script_language_word_count,
|
||||||
&mut docid_word_positions_sorter,
|
&mut docid_word_positions_sorter,
|
||||||
)?;
|
)?;
|
||||||
@ -105,8 +103,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
searchable_fields,
|
searchable_fields,
|
||||||
&tokenizer,
|
&tokenizer,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
&mut key_buffer,
|
&mut buffers,
|
||||||
&mut field_buffer,
|
|
||||||
&mut script_language_word_count,
|
&mut script_language_word_count,
|
||||||
&mut docid_word_positions_sorter,
|
&mut docid_word_positions_sorter,
|
||||||
)?;
|
)?;
|
||||||
@ -127,22 +124,20 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
.map(|reader| (documents_ids, reader, script_language_docids))
|
.map(|reader| (documents_ids, reader, script_language_docids))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
|
||||||
fn extract_tokens_from_document<T: AsRef<[u8]>>(
|
fn extract_tokens_from_document<T: AsRef<[u8]>>(
|
||||||
obkv: &KvReader<FieldId>,
|
obkv: &KvReader<FieldId>,
|
||||||
searchable_fields: &Option<HashSet<FieldId>>,
|
searchable_fields: &Option<HashSet<FieldId>>,
|
||||||
tokenizer: &Tokenizer<T>,
|
tokenizer: &Tokenizer<T>,
|
||||||
max_positions_per_attributes: u32,
|
max_positions_per_attributes: u32,
|
||||||
key_buffer: &mut Vec<u8>,
|
buffers: &mut Buffers,
|
||||||
field_buffer: &mut String,
|
|
||||||
script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
|
script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
|
||||||
docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
|
docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for (field_id, field_bytes) in obkv.iter() {
|
for (field_id, field_bytes) in obkv.iter() {
|
||||||
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
||||||
let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||||
field_buffer.clear();
|
buffers.field_buffer.clear();
|
||||||
if let Some(field) = json_to_string(&value, field_buffer) {
|
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
|
||||||
let tokens = process_tokens(tokenizer.tokenize(field))
|
let tokens = process_tokens(tokenizer.tokenize(field))
|
||||||
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
||||||
|
|
||||||
@ -159,14 +154,15 @@ fn extract_tokens_from_document<T: AsRef<[u8]>>(
|
|||||||
}
|
}
|
||||||
let token = token.lemma().trim();
|
let token = token.lemma().trim();
|
||||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||||
key_buffer.truncate(mem::size_of::<u32>());
|
buffers.key_buffer.truncate(mem::size_of::<u32>());
|
||||||
key_buffer.extend_from_slice(token.as_bytes());
|
buffers.key_buffer.extend_from_slice(token.as_bytes());
|
||||||
|
|
||||||
let position: u16 = index
|
let position: u16 = index
|
||||||
.try_into()
|
.try_into()
|
||||||
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
|
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
|
||||||
let position = absolute_from_relative_position(field_id, position);
|
let position = absolute_from_relative_position(field_id, position);
|
||||||
docid_word_positions_sorter.insert(&key_buffer, position.to_ne_bytes())?;
|
docid_word_positions_sorter
|
||||||
|
.insert(&buffers.key_buffer, position.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -290,3 +286,9 @@ fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)
|
|||||||
let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
|
let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
|
||||||
total / 10 // 10% is a completely arbitrary value.
|
total / 10 // 10% is a completely arbitrary value.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct Buffers {
|
||||||
|
key_buffer: Vec<u8>,
|
||||||
|
field_buffer: String,
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user