mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 09:35:51 +08:00
Fix clippy issues
This commit is contained in:
parent
1bccf2079e
commit
cbaa54cafd
@ -1292,7 +1292,7 @@ impl IndexScheduler {
|
|||||||
|| must_stop_processing.get(),
|
|| must_stop_processing.get(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let document_ids = documents.iter().cloned().flatten().collect();
|
let document_ids = documents.iter().flatten().cloned().collect();
|
||||||
|
|
||||||
let (new_builder, user_result) = builder.remove_documents(document_ids)?;
|
let (new_builder, user_result) = builder.remove_documents(document_ids)?;
|
||||||
builder = new_builder;
|
builder = new_builder;
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
|
||||||
|
|
||||||
use heed::types::{OwnedType, Str};
|
use heed::types::{OwnedType, Str};
|
||||||
use heed::{Database, RoIter, RoTxn, RwTxn};
|
use heed::{Database, RoIter, RoTxn, RwTxn};
|
||||||
@ -31,7 +30,7 @@ impl ExternalDocumentsIds {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
||||||
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get().try_into().unwrap()))
|
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An helper function to debug this type, returns an `HashMap` of both,
|
/// An helper function to debug this type, returns an `HashMap` of both,
|
||||||
@ -40,7 +39,7 @@ impl ExternalDocumentsIds {
|
|||||||
let mut map = HashMap::default();
|
let mut map = HashMap::default();
|
||||||
for result in self.0.iter(rtxn)? {
|
for result in self.0.iter(rtxn)? {
|
||||||
let (external, internal) = result?;
|
let (external, internal) = result?;
|
||||||
map.insert(external.to_owned(), internal.get().try_into().unwrap());
|
map.insert(external.to_owned(), internal.get());
|
||||||
}
|
}
|
||||||
Ok(map)
|
Ok(map)
|
||||||
}
|
}
|
||||||
|
@ -1376,7 +1376,7 @@ impl Index {
|
|||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
key: &(Script, Language),
|
key: &(Script, Language),
|
||||||
) -> heed::Result<Option<RoaringBitmap>> {
|
) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
Ok(self.script_language_docids.get(rtxn, key)?)
|
self.script_language_docids.get(rtxn, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
|
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
|
||||||
|
@ -198,7 +198,7 @@ fn tokenizer_builder<'a>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(script_language) = script_language {
|
if let Some(script_language) = script_language {
|
||||||
tokenizer_builder.allow_list(&script_language);
|
tokenizer_builder.allow_list(script_language);
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenizer_builder
|
tokenizer_builder
|
||||||
@ -206,6 +206,7 @@ fn tokenizer_builder<'a>(
|
|||||||
|
|
||||||
/// Extract words maped with their positions of a document,
|
/// Extract words maped with their positions of a document,
|
||||||
/// ensuring no Language detection mistakes was made.
|
/// ensuring no Language detection mistakes was made.
|
||||||
|
#[allow(clippy::too_many_arguments)] // FIXME: consider grouping arguments in a struct
|
||||||
fn lang_safe_tokens_from_document<'a>(
|
fn lang_safe_tokens_from_document<'a>(
|
||||||
obkv: &KvReader<FieldId>,
|
obkv: &KvReader<FieldId>,
|
||||||
searchable_fields: &Option<HashSet<FieldId>>,
|
searchable_fields: &Option<HashSet<FieldId>>,
|
||||||
@ -220,9 +221,9 @@ fn lang_safe_tokens_from_document<'a>(
|
|||||||
let mut script_language_word_count = HashMap::new();
|
let mut script_language_word_count = HashMap::new();
|
||||||
|
|
||||||
tokens_from_document(
|
tokens_from_document(
|
||||||
&obkv,
|
obkv,
|
||||||
searchable_fields,
|
searchable_fields,
|
||||||
&tokenizer,
|
tokenizer,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
del_add,
|
del_add,
|
||||||
buffers,
|
buffers,
|
||||||
@ -257,7 +258,7 @@ fn lang_safe_tokens_from_document<'a>(
|
|||||||
|
|
||||||
// rerun the extraction.
|
// rerun the extraction.
|
||||||
tokens_from_document(
|
tokens_from_document(
|
||||||
&obkv,
|
obkv,
|
||||||
searchable_fields,
|
searchable_fields,
|
||||||
&tokenizer,
|
&tokenizer,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
|
@ -45,7 +45,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||||||
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||||
|
|
||||||
let del_add_reader = KvReaderDelAdd::new(&value);
|
let del_add_reader = KvReaderDelAdd::new(value);
|
||||||
let deletion = del_add_reader
|
let deletion = del_add_reader
|
||||||
// get deleted words
|
// get deleted words
|
||||||
.get(DelAdd::Deletion)
|
.get(DelAdd::Deletion)
|
||||||
|
@ -57,17 +57,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||||
let fid = u16::from_be_bytes(fid_bytes);
|
let fid = u16::from_be_bytes(fid_bytes);
|
||||||
|
|
||||||
let del_add_reader = KvReaderDelAdd::new(&value);
|
let del_add_reader = KvReaderDelAdd::new(value);
|
||||||
// extract all unique words to remove.
|
// extract all unique words to remove.
|
||||||
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
|
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
|
||||||
for (_pos, word) in KvReaderU16::new(&deletion).iter() {
|
for (_pos, word) in KvReaderU16::new(deletion).iter() {
|
||||||
del_words.insert(word.to_vec());
|
del_words.insert(word.to_vec());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract all unique additional words.
|
// extract all unique additional words.
|
||||||
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
|
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
|
||||||
for (_pos, word) in KvReaderU16::new(&addition).iter() {
|
for (_pos, word) in KvReaderU16::new(addition).iter() {
|
||||||
add_words.insert(word.to_vec());
|
add_words.insert(word.to_vec());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -122,9 +122,9 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
// every words contained in an attribute set to exact must be pushed in the exact_words list.
|
// every words contained in an attribute set to exact must be pushed in the exact_words list.
|
||||||
if exact_attributes.contains(&fid) {
|
if exact_attributes.contains(&fid) {
|
||||||
exact_word_docids_sorter.insert(word.as_bytes(), &value)?;
|
exact_word_docids_sorter.insert(word.as_bytes(), value)?;
|
||||||
} else {
|
} else {
|
||||||
word_docids_sorter.insert(word.as_bytes(), &value)?;
|
word_docids_sorter.insert(word.as_bytes(), value)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,7 +169,7 @@ fn words_into_sorter(
|
|||||||
};
|
};
|
||||||
|
|
||||||
key_buffer.clear();
|
key_buffer.clear();
|
||||||
key_buffer.extend_from_slice(&word_bytes);
|
key_buffer.extend_from_slice(word_bytes);
|
||||||
key_buffer.push(0);
|
key_buffer.push(0);
|
||||||
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||||
|
@ -29,7 +29,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
|
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
|
||||||
.into_iter()
|
|
||||||
.map(|_| {
|
.map(|_| {
|
||||||
create_sorter(
|
create_sorter(
|
||||||
grenad::SortAlgorithm::Unstable,
|
grenad::SortAlgorithm::Unstable,
|
||||||
@ -75,7 +74,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
let (del, add): (Result<_>, Result<_>) = rayon::join(
|
let (del, add): (Result<_>, Result<_>) = rayon::join(
|
||||||
|| {
|
|| {
|
||||||
// deletions
|
// deletions
|
||||||
if let Some(deletion) = KvReaderDelAdd::new(&value).get(DelAdd::Deletion) {
|
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
|
||||||
for (position, word) in KvReaderU16::new(deletion).iter() {
|
for (position, word) in KvReaderU16::new(deletion).iter() {
|
||||||
// drain the proximity window until the head word is considered close to the word we are inserting.
|
// drain the proximity window until the head word is considered close to the word we are inserting.
|
||||||
while del_word_positions.get(0).map_or(false, |(_w, p)| {
|
while del_word_positions.get(0).map_or(false, |(_w, p)| {
|
||||||
@ -104,7 +103,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
},
|
},
|
||||||
|| {
|
|| {
|
||||||
// additions
|
// additions
|
||||||
if let Some(addition) = KvReaderDelAdd::new(&value).get(DelAdd::Addition) {
|
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
|
||||||
for (position, word) in KvReaderU16::new(addition).iter() {
|
for (position, word) in KvReaderU16::new(addition).iter() {
|
||||||
// drain the proximity window until the head word is considered close to the word we are inserting.
|
// drain the proximity window until the head word is considered close to the word we are inserting.
|
||||||
while add_word_positions.get(0).map_or(false, |(_w, p)| {
|
while add_word_positions.get(0).map_or(false, |(_w, p)| {
|
||||||
@ -170,7 +169,7 @@ fn document_word_positions_into_sorter(
|
|||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
||||||
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
||||||
word_pair_proximity_docids_sorters: &mut Vec<grenad::Sorter<MergeFn>>,
|
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
use itertools::merge_join_by;
|
use itertools::merge_join_by;
|
||||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||||
@ -201,7 +200,7 @@ fn document_word_positions_into_sorter(
|
|||||||
};
|
};
|
||||||
|
|
||||||
key_buffer.clear();
|
key_buffer.clear();
|
||||||
key_buffer.push(*prox as u8);
|
key_buffer.push(*prox);
|
||||||
key_buffer.extend_from_slice(w1.as_bytes());
|
key_buffer.extend_from_slice(w1.as_bytes());
|
||||||
key_buffer.push(0);
|
key_buffer.push(0);
|
||||||
key_buffer.extend_from_slice(w2.as_bytes());
|
key_buffer.extend_from_slice(w2.as_bytes());
|
||||||
|
@ -60,7 +60,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
current_document_id = Some(document_id);
|
current_document_id = Some(document_id);
|
||||||
|
|
||||||
let del_add_reader = KvReaderDelAdd::new(&value);
|
let del_add_reader = KvReaderDelAdd::new(value);
|
||||||
// extract all unique words to remove.
|
// extract all unique words to remove.
|
||||||
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
|
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
|
||||||
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
|
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
|
||||||
|
@ -157,7 +157,7 @@ fn inner_merge_del_add_obkvs<'a>(
|
|||||||
let mut acc = newest[1..].to_vec();
|
let mut acc = newest[1..].to_vec();
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
// reverse iter from the most recent to the oldest.
|
// reverse iter from the most recent to the oldest.
|
||||||
for current in obkvs.into_iter().rev() {
|
for current in obkvs.iter().rev() {
|
||||||
// if in the previous iteration there was a complete deletion,
|
// if in the previous iteration there was a complete deletion,
|
||||||
// stop the merge process.
|
// stop the merge process.
|
||||||
if acc_operation_type == Operation::Deletion as u8 {
|
if acc_operation_type == Operation::Deletion as u8 {
|
||||||
|
@ -2659,7 +2659,7 @@ mod tests {
|
|||||||
let external_document_ids = index.external_documents_ids();
|
let external_document_ids = index.external_documents_ids();
|
||||||
let ids_to_delete: Vec<u32> = external_ids
|
let ids_to_delete: Vec<u32> = external_ids
|
||||||
.iter()
|
.iter()
|
||||||
.map(|id| external_document_ids.get(&wtxn, id).unwrap().unwrap())
|
.map(|id| external_document_ids.get(wtxn, id).unwrap().unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// Delete some documents.
|
// Delete some documents.
|
||||||
|
@ -456,7 +456,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|
|
||||||
if final_value.is_empty() {
|
if final_value.is_empty() {
|
||||||
// If the database entry exists, delete it.
|
// If the database entry exists, delete it.
|
||||||
if db_key_exists == true {
|
if db_key_exists {
|
||||||
index.script_language_docids.delete(wtxn, &key)?;
|
index.script_language_docids.delete(wtxn, &key)?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -501,6 +501,7 @@ fn merge_word_docids_reader_into_fst(
|
|||||||
///
|
///
|
||||||
/// If there is no Add side we currently write an empty buffer
|
/// If there is no Add side we currently write an empty buffer
|
||||||
/// which is a valid CboRoaringBitmap.
|
/// which is a valid CboRoaringBitmap.
|
||||||
|
#[allow(clippy::ptr_arg)] // required to avoid signature mismatch
|
||||||
fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Result<&'a [u8]> {
|
fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Result<&'a [u8]> {
|
||||||
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
|
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user