Remove some warnings

This commit is contained in:
Louis Dureuil 2024-10-10 22:42:37 +02:00
parent 68a2502388
commit 6028d6ba43
No known key found for this signature in database
13 changed files with 31 additions and 75 deletions

View File

@ -30,7 +30,6 @@ use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::new::indexer::document_changes::DocumentChanges;
use meilisearch_types::milli::update::new::indexer::{self, retrieve_or_guess_primary_key}; use meilisearch_types::milli::update::new::indexer::{self, retrieve_or_guess_primary_key};
use meilisearch_types::milli::update::{ use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
@ -1252,7 +1251,7 @@ impl IndexScheduler {
mut tasks, mut tasks,
} => { } => {
let started_processing_at = std::time::Instant::now(); let started_processing_at = std::time::Instant::now();
let mut primary_key_has_been_set = false; let primary_key_has_been_set = false;
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
let indexer_config = self.index_mapper.indexer_config(); let indexer_config = self.index_mapper.indexer_config();
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.

View File

@ -7,7 +7,7 @@ use super::document_change::{Entry, Versions};
use super::{KvReaderFieldId, KvWriterFieldId}; use super::{KvReaderFieldId, KvWriterFieldId};
use crate::documents::FieldIdMapper; use crate::documents::FieldIdMapper;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::{DocumentId, FieldId, Index, InternalError, Result}; use crate::{DocumentId, Index, InternalError, Result};
/// A view into a document that can represent either the current version from the DB, /// A view into a document that can represent either the current version from the DB,
/// the update data from payload or other means, or the merged updated version. /// the update data from payload or other means, or the merged updated version.
@ -66,14 +66,6 @@ impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
reader.map(|reader| Self { fields_ids_map: db_fields_ids_map, content: reader }) reader.map(|reader| Self { fields_ids_map: db_fields_ids_map, content: reader })
}) })
} }
fn field_from_fid(&self, fid: FieldId) -> Result<Option<&'t RawValue>> {
Ok(self
.content
.get(fid)
.map(|v| serde_json::from_slice(v).map_err(InternalError::SerdeJson))
.transpose()?)
}
} }
#[derive(Clone, Copy)] #[derive(Clone, Copy)]

View File

@ -55,11 +55,7 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
change: DocumentChange, change: DocumentChange,
context: &crate::update::new::indexer::document_changes::DocumentChangeContext<Self::Data>, context: &crate::update::new::indexer::document_changes::DocumentChangeContext<Self::Data>,
) -> Result<()> { ) -> Result<()> {
FacetedDocidsExtractor::extract_document_change( FacetedDocidsExtractor::extract_document_change(context, self.attributes_to_extract, change)
&context,
self.attributes_to_extract,
change,
)
} }
} }
@ -162,7 +158,7 @@ impl FacetedDocidsExtractor {
// key: fid // key: fid
buffer.push(FacetKind::Exists as u8); buffer.push(FacetKind::Exists as u8);
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)?; cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)?;
match value { match value {
// Number // Number
@ -178,7 +174,7 @@ impl FacetedDocidsExtractor {
buffer.extend_from_slice(&ordered); buffer.extend_from_slice(&ordered);
buffer.extend_from_slice(&n.to_be_bytes()); buffer.extend_from_slice(&n.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into) cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)
} else { } else {
Ok(()) Ok(())
} }
@ -192,7 +188,7 @@ impl FacetedDocidsExtractor {
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(0); // level 0 buffer.push(0); // level 0
buffer.extend_from_slice(truncated.as_bytes()); buffer.extend_from_slice(truncated.as_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into) cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)
} }
// Null // Null
// key: fid // key: fid
@ -200,7 +196,7 @@ impl FacetedDocidsExtractor {
buffer.clear(); buffer.clear();
buffer.push(FacetKind::Null as u8); buffer.push(FacetKind::Null as u8);
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into) cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)
} }
// Empty // Empty
// key: fid // key: fid
@ -208,13 +204,13 @@ impl FacetedDocidsExtractor {
buffer.clear(); buffer.clear();
buffer.push(FacetKind::Empty as u8); buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into) cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)
} }
Value::Object(o) if o.is_empty() => { Value::Object(o) if o.is_empty() => {
buffer.clear(); buffer.clear();
buffer.push(FacetKind::Empty as u8); buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into) cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)
} }
// Otherwise, do nothing // Otherwise, do nothing
/// TODO: What about Value::Bool? /// TODO: What about Value::Bool?

View File

@ -27,7 +27,7 @@ impl From<u8> for FacetKind {
} }
impl FacetKind { impl FacetKind {
pub fn extract_from_key<'k>(key: &'k [u8]) -> (FacetKind, &'k [u8]) { pub fn extract_from_key(key: &[u8]) -> (FacetKind, &[u8]) {
debug_assert!(key.len() > 3); debug_assert!(key.len() > 3);
(FacetKind::from(key[0]), &key[1..]) (FacetKind::from(key[0]), &key[1..])
} }

View File

@ -3,29 +3,21 @@ use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::num::NonZero; use std::num::NonZero;
use std::ops::DerefMut as _; use std::ops::DerefMut as _;
use std::sync::Arc;
use bumpalo::Bump; use bumpalo::Bump;
use grenad::{Merger, MergerBuilder}; use grenad::{Merger, MergerBuilder};
use heed::RoTxn; use heed::RoTxn;
use rayon::iter::IntoParallelIterator;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::SearchableExtractor;
use crate::update::new::document::Document;
use crate::update::new::extract::cache::CboCachedSorter; use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::extract::perm_json_p::contained_in; use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::indexer::document_changes::{ use crate::update::new::indexer::document_changes::{
for_each_document_change, DocumentChangeContext, DocumentChanges, Extractor, FullySend, for_each_document_change, DocumentChangeContext, DocumentChanges, Extractor, FullySend,
IndexingContext, ThreadLocal, IndexingContext, ThreadLocal,
}; };
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt;
use crate::update::new::DocumentChange; use crate::update::new::DocumentChange;
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
use crate::{ use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
bucketed_position, DocumentId, Error, FieldId, GlobalFieldsIdsMap, Index, Result,
MAX_POSITION_PER_ATTRIBUTE,
};
const MAX_COUNTED_WORDS: usize = 30; const MAX_COUNTED_WORDS: usize = 30;

View File

@ -2,9 +2,7 @@ use std::cell::RefCell;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::rc::Rc; use std::rc::Rc;
use bumpalo::Bump;
use heed::RoTxn; use heed::RoTxn;
use obkv::KvReader;
use super::tokenize_document::DocumentTokenizer; use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor; use super::SearchableExtractor;

View File

@ -5,7 +5,6 @@ mod tokenize_document;
use std::cell::RefCell; use std::cell::RefCell;
use std::fs::File; use std::fs::File;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ops::DerefMut;
use bumpalo::Bump; use bumpalo::Bump;
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers}; pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
@ -23,7 +22,7 @@ use crate::update::new::indexer::document_changes::{
}; };
use crate::update::new::DocumentChange; use crate::update::new::DocumentChange;
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
pub struct SearchableExtractorData<'extractor, EX: SearchableExtractor> { pub struct SearchableExtractorData<'extractor, EX: SearchableExtractor> {
tokenizer: &'extractor DocumentTokenizer<'extractor>, tokenizer: &'extractor DocumentTokenizer<'extractor>,
@ -120,7 +119,7 @@ pub trait SearchableExtractor: Sized + Sync {
indexing_context, indexing_context,
extractor_allocs, extractor_allocs,
&datastore, &datastore,
); )?;
} }
{ {
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);

View File

@ -8,7 +8,6 @@ use crate::update::new::document::Document;
use crate::update::new::extract::perm_json_p::{ use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, seek_leaf_values_in_array, seek_leaf_values_in_object, select_field,
}; };
use crate::update::new::KvReaderFieldId;
use crate::{ use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError, FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
MAX_WORD_LENGTH, MAX_WORD_LENGTH,
@ -172,7 +171,7 @@ mod test {
use bumpalo::Bump; use bumpalo::Bump;
use charabia::TokenizerBuilder; use charabia::TokenizerBuilder;
use meili_snap::snapshot; use meili_snap::snapshot;
use obkv::KvReader;
use raw_collections::RawMap; use raw_collections::RawMap;
use serde_json::json; use serde_json::json;
use serde_json::value::RawValue; use serde_json::value::RawValue;

View File

@ -45,7 +45,7 @@ impl<'de, 'p, 'indexer: 'de, Mapper: MutFieldIdMapper> serde::de::Visitor<'de>
let fid = fid.unwrap(); let fid = fid.unwrap();
match self.primary_key { match self.primary_key {
PrimaryKey::Flat { name, field_id } => { PrimaryKey::Flat { name: _, field_id } => {
let value: &'de RawValue = map.next_value()?; let value: &'de RawValue = map.next_value()?;
if fid == *field_id { if fid == *field_id {
let value = match value let value = match value
@ -145,8 +145,8 @@ impl<'de, 'indexer: 'de> serde::de::Visitor<'de> for DocumentIdVisitor<'indexer>
{ {
use std::fmt::Write as _; use std::fmt::Write as _;
let mut out = bumpalo::collections::String::new_in(&self.0); let mut out = bumpalo::collections::String::new_in(self.0);
write!(&mut out, "{v}"); write!(&mut out, "{v}").unwrap();
Ok(Ok(out.into_bump_str())) Ok(Ok(out.into_bump_str()))
} }

View File

@ -76,20 +76,6 @@ impl<T: MostlySend> MostlySendWrapper<T> {
Self(t) Self(t)
} }
fn new_send(t: T) -> Self
where
T: Send,
{
Self(t)
}
fn get(&self) -> T
where
T: Copy,
{
self.0
}
fn as_ref(&self) -> &T { fn as_ref(&self) -> &T {
&self.0 &self.0
} }
@ -111,6 +97,7 @@ impl<T: MostlySend> MostlySendWrapper<T> {
unsafe impl<T: MostlySend> Send for MostlySendWrapper<T> {} unsafe impl<T: MostlySend> Send for MostlySendWrapper<T> {}
/// A wrapper around [`thread_local::ThreadLocal`] that accepts [`MostlySend`] `T`s. /// A wrapper around [`thread_local::ThreadLocal`] that accepts [`MostlySend`] `T`s.
#[derive(Default)]
pub struct ThreadLocal<T: MostlySend> { pub struct ThreadLocal<T: MostlySend> {
inner: thread_local::ThreadLocal<MostlySendWrapper<T>>, inner: thread_local::ThreadLocal<MostlySendWrapper<T>>,
// FIXME: this should be necessary // FIXME: this should be necessary
@ -235,6 +222,7 @@ impl<
T: MostlySend, T: MostlySend,
> DocumentChangeContext<'doc, 'extractor, 'fid, 'indexer, T> > DocumentChangeContext<'doc, 'extractor, 'fid, 'indexer, T>
{ {
#[allow(clippy::too_many_arguments)]
pub fn new<F>( pub fn new<F>(
index: &'indexer Index, index: &'indexer Index,
db_fields_ids_map: &'indexer FieldsIdsMap, db_fields_ids_map: &'indexer FieldsIdsMap,
@ -252,7 +240,7 @@ impl<
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024)))); doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
let doc_alloc = doc_alloc.0.take(); let doc_alloc = doc_alloc.0.take();
let fields_ids_map = fields_ids_map_store let fields_ids_map = fields_ids_map_store
.get_or(|| RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map)).into()); .get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());
let fields_ids_map = &fields_ids_map.0; let fields_ids_map = &fields_ids_map.0;
let extractor_alloc = extractor_allocs.get_or_default(); let extractor_alloc = extractor_allocs.get_or_default();

View File

@ -6,10 +6,10 @@ use roaring::RoaringBitmap;
use super::document_changes::{DocumentChangeContext, DocumentChanges, MostlySend}; use super::document_changes::{DocumentChangeContext, DocumentChanges, MostlySend};
use crate::documents::PrimaryKey; use crate::documents::PrimaryKey;
use crate::index::db_name::EXTERNAL_DOCUMENTS_IDS; use crate::index::db_name::EXTERNAL_DOCUMENTS_IDS;
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
use crate::update::new::{Deletion, DocumentChange}; use crate::update::new::{Deletion, DocumentChange};
use crate::{DocumentId, InternalError, Result}; use crate::{DocumentId, InternalError, Result};
#[derive(Default)]
pub struct DocumentDeletion { pub struct DocumentDeletion {
pub to_delete: RoaringBitmap, pub to_delete: RoaringBitmap,
} }
@ -177,8 +177,5 @@ mod test {
alloc.get_mut().reset(); alloc.get_mut().reset();
} }
} }
drop(deletion_tracker);
drop(changes);
drop(rtxn);
} }
} }

View File

@ -1,5 +1,5 @@
use std::cell::RefCell; use std::cell::RefCell;
use std::sync::{Arc, RwLock}; use std::sync::RwLock;
use std::thread::{self, Builder}; use std::thread::{self, Builder};
use big_s::S; use big_s::S;
@ -11,27 +11,25 @@ pub use document_deletion::DocumentDeletion;
pub use document_operation::DocumentOperation; pub use document_operation::DocumentOperation;
use heed::{RoTxn, RwTxn}; use heed::{RoTxn, RwTxn};
pub use partial_dump::PartialDump; pub use partial_dump::PartialDump;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator};
use rayon::ThreadPool; use rayon::ThreadPool;
pub use update_by_function::UpdateByFunction; pub use update_by_function::UpdateByFunction;
use super::channel::*; use super::channel::*;
use super::document::write_to_obkv; use super::document::write_to_obkv;
use super::document_change::{Deletion, DocumentChange, Insertion, Update}; use super::document_change::DocumentChange;
use super::extract::*; use super::extract::*;
use super::merger::{merge_grenad_entries, FacetFieldIdsDelta}; use super::merger::{merge_grenad_entries, FacetFieldIdsDelta};
use super::word_fst_builder::PrefixDelta; use super::word_fst_builder::PrefixDelta;
use super::words_prefix_docids::{ use super::words_prefix_docids::{
compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids, compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids,
}; };
use super::{extract, StdResult, TopLevelMap}; use super::{StdResult, TopLevelMap};
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::update::new::channel::ExtractorSender; use crate::update::new::channel::ExtractorSender;
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt;
use crate::update::settings::InnerIndexSettings; use crate::update::settings::InnerIndexSettings;
use crate::update::{FacetsUpdateBulk, GrenadParameters}; use crate::update::{FacetsUpdateBulk, GrenadParameters};
use crate::{fields_ids_map, Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
mod de; mod de;
pub mod document_changes; pub mod document_changes;
@ -49,7 +47,7 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a> {
fn init_data( fn init_data(
&self, &self,
extractor_alloc: raw_collections::alloc::RefBump<'extractor>, _extractor_alloc: raw_collections::alloc::RefBump<'extractor>,
) -> Result<Self::Data> { ) -> Result<Self::Data> {
Ok(FullySend(())) Ok(FullySend(()))
} }
@ -271,7 +269,7 @@ where
Ok(()) as Result<_> Ok(()) as Result<_>
})?; })?;
drop(indexing_context); // required to into_inner the new_fields_ids_map
drop(fields_ids_map_store); drop(fields_ids_map_store);
let fields_ids_map = new_fields_ids_map.into_inner().unwrap(); let fields_ids_map = new_fields_ids_map.into_inner().unwrap();

View File

@ -11,14 +11,12 @@ use roaring::RoaringBitmap;
use super::channel::*; use super::channel::*;
use super::extract::FacetKind; use super::extract::FacetKind;
use super::word_fst_builder::{PrefixData, PrefixDelta, PrefixSettings}; use super::word_fst_builder::{PrefixData, PrefixDelta, PrefixSettings};
use super::{Deletion, DocumentChange, Insertion, KvReaderDelAdd, KvReaderFieldId, Update}; use super::{Deletion, DocumentChange, KvReaderDelAdd, KvReaderFieldId};
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::update::new::channel::MergerOperation; use crate::update::new::channel::MergerOperation;
use crate::update::new::word_fst_builder::WordFstBuilder; use crate::update::new::word_fst_builder::WordFstBuilder;
use crate::update::MergeDeladdCboRoaringBitmaps; use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{ use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Result};
CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Prefix, Result,
};
/// TODO We must return some infos/stats /// TODO We must return some infos/stats
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]
@ -27,7 +25,7 @@ pub fn merge_grenad_entries(
sender: MergerSender, sender: MergerSender,
rtxn: &RoTxn, rtxn: &RoTxn,
index: &Index, index: &Index,
mut global_fields_ids_map: GlobalFieldsIdsMap<'_>, global_fields_ids_map: GlobalFieldsIdsMap<'_>,
) -> Result<MergerResult> { ) -> Result<MergerResult> {
let mut buffer: Vec<u8> = Vec::new(); let mut buffer: Vec<u8> = Vec::new();
let mut documents_ids = index.documents_ids(rtxn)?; let mut documents_ids = index.documents_ids(rtxn)?;
@ -386,7 +384,7 @@ impl FacetFieldIdsDelta {
} }
} }
fn extract_key_data<'a>(&self, key: &'a [u8]) -> (FacetKind, FieldId) { fn extract_key_data(&self, key: &[u8]) -> (FacetKind, FieldId) {
let facet_kind = FacetKind::from(key[0]); let facet_kind = FacetKind::from(key[0]);
let field_id = FieldId::from_be_bytes([key[1], key[2]]); let field_id = FieldId::from_be_bytes([key[1], key[2]]);
(facet_kind, field_id) (facet_kind, field_id)