Expose an IndexedParallelIterator to the index function

This commit is contained in:
Clément Renault 2024-09-24 17:24:50 +02:00
parent 6e87332410
commit e0c7067355
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
15 changed files with 85 additions and 101 deletions

View File

@ -18,7 +18,6 @@ one indexing operation.
*/ */
use std::collections::{BTreeSet, HashSet}; use std::collections::{BTreeSet, HashSet};
use std::env::VarError;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fmt; use std::fmt;
use std::fs::{self, File}; use std::fs::{self, File};
@ -27,19 +26,18 @@ use std::io::BufWriter;
use dump::IndexMetadata; use dump::IndexMetadata;
use meilisearch_types::error::Code; use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::new::indexer::{ use meilisearch_types::milli::update::new::indexer::{
self, retrieve_or_guess_primary_key, DocumentChanges, self, retrieve_or_guess_primary_key, DocumentChanges,
}; };
use meilisearch_types::milli::update::new::TopLevelMap;
use meilisearch_types::milli::update::{ use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
}; };
use meilisearch_types::milli::vector::parsed_vectors::{ use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
}; };
use meilisearch_types::milli::{self, Filter, InternalError, Object}; use meilisearch_types::milli::{self, Filter, Object};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME};

View File

@ -1,4 +1,4 @@
use std::io::{BufReader, ErrorKind}; use std::io::ErrorKind;
use actix_web::http::header::CONTENT_TYPE; use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data; use actix_web::web::Data;

View File

@ -1247,7 +1247,7 @@ impl<'a> HitMaker<'a> {
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
// First generate a document with all the displayed fields // First generate a document with all the displayed fields
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, &obkv)?; let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
let add_vectors_fid = let add_vectors_fid =
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve); self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);

View File

@ -292,7 +292,7 @@ mod test {
.unwrap() .unwrap()
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -321,7 +321,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -348,7 +348,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -375,7 +375,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -402,7 +402,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -429,7 +429,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -456,7 +456,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -483,7 +483,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -510,7 +510,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,
@ -555,7 +555,7 @@ mod test {
.into_cursor_and_fields_index(); .into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap(); let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap(); let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!( assert_eq!(
val, val,

View File

@ -289,7 +289,7 @@ impl<'a, 'i> Transform<'a, 'i> {
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?; .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::from_slice(base_obkv); let base_obkv = KvReader::from_slice(base_obkv);
if let Some(flattened_obkv) = if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(base_obkv, &mut self.fields_ids_map)?
{ {
// we recreate our buffer with the flattened documents // we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
@ -324,7 +324,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let flattened_obkv = KvReader::from_slice(&obkv_buffer); let flattened_obkv = KvReader::from_slice(&obkv_buffer);
if let Some(obkv) = if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
{ {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8); document_sorter_value_buffer.push(Operation::Addition as u8);
@ -531,7 +531,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// flatten it and push it as to delete in the flattened_sorter // flatten it and push it as to delete in the flattened_sorter
let flattened_obkv = KvReader::from_slice(base_obkv); let flattened_obkv = KvReader::from_slice(base_obkv);
if let Some(obkv) = if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
{ {
// we recreate our buffer with the flattened documents // we recreate our buffer with the flattened documents
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
@ -938,7 +938,7 @@ impl<'a, 'i> Transform<'a, 'i> {
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer { if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
// take the non-flattened version if flatten_from_fields_ids_map returns None. // take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone(); let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?; let flattened = Self::flatten_from_fields_ids_map(obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice); let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
flattened_obkv_buffer.clear(); flattened_obkv_buffer.clear();

View File

@ -1,4 +1,4 @@
use std::collections::{HashMap, VecDeque}; use std::collections::VecDeque;
use std::rc::Rc; use std::rc::Rc;
use heed::RoTxn; use heed::RoTxn;

View File

@ -5,11 +5,7 @@ mod tokenize_document;
use std::fs::File; use std::fs::File;
pub use extract_fid_word_count_docids::FidWordCountDocidsExtractor; pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
pub use extract_word_docids::{
ExactWordDocidsExtractor, WordDocidsExtractor, WordDocidsExtractors, WordDocidsMergers,
WordFidDocidsExtractor, WordPositionDocidsExtractor,
};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor; pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
use grenad::Merger; use grenad::Merger;
use heed::RoTxn; use heed::RoTxn;

View File

@ -1,6 +1,6 @@
use std::sync::Arc; use std::sync::Arc;
use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::DocumentChanges; use super::DocumentChanges;
@ -28,10 +28,11 @@ impl<'p> DocumentChanges<'p> for DocumentDeletion {
self, self,
_fields_ids_map: &mut FieldsIdsMap, _fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter, param: Self::Parameter,
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> { ) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let index = param; let index = param;
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))); let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
Ok(self.to_delete.into_iter().par_bridge().map_with(items, |items, docid| { let to_delete: Vec<_> = self.to_delete.into_iter().collect();
Ok(to_delete.into_par_iter().map_with(items, |items, docid| {
items.with(|rtxn| { items.with(|rtxn| {
let current = index.document(rtxn, docid)?; let current = index.document(rtxn, docid)?;
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed()))) Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))

View File

@ -2,15 +2,15 @@ use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
use std::sync::Arc; use std::sync::Arc;
use heed::types::Bytes; use heed::types::{Bytes, DecodeIgnore};
use heed::RoTxn; use heed::RoTxn;
use memmap2::Mmap; use memmap2::Mmap;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use IndexDocumentsMethod as Idm; use IndexDocumentsMethod as Idm;
use super::super::document_change::DocumentChange; use super::super::document_change::DocumentChange;
use super::super::items_pool::ItemsPool; use super::super::items_pool::ItemsPool;
use super::top_level_map::{CowStr, TopLevelMap}; use super::super::{CowStr, TopLevelMap};
use super::DocumentChanges; use super::DocumentChanges;
use crate::documents::{DocumentIdExtractionError, PrimaryKey}; use crate::documents::{DocumentIdExtractionError, PrimaryKey};
use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update}; use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update};
@ -73,7 +73,7 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
self, self,
fields_ids_map: &mut FieldsIdsMap, fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter, param: Self::Parameter,
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> { ) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let (index, rtxn, primary_key) = param; let (index, rtxn, primary_key) = param;
let documents_ids = index.documents_ids(rtxn)?; let documents_ids = index.documents_ids(rtxn)?;
@ -199,29 +199,26 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
// And finally sort them // And finally sort them
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops)); docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
Ok(docids_version_offsets Ok(docids_version_offsets.into_par_iter().map_with(
.into_par_iter() Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))),
.map_with( move |context_pool, (external_docid, (internal_docid, operations))| {
Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))), context_pool.with(|rtxn| {
move |context_pool, (external_docid, (internal_docid, operations))| { let document_merge_function = match self.index_documents_method {
context_pool.with(|rtxn| { Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
let document_merge_function = match self.index_documents_method { Idm::UpdateDocuments => MergeDocumentForUpdates::merge,
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge, };
Idm::UpdateDocuments => MergeDocumentForUpdates::merge,
};
document_merge_function( document_merge_function(
rtxn, rtxn,
index, index,
&fields_ids_map, &fields_ids_map,
internal_docid, internal_docid,
external_docid.to_string(), // TODO do not clone external_docid.to_string(), // TODO do not clone
&operations, &operations,
) )
}) })
}, },
) ))
.filter_map(Result::transpose))
} }
} }
@ -239,7 +236,7 @@ trait MergeChanges {
docid: DocumentId, docid: DocumentId,
external_docid: String, external_docid: String,
operations: &[InnerDocOp], operations: &[InnerDocOp],
) -> Result<Option<DocumentChange>>; ) -> Result<DocumentChange>;
} }
struct MergeDocumentForReplacement; struct MergeDocumentForReplacement;
@ -266,7 +263,7 @@ impl MergeChanges for MergeDocumentForReplacement {
docid: DocumentId, docid: DocumentId,
external_docid: String, external_docid: String,
operations: &[InnerDocOp], operations: &[InnerDocOp],
) -> Result<Option<DocumentChange>> { ) -> Result<DocumentChange> {
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
let current: Option<&KvReaderFieldId> = current.map(Into::into); let current: Option<&KvReaderFieldId> = current.map(Into::into);
@ -288,21 +285,21 @@ impl MergeChanges for MergeDocumentForReplacement {
let new = writer.into_boxed(); let new = writer.into_boxed();
match current { match current {
Some(current) => Ok(Some(DocumentChange::Update(Update::create( Some(current) => {
docid, let update = Update::create(docid, current.boxed(), new);
current.boxed(), Ok(DocumentChange::Update(update))
new, }
)))), None => Ok(DocumentChange::Insertion(Insertion::create(docid, new))),
None => Ok(Some(DocumentChange::Insertion(Insertion::create(docid, new)))),
} }
} }
Some(InnerDocOp::Deletion) => match current { Some(InnerDocOp::Deletion) => {
Some(current) => { let deletion = match current {
Ok(Some(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))) Some(current) => Deletion::create(docid, current.boxed()),
} None => todo!("Do that with Louis"),
None => Ok(None), };
}, Ok(DocumentChange::Deletion(deletion))
None => Ok(None), // but it's strange }
None => unreachable!("We must not have empty set of operations on a document"),
} }
} }
} }
@ -332,13 +329,13 @@ impl MergeChanges for MergeDocumentForUpdates {
docid: DocumentId, docid: DocumentId,
external_docid: String, external_docid: String,
operations: &[InnerDocOp], operations: &[InnerDocOp],
) -> Result<Option<DocumentChange>> { ) -> Result<DocumentChange> {
let mut document = BTreeMap::<_, Cow<_>>::new(); let mut document = BTreeMap::<_, Cow<_>>::new();
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?; let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
let current: Option<&KvReaderFieldId> = current.map(Into::into); let current: Option<&KvReaderFieldId> = current.map(Into::into);
if operations.is_empty() { if operations.is_empty() {
return Ok(None); // but it's strange unreachable!("We must not have empty set of operations on a document");
} }
let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion)); let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion));
@ -355,13 +352,11 @@ impl MergeChanges for MergeDocumentForUpdates {
} }
if operations.is_empty() { if operations.is_empty() {
match current { let deletion = match current {
Some(current) => { Some(current) => Deletion::create(docid, current.boxed()),
let deletion = Deletion::create(docid, current.boxed()); None => todo!("Do that with Louis"),
return Ok(Some(DocumentChange::Deletion(deletion))); };
} return Ok(DocumentChange::Deletion(deletion));
None => return Ok(None),
}
} }
for operation in operations { for operation in operations {
@ -386,11 +381,11 @@ impl MergeChanges for MergeDocumentForUpdates {
match current { match current {
Some(current) => { Some(current) => {
let update = Update::create(docid, current.boxed(), new); let update = Update::create(docid, current.boxed(), new);
Ok(Some(DocumentChange::Update(update))) Ok(DocumentChange::Update(update))
} }
None => { None => {
let insertion = Insertion::create(docid, new); let insertion = Insertion::create(docid, new);
Ok(Some(DocumentChange::Insertion(insertion))) Ok(DocumentChange::Insertion(insertion))
} }
} }
} }

View File

@ -6,16 +6,15 @@ pub use document_deletion::DocumentDeletion;
pub use document_operation::DocumentOperation; pub use document_operation::DocumentOperation;
use heed::{RoTxn, RwTxn}; use heed::{RoTxn, RwTxn};
pub use partial_dump::PartialDump; pub use partial_dump::PartialDump;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use rayon::ThreadPool; use rayon::ThreadPool;
pub use top_level_map::{CowStr, TopLevelMap};
pub use update_by_function::UpdateByFunction; pub use update_by_function::UpdateByFunction;
use super::channel::*; use super::channel::*;
use super::document_change::DocumentChange; use super::document_change::DocumentChange;
use super::extract::*; use super::extract::*;
use super::merger::merge_grenad_entries; use super::merger::merge_grenad_entries;
use super::StdResult; use super::{StdResult, TopLevelMap};
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::update::new::channel::ExtractorSender; use crate::update::new::channel::ExtractorSender;
use crate::update::GrenadParameters; use crate::update::GrenadParameters;
@ -24,7 +23,6 @@ use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
mod document_deletion; mod document_deletion;
mod document_operation; mod document_operation;
mod partial_dump; mod partial_dump;
mod top_level_map;
mod update_by_function; mod update_by_function;
pub trait DocumentChanges<'p> { pub trait DocumentChanges<'p> {
@ -34,7 +32,7 @@ pub trait DocumentChanges<'p> {
self, self,
fields_ids_map: &mut FieldsIdsMap, fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter, param: Self::Parameter,
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>; ) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
} }
/// This is the main function of this crate. /// This is the main function of this crate.
@ -50,8 +48,7 @@ pub fn index<PI>(
document_changes: PI, document_changes: PI,
) -> Result<()> ) -> Result<()>
where where
PI: IntoParallelIterator<Item = Result<DocumentChange>> + Send, PI: IndexedParallelIterator<Item = Result<DocumentChange>> + Send + Clone,
PI::Iter: Clone,
{ {
let (merger_sender, writer_receiver) = merger_writer_channel(10_000); let (merger_sender, writer_receiver) = merger_writer_channel(10_000);
// This channel acts as a rendezvous point to ensure that we are one task ahead // This channel acts as a rendezvous point to ensure that we are one task ahead

View File

@ -1,4 +1,4 @@
use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, ParallelBridge, ParallelIterator};
use super::DocumentChanges; use super::DocumentChanges;
use crate::documents::{DocumentIdExtractionError, PrimaryKey}; use crate::documents::{DocumentIdExtractionError, PrimaryKey};
@ -18,9 +18,7 @@ impl<I> PartialDump<I> {
impl<'p, I> DocumentChanges<'p> for PartialDump<I> impl<'p, I> DocumentChanges<'p> for PartialDump<I>
where where
I: IntoIterator<Item = Object>, I: IndexedParallelIterator<Item = Object> + Clone + 'p,
I::IntoIter: Send + Clone + 'p,
I::Item: Send,
{ {
type Parameter = (&'p FieldsIdsMap, &'p ConcurrentAvailableIds, &'p PrimaryKey<'p>); type Parameter = (&'p FieldsIdsMap, &'p ConcurrentAvailableIds, &'p PrimaryKey<'p>);
@ -32,10 +30,10 @@ where
self, self,
_fields_ids_map: &mut FieldsIdsMap, _fields_ids_map: &mut FieldsIdsMap,
param: Self::Parameter, param: Self::Parameter,
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> { ) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
let (fields_ids_map, concurrent_available_ids, primary_key) = param; let (fields_ids_map, concurrent_available_ids, primary_key) = param;
Ok(self.iter.into_iter().par_bridge().map(|object| { Ok(self.iter.map(|object| {
let docid = match concurrent_available_ids.next() { let docid = match concurrent_available_ids.next() {
Some(id) => id, Some(id) => id,
None => return Err(Error::UserError(UserError::DocumentLimitReached)), None => return Err(Error::UserError(UserError::DocumentLimitReached)),

View File

@ -1,4 +1,4 @@
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use super::DocumentChanges; use super::DocumentChanges;
use crate::update::new::DocumentChange; use crate::update::new::DocumentChange;
@ -13,7 +13,7 @@ impl<'p> DocumentChanges<'p> for UpdateByFunction {
self, self,
_fields_ids_map: &mut FieldsIdsMap, _fields_ids_map: &mut FieldsIdsMap,
_param: Self::Parameter, _param: Self::Parameter,
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> { ) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
Ok((0..100).into_par_iter().map(|_| todo!())) Ok((0..100).into_par_iter().map(|_| todo!()))
} }
} }

View File

@ -5,7 +5,7 @@ use bincode::ErrorKind;
use fst::{Set, SetBuilder, Streamer}; use fst::{Set, SetBuilder, Streamer};
use grenad::Merger; use grenad::Merger;
use heed::types::Bytes; use heed::types::Bytes;
use heed::{BoxedError, Database, RoTxn}; use heed::{Database, RoTxn};
use memmap2::Mmap; use memmap2::Mmap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use tempfile::tempfile; use tempfile::tempfile;
@ -16,9 +16,7 @@ use super::{Deletion, DocumentChange, Insertion, KvReaderDelAdd, KvReaderFieldId
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::update::new::channel::MergerOperation; use crate::update::new::channel::MergerOperation;
use crate::update::MergeDeladdCboRoaringBitmaps; use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{ use crate::{CboRoaringBitmapCodec, Error, GeoPoint, GlobalFieldsIdsMap, Index, Result};
CboRoaringBitmapCodec, Error, GeoPoint, GlobalFieldsIdsMap, Index, InternalError, Result,
};
/// TODO We must return some infos/stats /// TODO We must return some infos/stats
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]

View File

@ -1,6 +1,6 @@
pub use document_change::{Deletion, DocumentChange, Insertion, Update}; pub use document_change::{Deletion, DocumentChange, Insertion, Update};
pub use indexer::{CowStr, TopLevelMap};
pub use items_pool::ItemsPool; pub use items_pool::ItemsPool;
pub use top_level_map::{CowStr, TopLevelMap};
use super::del_add::DelAdd; use super::del_add::DelAdd;
use crate::FieldId; use crate::FieldId;
@ -11,6 +11,7 @@ mod extract;
pub mod indexer; pub mod indexer;
mod items_pool; mod items_pool;
mod merger; mod merger;
mod top_level_map;
/// TODO move them elsewhere /// TODO move them elsewhere
pub type StdResult<T, E> = std::result::Result<T, E>; pub type StdResult<T, E> = std::result::Result<T, E>;