mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Expose an IndexedParallelIterator to the index function
This commit is contained in:
parent
6e87332410
commit
e0c7067355
@ -18,7 +18,6 @@ one indexing operation.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::{BTreeSet, HashSet};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
use std::env::VarError;
|
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
@ -27,19 +26,18 @@ use std::io::BufWriter;
|
|||||||
use dump::IndexMetadata;
|
use dump::IndexMetadata;
|
||||||
use meilisearch_types::error::Code;
|
use meilisearch_types::error::Code;
|
||||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
use meilisearch_types::milli::heed::CompactionOption;
|
use meilisearch_types::milli::heed::CompactionOption;
|
||||||
use meilisearch_types::milli::update::new::indexer::{
|
use meilisearch_types::milli::update::new::indexer::{
|
||||||
self, retrieve_or_guess_primary_key, DocumentChanges,
|
self, retrieve_or_guess_primary_key, DocumentChanges,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::update::new::TopLevelMap;
|
|
||||||
use meilisearch_types::milli::update::{
|
use meilisearch_types::milli::update::{
|
||||||
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
|
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::vector::parsed_vectors::{
|
use meilisearch_types::milli::vector::parsed_vectors::{
|
||||||
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::{self, Filter, InternalError, Object};
|
use meilisearch_types::milli::{self, Filter, Object};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::io::{BufReader, ErrorKind};
|
use std::io::ErrorKind;
|
||||||
|
|
||||||
use actix_web::http::header::CONTENT_TYPE;
|
use actix_web::http::header::CONTENT_TYPE;
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
|
@ -1247,7 +1247,7 @@ impl<'a> HitMaker<'a> {
|
|||||||
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
|
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
|
||||||
|
|
||||||
// First generate a document with all the displayed fields
|
// First generate a document with all the displayed fields
|
||||||
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, &obkv)?;
|
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
|
||||||
|
|
||||||
let add_vectors_fid =
|
let add_vectors_fid =
|
||||||
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
|
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
|
||||||
|
@ -292,7 +292,7 @@ mod test {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -321,7 +321,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -348,7 +348,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -375,7 +375,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -402,7 +402,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -429,7 +429,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -456,7 +456,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -483,7 +483,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -510,7 +510,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -555,7 +555,7 @@ mod test {
|
|||||||
.into_cursor_and_fields_index();
|
.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let doc = cursor.next_document().unwrap().unwrap();
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
|
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
|
@ -289,7 +289,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
|
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
|
||||||
let base_obkv = KvReader::from_slice(base_obkv);
|
let base_obkv = KvReader::from_slice(base_obkv);
|
||||||
if let Some(flattened_obkv) =
|
if let Some(flattened_obkv) =
|
||||||
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
|
Self::flatten_from_fields_ids_map(base_obkv, &mut self.fields_ids_map)?
|
||||||
{
|
{
|
||||||
// we recreate our buffer with the flattened documents
|
// we recreate our buffer with the flattened documents
|
||||||
document_sorter_value_buffer.clear();
|
document_sorter_value_buffer.clear();
|
||||||
@ -324,7 +324,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
let flattened_obkv = KvReader::from_slice(&obkv_buffer);
|
let flattened_obkv = KvReader::from_slice(&obkv_buffer);
|
||||||
if let Some(obkv) =
|
if let Some(obkv) =
|
||||||
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
|
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
|
||||||
{
|
{
|
||||||
document_sorter_value_buffer.clear();
|
document_sorter_value_buffer.clear();
|
||||||
document_sorter_value_buffer.push(Operation::Addition as u8);
|
document_sorter_value_buffer.push(Operation::Addition as u8);
|
||||||
@ -531,7 +531,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
// flatten it and push it as to delete in the flattened_sorter
|
// flatten it and push it as to delete in the flattened_sorter
|
||||||
let flattened_obkv = KvReader::from_slice(base_obkv);
|
let flattened_obkv = KvReader::from_slice(base_obkv);
|
||||||
if let Some(obkv) =
|
if let Some(obkv) =
|
||||||
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
|
Self::flatten_from_fields_ids_map(flattened_obkv, &mut self.fields_ids_map)?
|
||||||
{
|
{
|
||||||
// we recreate our buffer with the flattened documents
|
// we recreate our buffer with the flattened documents
|
||||||
document_sorter_value_buffer.clear();
|
document_sorter_value_buffer.clear();
|
||||||
@ -938,7 +938,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
|
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
|
||||||
// take the non-flattened version if flatten_from_fields_ids_map returns None.
|
// take the non-flattened version if flatten_from_fields_ids_map returns None.
|
||||||
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
|
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
|
||||||
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
|
let flattened = Self::flatten_from_fields_ids_map(obkv, &mut fields_ids_map)?;
|
||||||
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
|
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
|
||||||
|
|
||||||
flattened_obkv_buffer.clear();
|
flattened_obkv_buffer.clear();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{HashMap, VecDeque};
|
use std::collections::VecDeque;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
@ -5,11 +5,7 @@ mod tokenize_document;
|
|||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
pub use extract_fid_word_count_docids::FidWordCountDocidsExtractor;
|
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
|
||||||
pub use extract_word_docids::{
|
|
||||||
ExactWordDocidsExtractor, WordDocidsExtractor, WordDocidsExtractors, WordDocidsMergers,
|
|
||||||
WordFidDocidsExtractor, WordPositionDocidsExtractor,
|
|
||||||
};
|
|
||||||
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
||||||
use grenad::Merger;
|
use grenad::Merger;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use rayon::iter::{ParallelBridge, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
@ -28,10 +28,11 @@ impl<'p> DocumentChanges<'p> for DocumentDeletion {
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let index = param;
|
let index = param;
|
||||||
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
|
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
|
||||||
Ok(self.to_delete.into_iter().par_bridge().map_with(items, |items, docid| {
|
let to_delete: Vec<_> = self.to_delete.into_iter().collect();
|
||||||
|
Ok(to_delete.into_par_iter().map_with(items, |items, docid| {
|
||||||
items.with(|rtxn| {
|
items.with(|rtxn| {
|
||||||
let current = index.document(rtxn, docid)?;
|
let current = index.document(rtxn, docid)?;
|
||||||
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))
|
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))
|
||||||
|
@ -2,15 +2,15 @@ use std::borrow::Cow;
|
|||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use heed::types::Bytes;
|
use heed::types::{Bytes, DecodeIgnore};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
use IndexDocumentsMethod as Idm;
|
use IndexDocumentsMethod as Idm;
|
||||||
|
|
||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
use super::super::items_pool::ItemsPool;
|
use super::super::items_pool::ItemsPool;
|
||||||
use super::top_level_map::{CowStr, TopLevelMap};
|
use super::super::{CowStr, TopLevelMap};
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
|
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
|
||||||
use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update};
|
use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update};
|
||||||
@ -73,7 +73,7 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
|
|||||||
self,
|
self,
|
||||||
fields_ids_map: &mut FieldsIdsMap,
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let (index, rtxn, primary_key) = param;
|
let (index, rtxn, primary_key) = param;
|
||||||
|
|
||||||
let documents_ids = index.documents_ids(rtxn)?;
|
let documents_ids = index.documents_ids(rtxn)?;
|
||||||
@ -199,29 +199,26 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
|
|||||||
// And finally sort them
|
// And finally sort them
|
||||||
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
|
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
|
||||||
|
|
||||||
Ok(docids_version_offsets
|
Ok(docids_version_offsets.into_par_iter().map_with(
|
||||||
.into_par_iter()
|
Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))),
|
||||||
.map_with(
|
move |context_pool, (external_docid, (internal_docid, operations))| {
|
||||||
Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))),
|
context_pool.with(|rtxn| {
|
||||||
move |context_pool, (external_docid, (internal_docid, operations))| {
|
let document_merge_function = match self.index_documents_method {
|
||||||
context_pool.with(|rtxn| {
|
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
|
||||||
let document_merge_function = match self.index_documents_method {
|
Idm::UpdateDocuments => MergeDocumentForUpdates::merge,
|
||||||
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
|
};
|
||||||
Idm::UpdateDocuments => MergeDocumentForUpdates::merge,
|
|
||||||
};
|
|
||||||
|
|
||||||
document_merge_function(
|
document_merge_function(
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
&fields_ids_map,
|
&fields_ids_map,
|
||||||
internal_docid,
|
internal_docid,
|
||||||
external_docid.to_string(), // TODO do not clone
|
external_docid.to_string(), // TODO do not clone
|
||||||
&operations,
|
&operations,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
)
|
))
|
||||||
.filter_map(Result::transpose))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,7 +236,7 @@ trait MergeChanges {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
external_docid: String,
|
external_docid: String,
|
||||||
operations: &[InnerDocOp],
|
operations: &[InnerDocOp],
|
||||||
) -> Result<Option<DocumentChange>>;
|
) -> Result<DocumentChange>;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MergeDocumentForReplacement;
|
struct MergeDocumentForReplacement;
|
||||||
@ -266,7 +263,7 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
external_docid: String,
|
external_docid: String,
|
||||||
operations: &[InnerDocOp],
|
operations: &[InnerDocOp],
|
||||||
) -> Result<Option<DocumentChange>> {
|
) -> Result<DocumentChange> {
|
||||||
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
|
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
|
||||||
let current: Option<&KvReaderFieldId> = current.map(Into::into);
|
let current: Option<&KvReaderFieldId> = current.map(Into::into);
|
||||||
|
|
||||||
@ -288,21 +285,21 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
let new = writer.into_boxed();
|
let new = writer.into_boxed();
|
||||||
|
|
||||||
match current {
|
match current {
|
||||||
Some(current) => Ok(Some(DocumentChange::Update(Update::create(
|
Some(current) => {
|
||||||
docid,
|
let update = Update::create(docid, current.boxed(), new);
|
||||||
current.boxed(),
|
Ok(DocumentChange::Update(update))
|
||||||
new,
|
}
|
||||||
)))),
|
None => Ok(DocumentChange::Insertion(Insertion::create(docid, new))),
|
||||||
None => Ok(Some(DocumentChange::Insertion(Insertion::create(docid, new)))),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(InnerDocOp::Deletion) => match current {
|
Some(InnerDocOp::Deletion) => {
|
||||||
Some(current) => {
|
let deletion = match current {
|
||||||
Ok(Some(DocumentChange::Deletion(Deletion::create(docid, current.boxed()))))
|
Some(current) => Deletion::create(docid, current.boxed()),
|
||||||
}
|
None => todo!("Do that with Louis"),
|
||||||
None => Ok(None),
|
};
|
||||||
},
|
Ok(DocumentChange::Deletion(deletion))
|
||||||
None => Ok(None), // but it's strange
|
}
|
||||||
|
None => unreachable!("We must not have empty set of operations on a document"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -332,13 +329,13 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
external_docid: String,
|
external_docid: String,
|
||||||
operations: &[InnerDocOp],
|
operations: &[InnerDocOp],
|
||||||
) -> Result<Option<DocumentChange>> {
|
) -> Result<DocumentChange> {
|
||||||
let mut document = BTreeMap::<_, Cow<_>>::new();
|
let mut document = BTreeMap::<_, Cow<_>>::new();
|
||||||
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
|
let current = index.documents.remap_data_type::<Bytes>().get(rtxn, &docid)?;
|
||||||
let current: Option<&KvReaderFieldId> = current.map(Into::into);
|
let current: Option<&KvReaderFieldId> = current.map(Into::into);
|
||||||
|
|
||||||
if operations.is_empty() {
|
if operations.is_empty() {
|
||||||
return Ok(None); // but it's strange
|
unreachable!("We must not have empty set of operations on a document");
|
||||||
}
|
}
|
||||||
|
|
||||||
let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion));
|
let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion));
|
||||||
@ -355,13 +352,11 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if operations.is_empty() {
|
if operations.is_empty() {
|
||||||
match current {
|
let deletion = match current {
|
||||||
Some(current) => {
|
Some(current) => Deletion::create(docid, current.boxed()),
|
||||||
let deletion = Deletion::create(docid, current.boxed());
|
None => todo!("Do that with Louis"),
|
||||||
return Ok(Some(DocumentChange::Deletion(deletion)));
|
};
|
||||||
}
|
return Ok(DocumentChange::Deletion(deletion));
|
||||||
None => return Ok(None),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for operation in operations {
|
for operation in operations {
|
||||||
@ -386,11 +381,11 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
match current {
|
match current {
|
||||||
Some(current) => {
|
Some(current) => {
|
||||||
let update = Update::create(docid, current.boxed(), new);
|
let update = Update::create(docid, current.boxed(), new);
|
||||||
Ok(Some(DocumentChange::Update(update)))
|
Ok(DocumentChange::Update(update))
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
let insertion = Insertion::create(docid, new);
|
let insertion = Insertion::create(docid, new);
|
||||||
Ok(Some(DocumentChange::Insertion(insertion)))
|
Ok(DocumentChange::Insertion(insertion))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,16 +6,15 @@ pub use document_deletion::DocumentDeletion;
|
|||||||
pub use document_operation::DocumentOperation;
|
pub use document_operation::DocumentOperation;
|
||||||
use heed::{RoTxn, RwTxn};
|
use heed::{RoTxn, RwTxn};
|
||||||
pub use partial_dump::PartialDump;
|
pub use partial_dump::PartialDump;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
pub use top_level_map::{CowStr, TopLevelMap};
|
|
||||||
pub use update_by_function::UpdateByFunction;
|
pub use update_by_function::UpdateByFunction;
|
||||||
|
|
||||||
use super::channel::*;
|
use super::channel::*;
|
||||||
use super::document_change::DocumentChange;
|
use super::document_change::DocumentChange;
|
||||||
use super::extract::*;
|
use super::extract::*;
|
||||||
use super::merger::merge_grenad_entries;
|
use super::merger::merge_grenad_entries;
|
||||||
use super::StdResult;
|
use super::{StdResult, TopLevelMap};
|
||||||
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
|
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
|
||||||
use crate::update::new::channel::ExtractorSender;
|
use crate::update::new::channel::ExtractorSender;
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
@ -24,7 +23,6 @@ use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
|
|||||||
mod document_deletion;
|
mod document_deletion;
|
||||||
mod document_operation;
|
mod document_operation;
|
||||||
mod partial_dump;
|
mod partial_dump;
|
||||||
mod top_level_map;
|
|
||||||
mod update_by_function;
|
mod update_by_function;
|
||||||
|
|
||||||
pub trait DocumentChanges<'p> {
|
pub trait DocumentChanges<'p> {
|
||||||
@ -34,7 +32,7 @@ pub trait DocumentChanges<'p> {
|
|||||||
self,
|
self,
|
||||||
fields_ids_map: &mut FieldsIdsMap,
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
|
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This is the main function of this crate.
|
/// This is the main function of this crate.
|
||||||
@ -50,8 +48,7 @@ pub fn index<PI>(
|
|||||||
document_changes: PI,
|
document_changes: PI,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
PI: IntoParallelIterator<Item = Result<DocumentChange>> + Send,
|
PI: IndexedParallelIterator<Item = Result<DocumentChange>> + Send + Clone,
|
||||||
PI::Iter: Clone,
|
|
||||||
{
|
{
|
||||||
let (merger_sender, writer_receiver) = merger_writer_channel(10_000);
|
let (merger_sender, writer_receiver) = merger_writer_channel(10_000);
|
||||||
// This channel acts as a rendezvous point to ensure that we are one task ahead
|
// This channel acts as a rendezvous point to ensure that we are one task ahead
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use rayon::iter::{ParallelBridge, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, ParallelBridge, ParallelIterator};
|
||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
|
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
|
||||||
@ -18,9 +18,7 @@ impl<I> PartialDump<I> {
|
|||||||
|
|
||||||
impl<'p, I> DocumentChanges<'p> for PartialDump<I>
|
impl<'p, I> DocumentChanges<'p> for PartialDump<I>
|
||||||
where
|
where
|
||||||
I: IntoIterator<Item = Object>,
|
I: IndexedParallelIterator<Item = Object> + Clone + 'p,
|
||||||
I::IntoIter: Send + Clone + 'p,
|
|
||||||
I::Item: Send,
|
|
||||||
{
|
{
|
||||||
type Parameter = (&'p FieldsIdsMap, &'p ConcurrentAvailableIds, &'p PrimaryKey<'p>);
|
type Parameter = (&'p FieldsIdsMap, &'p ConcurrentAvailableIds, &'p PrimaryKey<'p>);
|
||||||
|
|
||||||
@ -32,10 +30,10 @@ where
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
||||||
|
|
||||||
Ok(self.iter.into_iter().par_bridge().map(|object| {
|
Ok(self.iter.map(|object| {
|
||||||
let docid = match concurrent_available_ids.next() {
|
let docid = match concurrent_available_ids.next() {
|
||||||
Some(id) => id,
|
Some(id) => id,
|
||||||
None => return Err(Error::UserError(UserError::DocumentLimitReached)),
|
None => return Err(Error::UserError(UserError::DocumentLimitReached)),
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
@ -13,7 +13,7 @@ impl<'p> DocumentChanges<'p> for UpdateByFunction {
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
_param: Self::Parameter,
|
_param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
Ok((0..100).into_par_iter().map(|_| todo!()))
|
Ok((0..100).into_par_iter().map(|_| todo!()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ use bincode::ErrorKind;
|
|||||||
use fst::{Set, SetBuilder, Streamer};
|
use fst::{Set, SetBuilder, Streamer};
|
||||||
use grenad::Merger;
|
use grenad::Merger;
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use heed::{BoxedError, Database, RoTxn};
|
use heed::{Database, RoTxn};
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use tempfile::tempfile;
|
use tempfile::tempfile;
|
||||||
@ -16,9 +16,7 @@ use super::{Deletion, DocumentChange, Insertion, KvReaderDelAdd, KvReaderFieldId
|
|||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::new::channel::MergerOperation;
|
use crate::update::new::channel::MergerOperation;
|
||||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
use crate::update::MergeDeladdCboRoaringBitmaps;
|
||||||
use crate::{
|
use crate::{CboRoaringBitmapCodec, Error, GeoPoint, GlobalFieldsIdsMap, Index, Result};
|
||||||
CboRoaringBitmapCodec, Error, GeoPoint, GlobalFieldsIdsMap, Index, InternalError, Result,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// TODO We must return some infos/stats
|
/// TODO We must return some infos/stats
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
pub use document_change::{Deletion, DocumentChange, Insertion, Update};
|
pub use document_change::{Deletion, DocumentChange, Insertion, Update};
|
||||||
pub use indexer::{CowStr, TopLevelMap};
|
|
||||||
pub use items_pool::ItemsPool;
|
pub use items_pool::ItemsPool;
|
||||||
|
pub use top_level_map::{CowStr, TopLevelMap};
|
||||||
|
|
||||||
use super::del_add::DelAdd;
|
use super::del_add::DelAdd;
|
||||||
use crate::FieldId;
|
use crate::FieldId;
|
||||||
@ -11,6 +11,7 @@ mod extract;
|
|||||||
pub mod indexer;
|
pub mod indexer;
|
||||||
mod items_pool;
|
mod items_pool;
|
||||||
mod merger;
|
mod merger;
|
||||||
|
mod top_level_map;
|
||||||
|
|
||||||
/// TODO move them elsewhere
|
/// TODO move them elsewhere
|
||||||
pub type StdResult<T, E> = std::result::Result<T, E>;
|
pub type StdResult<T, E> = std::result::Result<T, E>;
|
||||||
|
Loading…
Reference in New Issue
Block a user