From 72e7b7846e2926afa987c8e957b92cbba04ba6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Sep 2024 14:42:27 +0200 Subject: [PATCH] Renaming the indexers --- .../update/new/indexer/document_deletion.rs | 6 +-- .../update/new/indexer/document_operation.rs | 40 +++++++++---------- milli/src/update/new/indexer/mod.rs | 8 ++-- milli/src/update/new/indexer/partial_dump.rs | 9 +++-- .../update/new/indexer/update_by_function.rs | 5 ++- milli/src/update/new/mod.rs | 6 +-- 6 files changed, 35 insertions(+), 39 deletions(-) diff --git a/milli/src/update/new/indexer/document_deletion.rs b/milli/src/update/new/indexer/document_deletion.rs index 24ba0c671..2b4bdaeb7 100644 --- a/milli/src/update/new/indexer/document_deletion.rs +++ b/milli/src/update/new/indexer/document_deletion.rs @@ -8,11 +8,11 @@ use crate::documents::PrimaryKey; use crate::update::new::{Deletion, DocumentChange, ItemsPool}; use crate::{FieldsIdsMap, Index, InternalError, Result}; -pub struct DocumentDeletionIndexer { +pub struct DocumentDeletion { pub to_delete: RoaringBitmap, } -impl DocumentDeletionIndexer { +impl DocumentDeletion { pub fn new() -> Self { Self { to_delete: Default::default() } } @@ -22,7 +22,7 @@ impl DocumentDeletionIndexer { } } -impl<'p> Indexer<'p> for DocumentDeletionIndexer { +impl<'p> Indexer<'p> for DocumentDeletion { type Parameter = (&'p Index, &'p FieldsIdsMap, &'p PrimaryKey<'p>); fn document_changes( diff --git a/milli/src/update/new/indexer/document_operation.rs b/milli/src/update/new/indexer/document_operation.rs index 80e7de51a..fdcb84c7b 100644 --- a/milli/src/update/new/indexer/document_operation.rs +++ b/milli/src/update/new/indexer/document_operation.rs @@ -19,9 +19,9 @@ use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, use crate::update::{AvailableIds, IndexDocumentsMethod}; use crate::{DocumentId, Error, FieldsIdsMap, Index, Result, UserError}; -pub struct DocumentOperationIndexer { - pub(crate) operations: Vec, - pub(crate) index_documents_method: IndexDocumentsMethod, +pub struct DocumentOperation { + operations: Vec, + index_documents_method: IndexDocumentsMethod, } pub enum Payload { @@ -34,7 +34,7 @@ pub struct PayloadStats { pub bytes: u64, } -pub enum DocumentOperation { +enum InnerDocOp { Addition(DocumentOffset), Deletion, } @@ -48,7 +48,7 @@ pub struct DocumentOffset { pub offset: u32, } -impl DocumentOperationIndexer { +impl DocumentOperation { pub fn new(method: IndexDocumentsMethod) -> Self { Self { operations: Default::default(), index_documents_method: method } } @@ -70,7 +70,7 @@ impl DocumentOperationIndexer { } } -impl<'p> Indexer<'p> for DocumentOperationIndexer { +impl<'p> Indexer<'p> for DocumentOperation { type Parameter = (&'p Index, &'p RoTxn<'static>, &'p mut FieldsIdsMap, &'p PrimaryKey<'p>); fn document_changes( @@ -120,7 +120,7 @@ impl<'p> Indexer<'p> for DocumentOperationIndexer { let content = content.clone(); let document_offset = DocumentOffset { content, offset }; - let document_operation = DocumentOperation::Addition(document_offset); + let document_operation = InnerDocOp::Addition(document_offset); match docids_version_offsets.get_mut(&external_document_id) { None => { @@ -160,10 +160,10 @@ impl<'p> Indexer<'p> for DocumentOperationIndexer { docids_version_offsets.insert( external_document_id, - (docid, vec![DocumentOperation::Deletion]), + (docid, vec![InnerDocOp::Deletion]), ); } - Some((_, offsets)) => offsets.push(DocumentOperation::Deletion), + Some((_, offsets)) => offsets.push(InnerDocOp::Deletion), } } } @@ -204,7 +204,7 @@ fn merge_document_for_updates( fields_ids_map: &FieldsIdsMap, docid: DocumentId, external_docid: String, - operations: &[DocumentOperation], + operations: &[InnerDocOp], ) -> Result> { let mut document = BTreeMap::<_, Cow<_>>::new(); let current = index.documents.remap_data_type::().get(rtxn, &docid)?; @@ -226,14 +226,12 @@ fn merge_document_for_updates( }); } } + if operations.is_empty() { match current { Some(current) => { - return Ok(Some(DocumentChange::Deletion(Deletion::create( - docid, - external_docid, - current.boxed(), - )))); + let deletion = Deletion::create(docid, external_docid, current.boxed()); + return Ok(Some(DocumentChange::Deletion(deletion))); } None => return Ok(None), } @@ -241,8 +239,8 @@ fn merge_document_for_updates( for operation in operations { let DocumentOffset { content, offset } = match operation { - DocumentOperation::Addition(offset) => offset, - DocumentOperation::Deletion => { + InnerDocOp::Addition(offset) => offset, + InnerDocOp::Deletion => { unreachable!("Deletion in document operations") } }; @@ -283,13 +281,13 @@ fn merge_document_for_replacements( fields_ids_map: &FieldsIdsMap, docid: DocumentId, external_docid: String, - operations: &[DocumentOperation], + operations: &[InnerDocOp], ) -> Result> { let current = index.documents.remap_data_type::().get(rtxn, &docid)?; let current: Option<&KvReaderFieldId> = current.map(Into::into); match operations.last() { - Some(DocumentOperation::Addition(DocumentOffset { content, offset })) => { + Some(InnerDocOp::Addition(DocumentOffset { content, offset })) => { let reader = DocumentsBatchReader::from_reader(Cursor::new(content.as_ref()))?; let (mut cursor, batch_index) = reader.into_cursor_and_fields_index(); let update = cursor.get(*offset)?.expect("must exists"); @@ -318,13 +316,13 @@ fn merge_document_for_replacements( } } } - Some(DocumentOperation::Deletion) => match current { + Some(InnerDocOp::Deletion) => match current { Some(current) => { let deletion = Deletion::create(docid, external_docid, current.boxed()); Ok(Some(DocumentChange::Deletion(deletion))) } None => Ok(None), }, - None => Ok(None), + None => Ok(None), // but it's strange } } diff --git a/milli/src/update/new/indexer/mod.rs b/milli/src/update/new/indexer/mod.rs index 998793b49..85d4dbcb1 100644 --- a/milli/src/update/new/indexer/mod.rs +++ b/milli/src/update/new/indexer/mod.rs @@ -1,13 +1,13 @@ use std::thread; use big_s::S; -pub use document_deletion::DocumentDeletionIndexer; -pub use document_operation::DocumentOperationIndexer; +pub use document_deletion::DocumentDeletion; +pub use document_operation::DocumentOperation; use heed::RwTxn; -pub use partial_dump::PartialDumpIndexer; +pub use partial_dump::PartialDump; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::ThreadPool; -pub use update_by_function::UpdateByFunctionIndexer; +pub use update_by_function::UpdateByFunction; use super::channel::{ extractors_merger_channels, merger_writer_channels, EntryOperation, ExtractorsMergerChannels, diff --git a/milli/src/update/new/indexer/partial_dump.rs b/milli/src/update/new/indexer/partial_dump.rs index 24ba70bcb..7afb96d65 100644 --- a/milli/src/update/new/indexer/partial_dump.rs +++ b/milli/src/update/new/indexer/partial_dump.rs @@ -6,17 +6,17 @@ use crate::update::concurrent_available_ids::ConcurrentAvailableIds; use crate::update::new::{DocumentChange, Insertion, KvWriterFieldId}; use crate::{all_obkv_to_json, Error, FieldsIdsMap, Object, Result, UserError}; -pub struct PartialDumpIndexer { +pub struct PartialDump { pub iter: I, } -impl PartialDumpIndexer { +impl PartialDump { pub fn new_from_jsonlines(iter: I) -> Self { - PartialDumpIndexer { iter } + PartialDump { iter } } } -impl<'p, I> Indexer<'p> for PartialDumpIndexer +impl<'p, I> Indexer<'p> for PartialDump where I: IntoIterator, I::IntoIter: Send + 'p, @@ -45,6 +45,7 @@ where let key = fields_ids_map.id(key).unwrap(); /// TODO better error management let value = serde_json::to_vec(&value).unwrap(); + /// TODO it is not ordered writer.insert(key, value).unwrap(); }); diff --git a/milli/src/update/new/indexer/update_by_function.rs b/milli/src/update/new/indexer/update_by_function.rs index c8c434b72..e9bdf3640 100644 --- a/milli/src/update/new/indexer/update_by_function.rs +++ b/milli/src/update/new/indexer/update_by_function.rs @@ -4,15 +4,16 @@ use super::Indexer; use crate::update::new::DocumentChange; use crate::Result; -pub struct UpdateByFunctionIndexer; +pub struct UpdateByFunction; -impl<'p> Indexer<'p> for UpdateByFunctionIndexer { +impl<'p> Indexer<'p> for UpdateByFunction { type Parameter = (); fn document_changes( self, _param: Self::Parameter, ) -> Result>> + 'p> { + todo!(); Ok(vec![].into_par_iter()) } } diff --git a/milli/src/update/new/mod.rs b/milli/src/update/new/mod.rs index 830565368..cd94bd5d2 100644 --- a/milli/src/update/new/mod.rs +++ b/milli/src/update/new/mod.rs @@ -1,8 +1,4 @@ pub use document_change::{Deletion, DocumentChange, Insertion, Update}; -pub use indexer::{ - index, DocumentDeletionIndexer, DocumentOperationIndexer, PartialDumpIndexer, - UpdateByFunctionIndexer, -}; pub use items_pool::ItemsPool; use super::del_add::DelAdd; @@ -13,7 +9,7 @@ mod merger; // mod extract; // mod global_fields_ids_map; mod channel; -mod indexer; +pub mod indexer; mod items_pool; /// TODO move them elsewhere