Introduce a more detailed progress status enum

Clément Renault 2020-11-11 12:16:01 +01:00
parent 8a4794fc51
commit e78b96a657
2 changed files with 38 additions and 0 deletions

@@ -4,6 +4,7 @@ mod delete_documents;
 mod index_documents;
 mod settings;
 mod update_builder;
+mod update_step;
 mod update_store;
 
 pub use self::available_documents_ids::AvailableDocumentsIds;
@@ -12,4 +13,5 @@ pub use self::delete_documents::DeleteDocuments;
 pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
 pub use self::settings::Settings;
 pub use self::update_builder::UpdateBuilder;
+pub use self::update_step::UpdateIndexingStep;
 pub use self::update_store::UpdateStore;

src/update/update_step.rs (new file, 36 lines)

@@ -0,0 +1,36 @@
use UpdateIndexingStep::*;

#[derive(Debug, Clone, Copy)]
pub enum UpdateIndexingStep {
    /// Transform from the original user-given format (CSV, JSON, JSON lines)
    /// into a generic format based on the obkv and grenad crates. This step also
    /// deduplicates potential documents in this batch update by merging or replacing them.
    TransformFromUserIntoGenericFormat { documents_seen: usize },

    /// This step checks the external document ids, computes the internal ids and merges
    /// the documents that are already present in the database.
    ComputeIdsAndMergeDocuments { documents_seen: usize, total_documents: usize },

    /// Extract the documents' words using the tokenizer and compute the documents'
    /// facets. Store those words, facets and document ids on disk.
    IndexDocuments { documents_seen: usize, total_documents: usize },

    /// Merge the previously extracted data (words and facets) into the final LMDB database.
    /// The extracted data is split into multiple databases.
    MergeDataIntoFinalDatabase { databases_seen: usize, total_databases: usize },
}

impl UpdateIndexingStep {
    pub const fn step_index(&self) -> usize {
        match self {
            TransformFromUserIntoGenericFormat { .. } => 0,
            ComputeIdsAndMergeDocuments { .. } => 1,
            IndexDocuments { .. } => 2,
            MergeDataIntoFinalDatabase { .. } => 3,
        }
    }

    pub const fn number_of_steps(&self) -> usize {
        4
    }
}
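
For context, a minimal sketch of how a caller might render these variants as a one-line progress message. The report_progress function below is hypothetical and not part of this commit; it relies only on the variant fields, the two methods defined above, and the glob import of the variants at the top of the file.

// Hypothetical consumer of UpdateIndexingStep (not part of this commit):
// turns a step notification into a human-readable progress line.
fn report_progress(step: UpdateIndexingStep) {
    let step_number = step.step_index() + 1;
    let total_steps = step.number_of_steps();
    match step {
        // The transform step streams documents, so no total is known yet.
        TransformFromUserIntoGenericFormat { documents_seen } => {
            println!("step {}/{}: transformed {} documents", step_number, total_steps, documents_seen);
        }
        ComputeIdsAndMergeDocuments { documents_seen, total_documents } => {
            println!("step {}/{}: merged {}/{} documents", step_number, total_steps, documents_seen, total_documents);
        }
        IndexDocuments { documents_seen, total_documents } => {
            println!("step {}/{}: indexed {}/{} documents", step_number, total_steps, documents_seen, total_documents);
        }
        MergeDataIntoFinalDatabase { databases_seen, total_databases } => {
            println!("step {}/{}: wrote {}/{} databases", step_number, total_steps, databases_seen, total_databases);
        }
    }
}

Because the enum is Copy and carries its own step_index() and number_of_steps(), a callback like this can compute an overall percentage without any external state.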