From e78b96a65767a0b791064a2e20afb92d3163b1ea Mon Sep 17 00:00:00 2001
From: Clément Renault
Date: Wed, 11 Nov 2020 12:16:01 +0100
Subject: [PATCH] Introduce a more detailed progress status enum

---
 src/update/mod.rs         |  2 ++
 src/update/update_step.rs | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 src/update/update_step.rs

diff --git a/src/update/mod.rs b/src/update/mod.rs
index 3582820b4..75724269a 100644
--- a/src/update/mod.rs
+++ b/src/update/mod.rs
@@ -4,6 +4,7 @@ mod delete_documents;
 mod index_documents;
 mod settings;
 mod update_builder;
+mod update_step;
 mod update_store;
 
 pub use self::available_documents_ids::AvailableDocumentsIds;
@@ -12,4 +13,5 @@ pub use self::delete_documents::DeleteDocuments;
 pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
 pub use self::settings::Settings;
 pub use self::update_builder::UpdateBuilder;
+pub use self::update_step::UpdateIndexingStep;
 pub use self::update_store::UpdateStore;
diff --git a/src/update/update_step.rs b/src/update/update_step.rs
new file mode 100644
index 000000000..691c9ec2a
--- /dev/null
+++ b/src/update/update_step.rs
@@ -0,0 +1,36 @@
+use UpdateIndexingStep::*;
+
+#[derive(Debug, Clone, Copy)]
+pub enum UpdateIndexingStep {
+    /// Transform from the original user-given format (CSV, JSON, JSON lines)
+    /// into a generic format based on the obkv and grenad crates. This step also
+    /// deduplicates potential documents in this batch update by merging or replacing them.
+    TransformFromUserIntoGenericFormat { documents_seen: usize },
+
+    /// This step checks the external document ids, computes the internal ids, and
+    /// merges the documents that are already present in the database.
+    ComputeIdsAndMergeDocuments { documents_seen: usize, total_documents: usize },
+
+    /// Extract the document words using the tokenizer and compute the document
+    /// facets. Store those words, facets, and document ids on disk.
+    IndexDocuments { documents_seen: usize, total_documents: usize },
+
+    /// Merge the previously extracted data (words and facets) into the final LMDB database.
+    /// This extracted data is split into multiple databases.
+    MergeDataIntoFinalDatabase { databases_seen: usize, total_databases: usize },
+}
+
+impl UpdateIndexingStep {
+    pub const fn step_index(&self) -> usize {
+        match self {
+            TransformFromUserIntoGenericFormat { .. } => 0,
+            ComputeIdsAndMergeDocuments { .. } => 1,
+            IndexDocuments { .. } => 2,
+            MergeDataIntoFinalDatabase { .. } => 3,
+        }
+    }
+
+    pub const fn number_of_steps(&self) -> usize {
+        4
+    }
+}
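
--
Usage note (not part of this patch): a minimal sketch of how a consumer of this
enum might render progress, assuming the indexing process hands each step to a
callback. The `report_progress` function, the callback wiring, and the
`milli::update` import path are assumptions for illustration only.

    use milli::update::UpdateIndexingStep;

    // Hypothetical progress reporter: turns each step notification into a
    // human-readable log line. The transform step streams documents and has
    // no known total yet, so it only reports the running count.
    fn report_progress(step: UpdateIndexingStep) {
        use UpdateIndexingStep::*;

        let (seen, total) = match step {
            TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
            ComputeIdsAndMergeDocuments { documents_seen, total_documents } => {
                (documents_seen, Some(total_documents))
            }
            IndexDocuments { documents_seen, total_documents } => {
                (documents_seen, Some(total_documents))
            }
            MergeDataIntoFinalDatabase { databases_seen, total_databases } => {
                (databases_seen, Some(total_databases))
            }
        };

        // `step_index` is zero-based, so add one for display purposes.
        match total {
            Some(total) => println!(
                "step {}/{}: {}/{} processed",
                step.step_index() + 1,
                step.number_of_steps(),
                seen,
                total,
            ),
            None => println!(
                "step {}/{}: {} documents seen",
                step.step_index() + 1,
                step.number_of_steps(),
                seen,
            ),
        }
    }

Because the enum derives Copy, a callback can match on the step and still call
its methods afterwards without cloning.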