From 700757c01f54ec4e084539cc235e23d183c777c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 7 Nov 2024 15:32:04 +0100 Subject: [PATCH] Adding a new step --- crates/milli/src/update/new/indexer/mod.rs | 50 ++++++++++++++++------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index e7f0cc825..001f59fe4 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -58,6 +58,7 @@ mod steps { "extracting embeddings", "writing to database", "writing embeddings to database", + "waiting for extractors", "post-processing facets", "post-processing words", "finalizing", @@ -99,16 +100,21 @@ mod steps { step(6) } - pub const fn post_processing_facets() -> (u16, &'static str) { + pub const fn waiting_extractors() -> (u16, &'static str) { step(7) } - pub const fn post_processing_words() -> (u16, &'static str) { + + pub const fn post_processing_facets() -> (u16, &'static str) { step(8) } - pub const fn finalizing() -> (u16, &'static str) { + pub const fn post_processing_words() -> (u16, &'static str) { step(9) } + + pub const fn finalizing() -> (u16, &'static str) { + step(10) + } } /// This is the main function of this crate. @@ -169,7 +175,7 @@ where let document_ids = &mut document_ids; // TODO manage the errors correctly let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { - pool.in_place_scope(|_s| { + let result = pool.in_place_scope(|_s| { let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); let _entered = span.enter(); @@ -231,7 +237,15 @@ where exact_word_docids, word_position_docids, fid_word_count_docids, - } = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs, finished_steps, total_steps, step_name)?; + } = WordDocidsExtractors::run_extraction( + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + finished_steps, + total_steps, + step_name, + )?; // TODO Word Docids Merger // extractor_sender.send_searchable::(word_docids).unwrap(); @@ -358,13 +372,6 @@ where embedding_sender.finish(user_provided).unwrap(); } - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); - let _entered = span.enter(); - let (finished_steps, step_name) = steps::write_db(); - (indexing_context.send_progress)(Progress { finished_steps, total_steps, step_name, finished_total_documents: None }); - } - // TODO THIS IS TOO MUCH // - [ ] Extract fieldid docid facet number // - [ ] Extract fieldid docid facet string @@ -381,7 +388,16 @@ where // - [x] Extract fieldid facet string docids Result::Ok(facet_field_ids_delta) - }) + }); + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); + let _entered = span.enter(); + let (finished_steps, step_name) = steps::write_db(); + (indexing_context.send_progress)(Progress { finished_steps, total_steps, step_name, finished_total_documents: None }); + } + + result })?; let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); @@ -494,6 +510,14 @@ where } } + let (finished_steps, step_name) = steps::waiting_extractors(); + (indexing_context.send_progress)(Progress { + finished_steps, + total_steps, + step_name, + finished_total_documents: None, + }); + let facet_field_ids_delta = extractor_handle.join().unwrap()?; let (finished_steps, step_name) = steps::post_processing_facets();