From eef95de30ea1a5fe0592bb86e9981e4d5263de6a Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 10 Jul 2023 18:41:54 +0200 Subject: [PATCH] First iteration on exposing puffin profiling --- Cargo.lock | 39 +++++++++++ index-scheduler/Cargo.toml | 1 + index-scheduler/src/batch.rs | 7 ++ index-scheduler/src/lib.rs | 2 + meilisearch/Cargo.toml | 2 + meilisearch/src/main.rs | 3 + milli/Cargo.toml | 3 + milli/src/update/clear_documents.rs | 2 + milli/src/update/delete_documents.rs | 2 + milli/src/update/index_documents/enrich.rs | 2 + .../extract/extract_docid_word_positions.rs | 2 + .../extract/extract_facet_number_docids.rs | 2 + .../extract/extract_facet_string_docids.rs | 2 + .../extract/extract_fid_docid_facet_values.rs | 2 + .../extract/extract_fid_word_count_docids.rs | 2 + .../extract/extract_geo_points.rs | 2 + .../extract/extract_vector_points.rs | 2 + .../extract/extract_word_docids.rs | 2 + .../extract/extract_word_fid_docids.rs | 2 + .../extract_word_pair_proximity_docids.rs | 2 + .../extract/extract_word_position_docids.rs | 2 + .../src/update/index_documents/extract/mod.rs | 4 ++ .../index_documents/helpers/grenad_helpers.rs | 1 + milli/src/update/index_documents/mod.rs | 57 +++++++++++----- milli/src/update/index_documents/transform.rs | 2 + .../src/update/index_documents/typed_chunk.rs | 66 +++++++++++++++++++ milli/src/update/prefix_word_pairs/mod.rs | 2 + .../update/prefix_word_pairs/prefix_word.rs | 2 + .../update/prefix_word_pairs/word_prefix.rs | 1 + milli/src/update/settings.rs | 2 + milli/src/update/word_prefix_docids.rs | 2 + .../src/update/words_prefix_integer_docids.rs | 1 + milli/src/update/words_prefixes_fst.rs | 2 + 33 files changed, 210 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9a59e2ea8..8c3d76980 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1973,6 +1973,7 @@ dependencies = [ "meilisearch-types", "nelson", "page_size 0.5.0", + "puffin", "roaring", "serde", "serde_json", @@ -2498,6 +2499,12 @@ dependencies 
= [ "syn 1.0.109", ] +[[package]] +name = "lz4_flex" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83" + [[package]] name = "manifest-dir-macros" version = "0.1.17" @@ -2587,6 +2594,8 @@ dependencies = [ "pin-project-lite", "platform-dirs", "prometheus", + "puffin", + "puffin_http", "rand", "rayon", "regex", @@ -2731,6 +2740,7 @@ dependencies = [ "obkv", "once_cell", "ordered-float", + "puffin", "rand", "rand_pcg", "rayon", @@ -3256,6 +3266,35 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "puffin" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "cfg-if", + "instant", + "lz4_flex", + "once_cell", + "parking_lot", + "serde", +] + +[[package]] +name = "puffin_http" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193" +dependencies = [ + "anyhow", + "crossbeam-channel", + "log", + "puffin", +] + [[package]] name = "quote" version = "1.0.28" diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 36c28cd67..9e7c2ae4b 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -22,6 +22,7 @@ log = "0.4.17" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" +puffin = "0.16.0" roaring = { version = "0.10.1", features = ["serde"] } serde = { version = "1.0.160", features = ["derive"] } serde_json = { version = "1.0.95", features = ["preserve_order"] } diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 
2948e7506..fb865a98b 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -471,6 +471,8 @@ impl IndexScheduler { #[cfg(test)] self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; + puffin::profile_function!(); + let enqueued = &self.get_status(rtxn, Status::Enqueued)?; let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; @@ -575,6 +577,9 @@ impl IndexScheduler { self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?; self.breakpoint(crate::Breakpoint::InsideProcessBatch); } + + puffin::profile_function!(format!("{:?}", batch)); + match batch { Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { // 1. Retrieve the tasks that matched the query at enqueue-time. @@ -1111,6 +1116,8 @@ impl IndexScheduler { index: &'i Index, operation: IndexOperation, ) -> Result> { + puffin::profile_function!(); + match operation { IndexOperation::DocumentClear { mut tasks, .. } => { let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 092851edd..fc5a8a0f9 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1032,6 +1032,8 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } + puffin::GlobalProfiler::lock().new_frame(); + self.cleanup_task_queue()?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 7d6601ac5..1f51cea84 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -67,6 +67,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" } pin-project-lite = "0.2.9" platform-dirs = "0.3.0" prometheus = { version = "0.13.3", features = ["process"] } +puffin = "0.16.0" +puffin_http = "0.13.0" rand = "0.8.5" rayon = "1.7.0" regex = "1.7.3" diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index d6b9f027d..5189113ad 100644 --- 
a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -29,6 +29,9 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + puffin::set_scopes_on(true); + let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?; + anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index ce3070e5d..80b0a5f86 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -67,6 +67,9 @@ filter-parser = { path = "../filter-parser" } # documents words self-join itertools = "0.10.5" +# profiling +puffin = "0.16.0" + # logging log = "0.4.17" logging_timer = "1.1.0" diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 5fdf8ef49..c021647ee 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { } pub fn execute(self) -> Result { + puffin::profile_function!(); + self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; let Index { env: _env, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index c9124e591..48415adef 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -110,6 +110,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Some(docid) } pub fn execute(self) -> Result { + puffin::profile_function!(); + let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } = self.execute_inner()?; diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 09599ac82..35a7c33f3 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -31,6 +31,8 @@ pub fn enrich_documents_batch( 
autogenerate_docids: bool, reader: DocumentsBatchReader, ) -> Result, UserError>> { + puffin::profile_function!(); + let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index(); let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?; diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 8985534db..f726bf866 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -30,6 +30,8 @@ pub fn extract_docid_word_positions( stop_words: Option<&fst::Set<&[u8]>>, max_positions_per_attributes: Option, ) -> Result<(RoaringBitmap, grenad::Reader, ScriptLanguageDocidsMap)> { + puffin::profile_function!(); + let max_positions_per_attributes = max_positions_per_attributes .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); let max_memory = indexer.max_memory_by_thread(); diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index 33dd5ce5b..dec02b120 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -20,6 +20,8 @@ pub fn extract_facet_number_docids( docid_fid_facet_number: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut facet_number_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 0d9c0981e..e5e864b66 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ 
b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -18,6 +18,8 @@ pub fn extract_facet_string_docids( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut facet_string_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 77a5561fe..882d7779f 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -34,6 +34,8 @@ pub fn extract_fid_docid_facet_values( indexer: GrenadParameters, faceted_fields: &HashSet, ) -> Result { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut fid_docid_facet_numbers_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index fe8eb93ed..79cf4c7fe 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut fid_word_count_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index ddb38abe5..139e8230a 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -19,6 +19,8 @@ pub fn extract_geo_points( primary_key_id: FieldId, (lat_fid, lng_fid): 
(FieldId, FieldId), ) -> Result> { + puffin::profile_function!(); + let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 0fad3be07..69fbff9b5 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -19,6 +19,8 @@ pub fn extract_vector_points( primary_key_id: FieldId, vectors_fid: FieldId, ) -> Result> { + puffin::profile_function!(); + let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index da59f9dde..f1656d024 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -27,6 +27,8 @@ pub fn extract_word_docids( indexer: GrenadParameters, exact_attributes: &HashSet, ) -> Result<(grenad::Reader, grenad::Reader)> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut word_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_word_fid_docids.rs b/milli/src/update/index_documents/extract/extract_word_fid_docids.rs index 9ee33ea0d..aaf8fad79 100644 --- a/milli/src/update/index_documents/extract/extract_word_fid_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_fid_docids.rs @@ -15,6 +15,8 @@ pub fn extract_word_fid_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut word_fid_docids_sorter = create_sorter( diff --git 
a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 6707fc268..4c910f32e 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut word_pair_proximity_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index 9bb43b004..e945833e6 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -18,6 +18,8 @@ pub fn extract_word_position_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, ) -> Result> { + puffin::profile_function!(); + let max_memory = indexer.max_memory_by_thread(); let mut word_position_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 6259c7272..1b1dc1420 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -52,6 +52,8 @@ pub(crate) fn data_from_obkv_documents( max_positions_per_attributes: Option, exact_attributes: HashSet, ) -> Result<()> { + puffin::profile_function!(); + original_obkv_chunks .par_bridge() .map(|original_documents_chunk| { @@ -238,11 +240,13 @@ fn spawn_extraction_task( M::Output: Send, { rayon::spawn(move || { + puffin::profile_scope!("extract_multiple_chunks", name); let chunks: Result = chunks.into_par_iter().map(|chunk| extract_fn(chunk, 
indexer)).collect(); rayon::spawn(move || match chunks { Ok(chunks) => { debug!("merge {} database", name); + puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); } diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index eb66a28fe..d5f5ac0bd 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database( sorter: Sorter, merge: MergeFn, ) -> Result<()> { + puffin::profile_function!(); debug!("Writing MTBL sorter..."); let before = Instant::now(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 20a36237e..1b2aab827 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -137,6 +137,8 @@ where mut self, reader: DocumentsBatchReader, ) -> Result<(Self, StdResult)> { + puffin::profile_function!(); + // Early return when there is no document to add if reader.is_empty() { return Ok((self, Ok(0))); @@ -175,6 +177,8 @@ where mut self, to_delete: Vec, ) -> Result<(Self, StdResult)> { + puffin::profile_function!(); + // Early return when there is no document to add if to_delete.is_empty() { return Ok((self, Ok(0))); @@ -194,6 +198,8 @@ where #[logging_timer::time("IndexDocuments::{}")] pub fn execute(mut self) -> Result { + puffin::profile_function!(); + if self.added_documents == 0 { let number_of_documents = self.index.number_of_documents(self.wtxn)?; return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); @@ -232,6 +238,8 @@ where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { + puffin::profile_function!(); + let TransformOutput { primary_key, fields_ids_map, @@ -322,6 +330,7 @@ where // Run extraction pipeline in parallel. 
pool.install(|| { + puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks let original_chunk_iter = grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size); @@ -477,6 +486,8 @@ where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { + puffin::profile_function!(); + // Merged databases are already been indexed, we start from this count; let mut databases_seen = MERGED_DATABASE_COUNT; @@ -511,26 +522,36 @@ where return Err(Error::InternalError(InternalError::AbortedIndexation)); } - let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; + let current_prefix_fst; + let common_prefix_fst_words_tmp; + let common_prefix_fst_words: Vec<_>; + let new_prefix_fst_words; + let del_prefix_fst_words; - // We retrieve the common words between the previous and new prefix word fst. - let common_prefix_fst_words = fst_stream_into_vec( - previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(), - ); - let common_prefix_fst_words: Vec<_> = common_prefix_fst_words - .as_slice() - .linear_group_by_key(|x| x.chars().next().unwrap()) - .collect(); + { + puffin::profile_scope!("compute_prefix_diffs"); - // We retrieve the newly added words between the previous and new prefix word fst. - let new_prefix_fst_words = fst_stream_into_vec( - current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(), - ); + current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; - // We compute the set of prefixes that are no more part of the prefix fst. - let del_prefix_fst_words = fst_stream_into_hashset( - previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(), - ); + // We retrieve the common words between the previous and new prefix word fst. 
+ common_prefix_fst_words_tmp = fst_stream_into_vec( + previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(), + ); + common_prefix_fst_words = common_prefix_fst_words_tmp + .as_slice() + .linear_group_by_key(|x| x.chars().next().unwrap()) + .collect(); + + // We retrieve the newly added words between the previous and new prefix word fst. + new_prefix_fst_words = fst_stream_into_vec( + current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(), + ); + + // We compute the set of prefixes that are no more part of the prefix fst. + del_prefix_fst_words = fst_stream_into_hashset( + previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(), + ); + } databases_seen += 1; (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase { @@ -668,6 +689,8 @@ fn execute_word_prefix_docids( common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { + puffin::profile_function!(); + let cursor = reader.into_cursor()?; let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db); builder.chunk_compression_type = indexer_config.chunk_compression_type; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index ee6831be5..7a0c811a8 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> { where F: Fn(UpdateIndexingStep) + Sync, { + puffin::profile_function!(); + let primary_key = self .index .primary_key(wtxn)? 
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 3f197fbd1..96dea0480 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -49,6 +49,66 @@ pub(crate) enum TypedChunk { ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), } +impl TypedChunk { + pub fn to_debug_string(&self) -> String { + match self { + TypedChunk::FieldIdDocidFacetStrings(grenad) => { + format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdDocidFacetNumbers(grenad) => { + format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::Documents(grenad) => { + format!("Documents {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdWordcountDocids(grenad) => { + format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::NewDocumentsIds(grenad) => { + format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!( + "WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}", + word_docids_reader.len(), + exact_word_docids_reader.len() + ), + TypedChunk::WordPositionDocids(grenad) => { + format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::WordFidDocids(grenad) => { + format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::WordPairProximityDocids(grenad) => { + format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdFacetStringDocids(grenad) => { + format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdFacetNumberDocids(grenad) => { + format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdFacetExistsDocids(grenad) => { + 
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdFacetIsNullDocids(grenad) => { + format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => { + format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::GeoPoints(grenad) => { + format!("GeoPoints {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::VectorPoints(grenad) => { + format!("VectorPoints {{ number_of_entries: {} }}", grenad.len()) + } + TypedChunk::ScriptLanguageDocids(grenad) => { + format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len()) + } + } + } +} + /// Write typed chunk in the corresponding LMDB database of the provided index. /// Return new documents seen. pub(crate) fn write_typed_chunk_into_index( @@ -57,6 +117,8 @@ pub(crate) fn write_typed_chunk_into_index( wtxn: &mut RwTxn, index_is_empty: bool, ) -> Result<(RoaringBitmap, bool)> { + puffin::profile_function!(typed_chunk.to_debug_string()); + let mut is_merged_database = false; match typed_chunk { TypedChunk::Documents(obkv_documents_iter) => { @@ -336,6 +398,8 @@ where FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, FM: Fn(&[u8], &[u8], &mut Vec) -> Result<()>, { + puffin::profile_function!(format!("number of entries: {}", data.len())); + let mut buffer = Vec::new(); let database = database.remap_types::(); @@ -378,6 +442,8 @@ where FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, FM: Fn(&[u8], &[u8], &mut Vec) -> Result<()>, { + puffin::profile_function!(format!("number of entries: {}", data.len())); + if !index_is_empty { return write_entries_into_database( data, diff --git a/milli/src/update/prefix_word_pairs/mod.rs b/milli/src/update/prefix_word_pairs/mod.rs index bed542bdb..3105b16e4 100644 --- a/milli/src/update/prefix_word_pairs/mod.rs +++ b/milli/src/update/prefix_word_pairs/mod.rs @@ -50,6 +50,8 @@ impl<'t, 'u, 'i> 
PrefixWordPairsProximityDocids<'t, 'u, 'i> { common_prefix_fst_words: &[&'a [String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { + puffin::profile_function!(); + index_word_prefix_database( self.wtxn, self.index.word_pair_proximity_docids, diff --git a/milli/src/update/prefix_word_pairs/prefix_word.rs b/milli/src/update/prefix_word_pairs/prefix_word.rs index 60e2e554e..1ec66d010 100644 --- a/milli/src/update/prefix_word_pairs/prefix_word.rs +++ b/milli/src/update/prefix_word_pairs/prefix_word.rs @@ -27,6 +27,8 @@ pub fn index_prefix_word_database( chunk_compression_type: CompressionType, chunk_compression_level: Option, ) -> Result<()> { + puffin::profile_function!(); + let max_proximity = max_proximity - 1; debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); diff --git a/milli/src/update/prefix_word_pairs/word_prefix.rs b/milli/src/update/prefix_word_pairs/word_prefix.rs index db607e56c..570adece9 100644 --- a/milli/src/update/prefix_word_pairs/word_prefix.rs +++ b/milli/src/update/prefix_word_pairs/word_prefix.rs @@ -191,6 +191,7 @@ pub fn index_word_prefix_database( chunk_compression_type: CompressionType, chunk_compression_level: Option, ) -> Result<()> { + puffin::profile_function!(); debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); // Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 5b8e5a21c..aebbbeca2 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -303,6 +303,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { + puffin::profile_function!(); + let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; // if the settings are set before any document update, we don't need to do anything, and // will set the primary key during the first document addition. 
diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index b235c44a6..a30254994 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -45,6 +45,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { + puffin::profile_function!(); + // It is forbidden to keep a mutable reference into the database // and write into it at the same time, therefore we write into another file. let mut prefix_docids_sorter = create_sorter( diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 63ca178ef..c65438928 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { + puffin::profile_function!(); debug!("Computing and writing the word levels integers docids into LMDB on disk..."); let mut prefix_integer_docids_sorter = create_sorter( diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index 57fed0922..121b45c4a 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -42,6 +42,8 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { #[logging_timer::time("WordsPrefixesFst::{}")] pub fn execute(self) -> Result<()> { + puffin::profile_function!(); + let words_fst = self.index.words_fst(self.wtxn)?; let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];