From 8fd99b111b587714092c9267891343bdb0bebbf7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 5 Sep 2024 17:36:19 +0200 Subject: [PATCH] Add tracing timers logs --- milli/src/update/new/extract/faceted/mod.rs | 1 + milli/src/update/new/indexer/mod.rs | 126 +++++++++++++------- milli/src/update/new/merger.rs | 36 +++++- workloads/movies.json | 2 +- 4 files changed, 115 insertions(+), 50 deletions(-) diff --git a/milli/src/update/new/extract/faceted/mod.rs b/milli/src/update/new/extract/faceted/mod.rs index bd58c21b4..62aa7adb2 100644 --- a/milli/src/update/new/extract/faceted/mod.rs +++ b/milli/src/update/new/extract/faceted/mod.rs @@ -17,6 +17,7 @@ mod extract_facets; mod facet_document; pub trait FacetedExtractor { + #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] fn run_extraction( index: &Index, fields_ids_map: &GlobalFieldsIdsMap, diff --git a/milli/src/update/new/indexer/mod.rs b/milli/src/update/new/indexer/mod.rs index 0865374c8..5e3104de8 100644 --- a/milli/src/update/new/indexer/mod.rs +++ b/milli/src/update/new/indexer/mod.rs @@ -63,8 +63,11 @@ where thread::scope(|s| { // TODO manage the errors correctly + let current_span = tracing::Span::current(); let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { pool.in_place_scope(|_s| { + let span = tracing::trace_span!(target: "indexing::documents", parent: ¤t_span, "extract"); + let _entered = span.enter(); let document_changes = document_changes.into_par_iter(); // document but we need to create a function that collects and compresses documents. @@ -95,56 +98,85 @@ where max_memory: Some(max_memory), ..GrenadParameters::default() }; - extract_and_send_docids::( - index, - &global_fields_ids_map, - grenad_parameters, - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); + let _entered = span.enter(); + extract_and_send_docids::( + index, + &global_fields_ids_map, + grenad_parameters, + document_changes.clone(), + &extractor_sender, + )?; + } - extract_and_send_docids::( - index, - &global_fields_ids_map, - grenad_parameters, - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_fid_docids"); + let _entered = span.enter(); + extract_and_send_docids::( + index, + &global_fields_ids_map, + grenad_parameters, + document_changes.clone(), + &extractor_sender, + )?; + } + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "exact_word_docids"); + let _entered = span.enter(); + extract_and_send_docids::( + index, + &global_fields_ids_map, + grenad_parameters, + document_changes.clone(), + &extractor_sender, + )?; + } - extract_and_send_docids::( - index, - &global_fields_ids_map, - grenad_parameters, - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_position_docids"); + let _entered = span.enter(); + extract_and_send_docids::( + index, + &global_fields_ids_map, + grenad_parameters, + document_changes.clone(), + &extractor_sender, + )?; + } - extract_and_send_docids::( - index, - &global_fields_ids_map, - grenad_parameters, - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "fid_word_count_docids"); + let _entered = span.enter(); + extract_and_send_docids::( + index, + &global_fields_ids_map, + GrenadParameters::default(), + document_changes.clone(), + &extractor_sender, + )?; + } - extract_and_send_docids::( - index, - &global_fields_ids_map, - GrenadParameters::default(), - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + extract_and_send_docids::< + WordPairProximityDocidsExtractor, + WordPairProximityDocids, + >( + index, + &global_fields_ids_map, + grenad_parameters, + document_changes.clone(), + &extractor_sender, + )?; + } - extract_and_send_docids::< - WordPairProximityDocidsExtractor, - WordPairProximityDocids, - >( - index, - &global_fields_ids_map, - grenad_parameters, - document_changes.clone(), - &extractor_sender, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); + let _entered = span.enter(); + } // TODO THIS IS TOO MUCH // Extract fieldid docid facet number @@ -166,7 +198,11 @@ where })?; // TODO manage the errors correctly + let current_span = tracing::Span::current(); let handle2 = Builder::new().name(S("indexer-merger")).spawn_scoped(s, move || { + let span = + tracing::trace_span!(target: "indexing::documents", parent: ¤t_span, "merge"); + let _entered = span.enter(); let rtxn = index.read_txn().unwrap(); merge_grenad_entries(merger_receiver, merger_sender, &rtxn, index) })?; diff --git a/milli/src/update/new/merger.rs b/milli/src/update/new/merger.rs index 19f56a301..b38dc0865 100644 --- a/milli/src/update/new/merger.rs +++ b/milli/src/update/new/merger.rs @@ -16,6 +16,7 @@ use crate::update::MergeDeladdCboRoaringBitmaps; use crate::{CboRoaringBitmapCodec, Index, Result}; /// TODO We must return some infos/stats +#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")] pub fn merge_grenad_entries( receiver: MergerReceiver, sender: MergerSender, @@ -28,6 +29,9 @@ pub fn merge_grenad_entries( for merger_operation in receiver { match merger_operation { MergerOperation::ExactWordDocidsMerger(merger) => { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); + let _entered = span.enter(); merge_and_send_docids( merger, /// TODO do a MergerOperation::database(&Index) -> Database. @@ -40,6 +44,8 @@ pub fn merge_grenad_entries( )?; } MergerOperation::FidWordCountDocidsMerger(merger) => { + let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); + let _entered = span.enter(); merge_and_send_docids( merger, index.field_id_word_count_docids.remap_types(), @@ -51,6 +57,9 @@ pub fn merge_grenad_entries( )?; } MergerOperation::WordDocidsMerger(merger) => { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); + let _entered = span.enter(); let mut add_words_fst = SetBuilder::new(tempfile()?)?; let mut del_words_fst = SetBuilder::new(tempfile()?)?; @@ -70,6 +79,9 @@ pub fn merge_grenad_entries( sender.main().write_words_fst(mmap).unwrap(); } MergerOperation::WordFidDocidsMerger(merger) => { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); + let _entered = span.enter(); merge_and_send_docids( merger, index.word_fid_docids.remap_types(), @@ -81,6 +93,8 @@ pub fn merge_grenad_entries( )?; } MergerOperation::WordPairProximityDocidsMerger(merger) => { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); + let _entered = span.enter(); merge_and_send_docids( merger, index.word_pair_proximity_docids.remap_types(), @@ -92,6 +106,8 @@ pub fn merge_grenad_entries( )?; } MergerOperation::WordPositionDocidsMerger(merger) => { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); + let _entered = span.enter(); merge_and_send_docids( merger, index.word_position_docids.remap_types(), @@ -103,10 +119,16 @@ pub fn merge_grenad_entries( )?; } MergerOperation::InsertDocument { docid, document } => { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "insert_document"); + let _entered = span.enter(); documents_ids.insert(docid); sender.documents().uncompressed(docid, &document).unwrap(); } MergerOperation::DeleteDocument { docid } => { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "delete_document"); + let _entered = span.enter(); if !documents_ids.remove(docid) { unreachable!("Tried deleting a document that we do not know about"); } @@ -115,10 +137,15 @@ pub fn merge_grenad_entries( } } - // Send the documents ids unionized with the current one - /// TODO return the slice of bytes directly - serialize_bitmap_into_vec(&documents_ids, &mut buffer); - sender.send_documents_ids(&buffer).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "documents_ids"); + let _entered = span.enter(); + + // Send the documents ids unionized with the current one + /// TODO return the slice of bytes directly + serialize_bitmap_into_vec(&documents_ids, &mut buffer); + sender.send_documents_ids(&buffer).unwrap(); + } // ... @@ -149,6 +176,7 @@ fn compute_new_words_fst( Ok(words_fst_mmap) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] fn merge_and_send_docids( merger: Merger, database: Database, diff --git a/workloads/movies.json b/workloads/movies.json index 445ff3aca..9ad3fb7eb 100644 --- a/workloads/movies.json +++ b/workloads/movies.json @@ -1,6 +1,6 @@ { "name": "movies.json", - "run_count": 10, + "run_count": 1, "extra_cli_args": [], "assets": { "movies.json": {