Add more precise spans

This commit is contained in:
Louis Dureuil 2024-11-25 16:09:15 +01:00
parent 5560452ef9
commit aa460819a7
No known key found for this signature in database

View File

@ -109,11 +109,14 @@ where
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
// document but we need to create a function that collects and compresses documents. // document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents(); let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(&document_sender, embedders); let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
let _entered = span.enter();
extract(document_changes, extract(document_changes,
&document_extractor, &document_extractor,
indexing_context, indexing_context,
@ -121,7 +124,10 @@ where
&datastore, &datastore,
Step::ExtractingDocuments, Step::ExtractingDocuments,
)?; )?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
let _entered = span.enter();
for document_extractor_data in datastore { for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner(); let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta { for (field, delta) in document_extractor_data.field_distribution_delta {
@ -133,14 +139,15 @@ where
} }
field_distribution.retain(|_, v| *v != 0); field_distribution.retain(|_, v| *v != 0);
}
let facet_field_ids_delta; let facet_field_ids_delta;
{ {
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted"); let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
let _entered = span.enter(); let _entered = span.enter();
facet_field_ids_delta = merge_and_send_facet_docids(
FacetedDocidsExtractor::run_extraction( FacetedDocidsExtractor::run_extraction(
grenad_parameters, grenad_parameters,
document_changes, document_changes,
@ -148,16 +155,25 @@ where
&mut extractor_allocs, &mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(), &extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets Step::ExtractingFacets
)?, )?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
FacetDatabases::new(index), FacetDatabases::new(index),
index, index,
extractor_sender.facet_docids(), extractor_sender.facet_docids(),
)?; )?;
} }
}
{ {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
let WordDocidsCaches { let WordDocidsCaches {
@ -166,15 +182,19 @@ where
exact_word_docids, exact_word_docids,
word_position_docids, word_position_docids,
fid_word_count_docids, fid_word_count_docids,
} = WordDocidsExtractors::run_extraction( } = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
grenad_parameters, grenad_parameters,
document_changes, document_changes,
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
Step::ExtractingWords Step::ExtractingWords
)?; )?
};
// TODO Word Docids Merger
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter(); let _entered = span.enter();
@ -187,7 +207,6 @@ where
)?; )?;
} }
// Word Fid Docids Merging
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter(); let _entered = span.enter();
@ -200,7 +219,6 @@ where
)?; )?;
} }
// Exact Word Docids Merging
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter(); let _entered = span.enter();
@ -213,7 +231,6 @@ where
)?; )?;
} }
// Word Position Docids Merging
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter(); let _entered = span.enter();
@ -226,7 +243,6 @@ where
)?; )?;
} }
// Fid Word Count Docids Merging
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter(); let _entered = span.enter();
@ -244,17 +260,22 @@ where
// this works only if the settings didn't change during this transaction. // this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord { if proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter(); let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters, grenad_parameters,
document_changes, document_changes,
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
Step::ExtractingWordProximity, Step::ExtractingWordProximity,
)?; )?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
merge_and_send_docids( merge_and_send_docids(
caches, caches,
@ -264,10 +285,9 @@ where
&indexing_context.must_stop_processing, &indexing_context.must_stop_processing,
)?; )?;
} }
}
'vectors: { 'vectors: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
let mut index_embeddings = index.embedding_configs(&rtxn)?; let mut index_embeddings = index.embedding_configs(&rtxn)?;
if index_embeddings.is_empty() { if index_embeddings.is_empty() {
@ -277,7 +297,15 @@ where
let embedding_sender = extractor_sender.embeddings(); let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads()); let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?; extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
for config in &mut index_embeddings { for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() { 'data: for data in datastore.iter_mut() {
@ -286,18 +314,21 @@ where
deladd.apply_to(&mut config.user_provided); deladd.apply_to(&mut config.user_provided);
} }
} }
}
embedding_sender.finish(index_embeddings).unwrap(); embedding_sender.finish(index_embeddings).unwrap();
} }
'geo: { 'geo: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else { let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo; break 'geo;
}; };
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
extract( extract(
document_changes, document_changes,
&extractor, &extractor,
@ -306,6 +337,7 @@ where
&datastore, &datastore,
Step::WritingGeoPoints Step::WritingGeoPoints
)?; )?;
}
merge_and_send_rtree( merge_and_send_rtree(
datastore, datastore,
@ -316,11 +348,7 @@ where
)?; )?;
} }
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
let _entered = span.enter();
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
}
Result::Ok(facet_field_ids_delta) Result::Ok(facet_field_ids_delta)
})?; })?;
@ -352,6 +380,10 @@ where
.collect(); .collect();
let mut arroy_writers = arroy_writers?; let mut arroy_writers = arroy_writers?;
{
let span = tracing::trace_span!(target: "indexing::write_db", "all");
let _entered = span.enter();
for operation in writer_receiver { for operation in writer_receiver {
match operation { match operation {
WriterOperation::DbOperation(db_operation) => { WriterOperation::DbOperation(db_operation) => {
@ -362,11 +394,13 @@ where
Ok(false) => unreachable!("We tried to delete an unknown key"), Ok(false) => unreachable!("We tried to delete an unknown key"),
Ok(_) => (), Ok(_) => (),
Err(error) => { Err(error) => {
return Err(Error::InternalError(InternalError::StoreDeletion { return Err(Error::InternalError(
InternalError::StoreDeletion {
database_name, database_name,
key: e.entry().to_owned(), key: e.entry().to_owned(),
error, error,
})); },
));
} }
}, },
EntryOperation::Write(e) => { EntryOperation::Write(e) => {
@ -383,8 +417,10 @@ where
} }
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation { WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
ArroyOperation::DeleteVectors { docid } => { ArroyOperation::DeleteVectors { docid } => {
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in for (
&mut arroy_writers _embedder_index,
(_embedder_name, _embedder, writer, dimensions),
) in &mut arroy_writers
{ {
let dimensions = *dimensions; let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?; writer.del_items(wtxn, dimensions, docid)?;
@ -395,9 +431,10 @@ where
embedder_id, embedder_id,
embeddings: raw_embeddings, embeddings: raw_embeddings,
} => { } => {
let (_, _, writer, dimensions) = let (_, _, writer, dimensions) = arroy_writers
arroy_writers.get(&embedder_id).expect("requested a missing embedder"); .get(&embedder_id)
// TODO: switch to Embeddings .expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions); let mut embeddings = Embeddings::new(*dimensions);
for embedding in raw_embeddings { for embedding in raw_embeddings {
embeddings.append(embedding).unwrap(); embeddings.append(embedding).unwrap();
@ -407,8 +444,9 @@ where
writer.add_items(wtxn, docid, &embeddings)?; writer.add_items(wtxn, docid, &embeddings)?;
} }
ArroyOperation::SetVector { docid, embedder_id, embedding } => { ArroyOperation::SetVector { docid, embedder_id, embedding } => {
let (_, _, writer, dimensions) = let (_, _, writer, dimensions) = arroy_writers
arroy_writers.get(&embedder_id).expect("requested a missing embedder"); .get(&embedder_id)
.expect("requested a missing embedder");
writer.del_items(wtxn, *dimensions, docid)?; writer.del_items(wtxn, *dimensions, docid)?;
writer.add_item(wtxn, docid, &embedding)?; writer.add_item(wtxn, docid, &embedding)?;
} }
@ -420,8 +458,10 @@ where
Step::WritingEmbeddingsToDatabase, Step::WritingEmbeddingsToDatabase,
)); ));
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in for (
&mut arroy_writers _embedder_index,
(_embedder_name, _embedder, writer, dimensions),
) in &mut arroy_writers
{ {
let dimensions = *dimensions; let dimensions = *dimensions;
writer.build_and_quantize( writer.build_and_quantize(
@ -438,6 +478,7 @@ where
}, },
} }
} }
}
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));