Expose the write channel congestion in the batches

Kerollmops 2025-02-19 14:00:21 +01:00
parent 3ff1de0a21
commit 05cc8c650c
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
12 changed files with 138 additions and 92 deletions

View File

@@ -322,6 +322,7 @@ pub(crate) mod test {
                 types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
                 index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
                 call_trace: Default::default(),
+                write_channel_congestion: None,
             },
             enqueued_at: Some(BatchEnqueuedAt {
                 earliest: datetime!(2022-11-11 0:00 UTC),

View File

@@ -215,14 +215,16 @@ impl IndexScheduler {
         let mut stop_scheduler_forever = false;
         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
         let mut canceled = RoaringBitmap::new();
+        let mut congestion = None;

         match res {
-            Ok(tasks) => {
+            Ok((tasks, cong)) => {
                 #[cfg(test)]
                 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);

                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
                 progress.update_progress(task_progress_obj);
+                congestion = cong;

                 let mut success = 0;
                 let mut failure = 0;
                 let mut canceled_by = None;
@@ -339,9 +341,17 @@ impl IndexScheduler {
                 // We must re-add the canceled task so they're part of the same batch.
                 ids |= canceled;

-                let durations = progress.accumulated_durations();
                 processing_batch.stats.call_trace =
-                    durations.into_iter().map(|(k, v)| (k, v.into())).collect();
+                    progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
+                processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
+                    let mut congestion_info = serde_json::Map::new();
+                    congestion_info.insert("attempts".into(), congestion.attempts.into());
+                    congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
+                    congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
+                    congestion_info
+                });

                 self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;

                 #[cfg(test)]
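
Taken together, the scheduler now threads the congestion out of the batch result and flattens it into three JSON keys. As a self-contained sketch of just that mapping (the congestion_stats helper is illustrative and not part of the commit; the ChannelCongestion stub copies the struct introduced in write.rs further down):

// Stubbed-down copy of the ChannelCongestion introduced in write.rs below.
struct ChannelCongestion {
    attempts: usize,
    blocking_attempts: usize,
}

impl ChannelCongestion {
    fn congestion_ratio(&self) -> f32 {
        self.blocking_attempts as f32 / self.attempts as f32
    }
}

// Hypothetical helper mirroring the closure in the scheduler above.
fn congestion_stats(
    congestion: Option<ChannelCongestion>,
) -> Option<serde_json::Map<String, serde_json::Value>> {
    congestion.map(|congestion| {
        let mut info = serde_json::Map::new();
        info.insert("attempts".into(), congestion.attempts.into());
        info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
        info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
        info
    })
}

blocking_ratio is the fraction of send attempts that had to block and retry, so a value near 0.0 means the write channel kept up with the extractors.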

View File

@@ -5,7 +5,7 @@ use std::sync::atomic::Ordering;
 use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
-use meilisearch_types::milli::{self};
+use meilisearch_types::milli::{self, ChannelCongestion};
 use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
 use milli::update::Settings as MilliSettings;
 use roaring::RoaringBitmap;
@@ -35,7 +35,7 @@ impl IndexScheduler {
         batch: Batch,
         current_batch: &mut ProcessingBatch,
         progress: Progress,
-    ) -> Result<Vec<Task>> {
+    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
         #[cfg(test)]
         {
             self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@@ -76,7 +76,7 @@ impl IndexScheduler {
                 canceled_tasks.push(task);

-                Ok(canceled_tasks)
+                Ok((canceled_tasks, None))
             }
             Batch::TaskDeletions(mut tasks) => {
                 // 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -115,10 +115,14 @@ impl IndexScheduler {
                         _ => unreachable!(),
                     }
                 }
-                Ok(tasks)
+                Ok((tasks, None))
             }
-            Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks),
-            Batch::Dump(task) => self.process_dump_creation(progress, task),
+            Batch::SnapshotCreation(tasks) => {
+                self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
+            }
+            Batch::Dump(task) => {
+                self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
+            }
             Batch::IndexOperation { op, must_create_index } => {
                 let index_uid = op.index_uid().to_string();
                 let index = if must_create_index {
@@ -135,7 +139,8 @@ impl IndexScheduler {
                     .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                 let mut index_wtxn = index.write_txn()?;
-                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+                let (tasks, congestion) =
+                    self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;

                 {
                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
@@ -166,7 +171,7 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             Batch::IndexCreation { index_uid, primary_key, task } => {
                 progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@@ -234,7 +239,7 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(vec![task])
+                Ok((vec![task], None))
             }
             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
                 progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@@ -268,7 +273,7 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
             Batch::IndexSwap { mut task } => {
                 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@@ -316,7 +321,7 @@ impl IndexScheduler {
                 }
                 wtxn.commit()?;
                 task.status = Status::Succeeded;
-                Ok(vec![task])
+                Ok((vec![task], None))
             }
             Batch::UpgradeDatabase { mut tasks } => {
                 let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@@ -346,7 +351,7 @@ impl IndexScheduler {
                     task.error = None;
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
         }
     }

View File

@@ -5,7 +5,7 @@ use meilisearch_types::milli::documents::PrimaryKey;
 use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
 use meilisearch_types::milli::update::DocumentAdditionResult;
-use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
+use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
 use meilisearch_types::settings::apply_settings_to_builder;
 use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
 use meilisearch_types::Index;
@@ -33,9 +33,8 @@ impl IndexScheduler {
         index: &'i Index,
         operation: IndexOperation,
         progress: Progress,
-    ) -> Result<Vec<Task>> {
+    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
         let indexer_alloc = Bump::new();
         let started_processing_at = std::time::Instant::now();
         let must_stop_processing = self.scheduler.must_stop_processing.clone();
@@ -60,7 +59,7 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
                 progress.update_progress(DocumentOperationProgress::RetrievingConfig);
@@ -173,21 +172,24 @@ impl IndexScheduler {
                 }

                 progress.update_progress(DocumentOperationProgress::Indexing);
+                let mut congestion = None;
                 if tasks.iter().any(|res| res.error.is_none()) {
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        primary_key,
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            primary_key,
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -199,7 +201,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::DocumentEdition { index_uid, mut task } => {
                 progress.update_progress(DocumentEditionProgress::RetrievingConfig);
@@ -247,7 +249,7 @@ impl IndexScheduler {
                         edited_documents: Some(0),
                     });

-                    return Ok(vec![task]);
+                    return Ok((vec![task], None));
                 }

                 let rtxn = index.read_txn()?;
@@ -262,6 +264,7 @@ impl IndexScheduler {
                 let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;

+                let mut congestion = None;
                 if task.error.is_none() {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();
@@ -292,20 +295,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentEditionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // cannot change primary key in DocumentEdition
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // cannot change primary key in DocumentEdition
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -341,7 +346,7 @@ impl IndexScheduler {
                     }
                 }

-                Ok(vec![task])
+                Ok((vec![task], congestion))
             }
             IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
                 progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
@@ -408,7 +413,7 @@ impl IndexScheduler {
                 }

                 if to_delete.is_empty() {
-                    return Ok(tasks);
+                    return Ok((tasks, None));
                 }

                 let rtxn = index.read_txn()?;
@@ -422,6 +427,7 @@ impl IndexScheduler {
                     PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
                         .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;

+                let mut congestion = None;
                 if !tasks.iter().all(|res| res.error.is_some()) {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();
@@ -447,20 +453,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentDeletionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // document deletion never changes primary key
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // document deletion never changes primary key
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -472,7 +480,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::Settings { index_uid, settings, mut tasks } => {
                 progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
@@ -497,7 +505,7 @@ impl IndexScheduler {
                 )
                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentClearAndSetting {
                 index_uid,
@@ -505,7 +513,7 @@ impl IndexScheduler {
                 settings,
                 settings_tasks,
             } => {
-                let mut import_tasks = self.apply_index_operation(
+                let (mut import_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::DocumentClear {
@@ -515,7 +523,7 @@ impl IndexScheduler {
                     progress.clone(),
                 )?;

-                let settings_tasks = self.apply_index_operation(
+                let (settings_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
@@ -524,7 +532,7 @@ impl IndexScheduler {
                 let mut tasks = settings_tasks;
                 tasks.append(&mut import_tasks);

-                Ok(tasks)
+                Ok((tasks, None))
             }
         }
     }

View File

@@ -61,4 +61,6 @@ pub struct BatchStats {
     pub types: BTreeMap<Kind, u32>,
     pub index_uids: BTreeMap<String, u32>,
     pub call_trace: serde_json::Map<String, serde_json::Value>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
 }
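
One detail worth calling out: thanks to skip_serializing_if, batches that never touched an index (and thus carry no congestion data) serialize exactly as before. A minimal sketch demonstrating the behavior (Stats is a hypothetical stand-in for the real BatchStats, reduced to two fields):

use serde::Serialize;

// `Stats` is a hypothetical stand-in for BatchStats, not the real type.
#[derive(Serialize)]
struct Stats {
    call_trace: serde_json::Map<String, serde_json::Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
}

fn main() {
    let stats = Stats { call_trace: Default::default(), write_channel_congestion: None };
    // Prints {"call_trace":{}}; the None field is omitted entirely.
    println!("{}", serde_json::to_string(&stats).unwrap());
}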

View File

@@ -73,6 +73,7 @@ pub use self::search::{
     FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
     Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
+pub use self::update::ChannelCongestion;

 pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -5,6 +5,7 @@ pub use self::facet::bulk::FacetsUpdateBulk;
 pub use self::facet::incremental::FacetsUpdateIncrementalInner;
 pub use self::index_documents::*;
 pub use self::indexer_config::IndexerConfig;
+pub use self::new::ChannelCongestion;
 pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;

View File

@@ -291,7 +291,7 @@ where
                 &indexing_context.must_stop_processing,
             )?;
         }
-        indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
+        indexing_context.progress.update_progress(IndexingStep::TailWritingToDatabase);
         finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

         Result::Ok((facet_field_ids_delta, index_embeddings))

View File

@@ -10,6 +10,7 @@ use hashbrown::HashMap;
 use heed::RwTxn;
 pub use partial_dump::PartialDump;
 pub use update_by_function::UpdateByFunction;
+pub use write::ChannelCongestion;
 use write::{build_vectors, update_index, write_to_db};

 use super::channel::*;
@@ -53,7 +54,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
     embedders: EmbeddingConfigs,
     must_stop_processing: &'indexer MSP,
     progress: &'indexer Progress,
-) -> Result<()>
+) -> Result<ChannelCongestion>
 where
     DC: DocumentChanges<'pl>,
     MSP: Fn() -> bool + Sync,
@@ -130,7 +131,7 @@ where
     let mut field_distribution = index.field_distribution(wtxn)?;
     let mut document_ids = index.documents_ids(wtxn)?;

-    thread::scope(|s| -> Result<()> {
+    let congestion = thread::scope(|s| -> Result<ChannelCongestion> {
         let indexer_span = tracing::Span::current();
         let embedders = &embedders;
         let finished_extraction = &finished_extraction;
@@ -182,7 +183,8 @@ where
         let mut arroy_writers = arroy_writers?;

-        write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
+        let congestion =
+            write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;

         indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
@@ -210,7 +212,7 @@ where
         indexing_context.progress.update_progress(IndexingStep::Finalizing);

-        Ok(()) as Result<_>
+        Ok(congestion) as Result<_>
     })?;

     // required to into_inner the new_fields_ids_map
@@ -227,5 +229,5 @@ where
         document_ids,
     )?;

-    Ok(())
+    Ok(congestion)
 }
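
The notable mechanical change above is that the thread::scope closure now returns the congestion through its Result instead of (), letting a value measured inside the scope escape to the caller. A rough sketch of that pattern with placeholder names (index_stub, the error type, and the literal values are illustrative, not from the commit):

use std::thread;

#[derive(Debug, Clone, Copy)]
struct ChannelCongestion {
    attempts: usize,
    blocking_attempts: usize,
}

fn index_stub() -> Result<ChannelCongestion, std::io::Error> {
    // The closure's return value escapes the scope once every spawned
    // thread has joined, so stats measured by the writer can be handed
    // back to the caller of indexer::index.
    let congestion = thread::scope(|_s| -> Result<ChannelCongestion, std::io::Error> {
        // ... spawn extractor threads, drain the channel, write to LMDB ...
        Ok(ChannelCongestion { attempts: 42, blocking_attempts: 3 })
    })?;
    Ok(congestion)
}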

View File

@@ -14,13 +14,13 @@ use crate::update::settings::InnerIndexSettings;
 use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
 use crate::{Error, Index, InternalError, Result};

-pub(super) fn write_to_db(
+pub fn write_to_db(
     mut writer_receiver: WriterBbqueueReceiver<'_>,
     finished_extraction: &AtomicBool,
     index: &Index,
     wtxn: &mut RwTxn<'_>,
     arroy_writers: &HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
-) -> Result<()> {
+) -> Result<ChannelCongestion> {
     // Used by by the ArroySetVector to copy the embedding into an
     // aligned memory area, required by arroy to accept a new vector.
     let mut aligned_embedding = Vec::new();
@@ -75,21 +75,36 @@ pub(super) fn write_to_db(
     write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;

-    let direct_attempts = writer_receiver.sent_messages_attempts();
-    let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
-    let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
-    tracing::debug!(
-        "Channel congestion metrics - \
-        Attempts: {direct_attempts}, \
-        Blocked attempts: {blocking_attempts} \
-        ({congestion_pct:.1}% congestion)"
-    );
-
-    Ok(())
+    Ok(ChannelCongestion {
+        attempts: writer_receiver.sent_messages_attempts(),
+        blocking_attempts: writer_receiver.blocking_sent_messages_attempts(),
+    })
+}
+
+/// Stats exposing the congestion of a channel.
+#[derive(Debug, Copy, Clone)]
+pub struct ChannelCongestion {
+    /// Number of attempts to send a message into the bbqueue buffer.
+    pub attempts: usize,
+    /// Number of blocking attempts which require a retry.
+    pub blocking_attempts: usize,
+}
+
+impl ChannelCongestion {
+    pub fn congestion_ratio(&self) -> f32 {
+        // tracing::debug!(
+        //     "Channel congestion metrics - \
+        //     Attempts: {direct_attempts}, \
+        //     Blocked attempts: {blocking_attempts} \
+        //     ({congestion_pct:.1}% congestion)"
+        // );
+        self.blocking_attempts as f32 / self.attempts as f32
+    }
 }

 #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
-pub(super) fn build_vectors<MSP>(
+pub fn build_vectors<MSP>(
     index: &Index,
     wtxn: &mut RwTxn<'_>,
     index_embeddings: Vec<IndexEmbeddingConfig>,
@@ -113,7 +128,7 @@ where
     Ok(())
 }

-pub(super) fn update_index(
+pub fn update_index(
     index: &Index,
     wtxn: &mut RwTxn<'_>,
     new_fields_ids_map: FieldIdMapWithMetadata,
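
Note the unit change: the removed tracing::debug! reported a percentage (ratio * 100), while congestion_ratio returns a plain fraction in 0.0..=1.0, and that fraction is what the scheduler stores as blocking_ratio. A tiny usage sketch (the struct and method are copied from the diff above; the numbers are made up):

#[derive(Debug, Copy, Clone)]
pub struct ChannelCongestion {
    pub attempts: usize,
    pub blocking_attempts: usize,
}

impl ChannelCongestion {
    pub fn congestion_ratio(&self) -> f32 {
        self.blocking_attempts as f32 / self.attempts as f32
    }
}

fn main() {
    let congestion = ChannelCongestion { attempts: 1_000, blocking_attempts: 150 };
    // A fraction in 0.0..=1.0; multiply by 100 to recover the old log's percentage.
    println!("{:.1}% congestion", congestion.congestion_ratio() * 100.0);
}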

View File

@@ -1,4 +1,5 @@
 pub use document_change::{Deletion, DocumentChange, Insertion, Update};
+pub use indexer::ChannelCongestion;
 pub use merger::{
     merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta,
 };

View File

@@ -14,7 +14,7 @@ pub enum IndexingStep {
     ExtractingWordProximity,
     ExtractingEmbeddings,
     WritingGeoPoints,
-    WritingToDatabase,
+    TailWritingToDatabase,
     WaitingForExtractors,
     WritingEmbeddingsToDatabase,
     PostProcessingFacets,
@@ -32,7 +32,7 @@ impl Step for IndexingStep {
             IndexingStep::ExtractingWordProximity => "extracting word proximity",
             IndexingStep::ExtractingEmbeddings => "extracting embeddings",
             IndexingStep::WritingGeoPoints => "writing geo points",
-            IndexingStep::WritingToDatabase => "writing to database",
+            IndexingStep::TailWritingToDatabase => "tail writing to database",
             IndexingStep::WaitingForExtractors => "waiting for extractors",
             IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
             IndexingStep::PostProcessingFacets => "post-processing facets",