mirror of https://github.com/meilisearch/meilisearch.git

commit 05cc8c650c (parent 3ff1de0a21)

Expose the write channel congestion in the batches
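In short: the new indexer now measures how congested the bbqueue-backed write channel was (total send attempts vs. attempts that had to block and retry), `indexer::index` returns that measurement as a `ChannelCongestion`, and the scheduler records it in each batch's stats under `write_channel_congestion`. A minimal sketch of the type this commit introduces and how a consumer might read it; the struct and method mirror the final hunk below, while the numbers are made up:

```rust
// Sketch only: mirrors the ChannelCongestion type added in this commit.
#[derive(Debug, Clone, Copy)]
pub struct ChannelCongestion {
    /// Total attempts to send a message into the bbqueue buffer.
    pub attempts: usize,
    /// Attempts that blocked and required a retry.
    pub blocking_attempts: usize,
}

impl ChannelCongestion {
    pub fn congestion_ratio(&self) -> f32 {
        self.blocking_attempts as f32 / self.attempts as f32
    }
}

fn main() {
    // Hypothetical numbers for illustration.
    let c = ChannelCongestion { attempts: 1_000, blocking_attempts: 120 };
    println!("blocking ratio: {:.2}", c.congestion_ratio()); // 0.12
}
```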
@@ -322,6 +322,7 @@ pub(crate) mod test {
                 types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
                 index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
                 call_trace: Default::default(),
+                write_channel_congestion: None,
             },
             enqueued_at: Some(BatchEnqueuedAt {
                 earliest: datetime!(2022-11-11 0:00 UTC),

@@ -215,14 +215,16 @@ impl IndexScheduler {
         let mut stop_scheduler_forever = false;
         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
         let mut canceled = RoaringBitmap::new();
+        let mut congestion = None;

         match res {
-            Ok(tasks) => {
+            Ok((tasks, cong)) => {
                 #[cfg(test)]
                 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);

                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
                 progress.update_progress(task_progress_obj);
+                congestion = cong;
                 let mut success = 0;
                 let mut failure = 0;
                 let mut canceled_by = None;

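The hunk above threads the congestion measurement out of the `match` through a mutable local, so it is still available when the batch stats are written further down. A standalone sketch of that pattern, with stand-in types:

```rust
// Pattern sketch: capture a value produced inside a match arm
// in a mutable local declared before the match.
fn main() {
    let res: Result<(Vec<&str>, Option<f32>), &str> = Ok((vec!["task"], Some(0.12)));

    let mut congestion = None;
    match res {
        Ok((tasks, cong)) => {
            congestion = cong; // keep the measurement for later
            println!("{} task(s) succeeded", tasks.len());
        }
        Err(e) => println!("batch failed: {e}"),
    }

    println!("congestion after the match: {congestion:?}");
}
```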
@@ -339,9 +341,17 @@ impl IndexScheduler {

             // We must re-add the canceled task so they're part of the same batch.
             ids |= canceled;
-            let durations = progress.accumulated_durations();
             processing_batch.stats.call_trace =
-                durations.into_iter().map(|(k, v)| (k, v.into())).collect();
+                progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
+            processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
+                let mut congestion_info = serde_json::Map::new();
+                congestion_info.insert("attempts".into(), congestion.attempts.into());
+                congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
+                congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
+                congestion_info
+            });

             self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;

             #[cfg(test)]

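For reference, a runnable sketch of the JSON object the hunk above assembles, assuming the serde_json crate; the key names come straight from the inserts, the values are hypothetical:

```rust
// Builds the same map shape as processing_batch.stats.write_channel_congestion.
fn main() {
    let mut congestion_info = serde_json::Map::new();
    congestion_info.insert("attempts".into(), 1000.into());
    congestion_info.insert("blocking_attempts".into(), 120.into());
    congestion_info.insert("blocking_ratio".into(), 0.12.into());
    println!("{}", serde_json::Value::Object(congestion_info));
    // {"attempts":1000,"blocking_attempts":120,"blocking_ratio":0.12}
}
```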
@@ -5,7 +5,7 @@ use std::sync::atomic::Ordering;
 use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
-use meilisearch_types::milli::{self};
+use meilisearch_types::milli::{self, ChannelCongestion};
 use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
 use milli::update::Settings as MilliSettings;
 use roaring::RoaringBitmap;

@@ -35,7 +35,7 @@ impl IndexScheduler {
         batch: Batch,
         current_batch: &mut ProcessingBatch,
         progress: Progress,
-    ) -> Result<Vec<Task>> {
+    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
         #[cfg(test)]
         {
             self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;

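`process_batch` now returns the tasks together with an `Option<ChannelCongestion>`; as the hunks below show, only the arms that actually run `indexer::index` report `Some`, and everything else returns `None`. A toy illustration of that convention, with stand-in types:

```rust
// Stand-in types: Option<f32> plays the role of Option<ChannelCongestion>.
fn process(kind: &str) -> (Vec<String>, Option<f32>) {
    match kind {
        // Index operations drive the write channel, so they measure it.
        "index_operation" => (vec!["task".into()], Some(0.12)),
        // Task deletions, swaps, dumps, snapshots, upgrades: nothing to measure.
        _ => (vec!["task".into()], None),
    }
}

fn main() {
    assert_eq!(process("index_operation").1, Some(0.12));
    assert_eq!(process("task_deletion").1, None);
}
```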
@@ -76,7 +76,7 @@ impl IndexScheduler {

                     canceled_tasks.push(task);

-                Ok(canceled_tasks)
+                Ok((canceled_tasks, None))
             }
             Batch::TaskDeletions(mut tasks) => {
                 // 1. Retrieve the tasks that matched the query at enqueue-time.

@@ -115,10 +115,14 @@ impl IndexScheduler {
                         _ => unreachable!(),
                     }
                 }
-                Ok(tasks)
+                Ok((tasks, None))
             }
-            Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks),
-            Batch::Dump(task) => self.process_dump_creation(progress, task),
+            Batch::SnapshotCreation(tasks) => {
+                self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
+            }
+            Batch::Dump(task) => {
+                self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
+            }
             Batch::IndexOperation { op, must_create_index } => {
                 let index_uid = op.index_uid().to_string();
                 let index = if must_create_index {

@@ -135,7 +139,8 @@ impl IndexScheduler {
                     .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                 let mut index_wtxn = index.write_txn()?;
-                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+                let (tasks, congestion) =
+                    self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;

                 {
                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit");

@@ -166,7 +171,7 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             Batch::IndexCreation { index_uid, primary_key, task } => {
                 progress.update_progress(CreateIndexProgress::CreatingTheIndex);

@@ -234,7 +239,7 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(vec![task])
+                Ok((vec![task], None))
             }
             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
                 progress.update_progress(DeleteIndexProgress::DeletingTheIndex);

@@ -268,7 +273,7 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
             Batch::IndexSwap { mut task } => {
                 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);

@@ -316,7 +321,7 @@ impl IndexScheduler {
                 }
                 wtxn.commit()?;
                 task.status = Status::Succeeded;
-                Ok(vec![task])
+                Ok((vec![task], None))
             }
             Batch::UpgradeDatabase { mut tasks } => {
                 let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {

@@ -346,7 +351,7 @@ impl IndexScheduler {
                     task.error = None;
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
         }
     }

@@ -5,7 +5,7 @@ use meilisearch_types::milli::documents::PrimaryKey;
 use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
 use meilisearch_types::milli::update::DocumentAdditionResult;
-use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
+use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
 use meilisearch_types::settings::apply_settings_to_builder;
 use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
 use meilisearch_types::Index;

@@ -33,9 +33,8 @@ impl IndexScheduler {
         index: &'i Index,
         operation: IndexOperation,
         progress: Progress,
-    ) -> Result<Vec<Task>> {
+    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
         let indexer_alloc = Bump::new();

         let started_processing_at = std::time::Instant::now();
         let must_stop_processing = self.scheduler.must_stop_processing.clone();
-

@@ -60,7 +59,7 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
                 progress.update_progress(DocumentOperationProgress::RetrievingConfig);

@@ -173,21 +172,24 @@ impl IndexScheduler {
                 }

                 progress.update_progress(DocumentOperationProgress::Indexing);
+                let mut congestion = None;
                 if tasks.iter().any(|res| res.error.is_none()) {
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        primary_key,
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            primary_key,
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
+                    );

                 let addition = DocumentAdditionResult {
                     indexed_documents: candidates_count,

@@ -199,7 +201,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::DocumentEdition { index_uid, mut task } => {
                 progress.update_progress(DocumentEditionProgress::RetrievingConfig);

@@ -247,7 +249,7 @@ impl IndexScheduler {
                         edited_documents: Some(0),
                     });

-                    return Ok(vec![task]);
+                    return Ok((vec![task], None));
                 }

                 let rtxn = index.read_txn()?;

@@ -262,6 +264,7 @@ impl IndexScheduler {

                 let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;

+                let mut congestion = None;
                 if task.error.is_none() {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();

@@ -292,20 +295,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentEditionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // cannot change primary key in DocumentEdition
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // cannot change primary key in DocumentEdition
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,

@@ -341,7 +346,7 @@ impl IndexScheduler {
                     }
                 }

-                Ok(vec![task])
+                Ok((vec![task], congestion))
             }
             IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
                 progress.update_progress(DocumentDeletionProgress::RetrievingConfig);

@@ -408,7 +413,7 @@ impl IndexScheduler {
                 }

                 if to_delete.is_empty() {
-                    return Ok(tasks);
+                    return Ok((tasks, None));
                 }

                 let rtxn = index.read_txn()?;

@@ -422,6 +427,7 @@ impl IndexScheduler {
                     PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
                         .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;

+                let mut congestion = None;
                 if !tasks.iter().all(|res| res.error.is_some()) {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();

@@ -447,20 +453,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentDeletionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // document deletion never changes primary key
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // document deletion never changes primary key
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            &progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,

@@ -472,7 +480,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::Settings { index_uid, settings, mut tasks } => {
                 progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);

@@ -497,7 +505,7 @@ impl IndexScheduler {
                 )
                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentClearAndSetting {
                 index_uid,

@@ -505,7 +513,7 @@ impl IndexScheduler {
                 settings,
                 settings_tasks,
             } => {
-                let mut import_tasks = self.apply_index_operation(
+                let (mut import_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::DocumentClear {

@@ -515,7 +523,7 @@ impl IndexScheduler {
                     progress.clone(),
                 )?;

-                let settings_tasks = self.apply_index_operation(
+                let (settings_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },

@@ -524,7 +532,7 @@ impl IndexScheduler {

                 let mut tasks = settings_tasks;
                 tasks.append(&mut import_tasks);
-                Ok(tasks)
+                Ok((tasks, None))
             }
         }
     }

@@ -61,4 +61,6 @@ pub struct BatchStats {
     pub types: BTreeMap<Kind, u32>,
     pub index_uids: BTreeMap<String, u32>,
     pub call_trace: serde_json::Map<String, serde_json::Value>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
 }

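The `#[serde(skip_serializing_if = "Option::is_none")]` attribute keeps the new field out of serialized batch stats whenever no congestion was measured. A self-contained sketch of that behavior, using a hypothetical minimal struct and assuming the serde (with the derive feature) and serde_json crates:

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Stats {
    #[serde(skip_serializing_if = "Option::is_none")]
    write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
}

fn main() {
    let absent = Stats { write_channel_congestion: None };
    println!("{}", serde_json::to_string(&absent).unwrap()); // {}

    let mut info = serde_json::Map::new();
    info.insert("attempts".into(), 1000.into());
    let present = Stats { write_channel_congestion: Some(info) };
    println!("{}", serde_json::to_string(&present).unwrap());
    // {"write_channel_congestion":{"attempts":1000}}
}
```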
@@ -73,6 +73,7 @@ pub use self::search::{
     FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
     Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
+pub use self::update::ChannelCongestion;

 pub type Result<T> = std::result::Result<T, error::Error>;

@@ -5,6 +5,7 @@ pub use self::facet::bulk::FacetsUpdateBulk;
 pub use self::facet::incremental::FacetsUpdateIncrementalInner;
 pub use self::index_documents::*;
 pub use self::indexer_config::IndexerConfig;
+pub use self::new::ChannelCongestion;
 pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;

@@ -291,7 +291,7 @@ where
                 &indexing_context.must_stop_processing,
             )?;
         }
-        indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
+        indexing_context.progress.update_progress(IndexingStep::TailWritingToDatabase);
         finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

         Result::Ok((facet_field_ids_delta, index_embeddings))

@@ -10,6 +10,7 @@ use hashbrown::HashMap;
 use heed::RwTxn;
 pub use partial_dump::PartialDump;
 pub use update_by_function::UpdateByFunction;
+pub use write::ChannelCongestion;
 use write::{build_vectors, update_index, write_to_db};

 use super::channel::*;

@@ -53,7 +54,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
     embedders: EmbeddingConfigs,
     must_stop_processing: &'indexer MSP,
     progress: &'indexer Progress,
-) -> Result<()>
+) -> Result<ChannelCongestion>
 where
     DC: DocumentChanges<'pl>,
     MSP: Fn() -> bool + Sync,

@@ -130,7 +131,7 @@ where
     let mut field_distribution = index.field_distribution(wtxn)?;
     let mut document_ids = index.documents_ids(wtxn)?;

-    thread::scope(|s| -> Result<()> {
+    let congestion = thread::scope(|s| -> Result<ChannelCongestion> {
         let indexer_span = tracing::Span::current();
         let embedders = &embedders;
         let finished_extraction = &finished_extraction;

@@ -182,7 +183,8 @@ where

         let mut arroy_writers = arroy_writers?;

-        write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
+        let congestion =
+            write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;

         indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);

@@ -210,7 +212,7 @@ where

         indexing_context.progress.update_progress(IndexingStep::Finalizing);

-        Ok(()) as Result<_>
+        Ok(congestion) as Result<_>
     })?;

     // required to into_inner the new_fields_ids_map

|
|||||||
document_ids,
|
document_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(congestion)
|
||||||
}
|
}
|
||||||
|
@@ -14,13 +14,13 @@ use crate::update::settings::InnerIndexSettings;
 use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
 use crate::{Error, Index, InternalError, Result};

-pub(super) fn write_to_db(
+pub fn write_to_db(
     mut writer_receiver: WriterBbqueueReceiver<'_>,
     finished_extraction: &AtomicBool,
     index: &Index,
     wtxn: &mut RwTxn<'_>,
     arroy_writers: &HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
-) -> Result<()> {
+) -> Result<ChannelCongestion> {
     // Used by by the ArroySetVector to copy the embedding into an
     // aligned memory area, required by arroy to accept a new vector.
     let mut aligned_embedding = Vec::new();

@@ -75,21 +75,36 @@ pub(super) fn write_to_db(

     write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;

-    let direct_attempts = writer_receiver.sent_messages_attempts();
-    let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
-    let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
-    tracing::debug!(
-        "Channel congestion metrics - \
-        Attempts: {direct_attempts}, \
-        Blocked attempts: {blocking_attempts} \
-        ({congestion_pct:.1}% congestion)"
-    );
-
-    Ok(())
+    Ok(ChannelCongestion {
+        attempts: writer_receiver.sent_messages_attempts(),
+        blocking_attempts: writer_receiver.blocking_sent_messages_attempts(),
+    })
+}
+
+/// Stats exposing the congestion of a channel.
+#[derive(Debug, Copy, Clone)]
+pub struct ChannelCongestion {
+    /// Number of attempts to send a message into the bbqueue buffer.
+    pub attempts: usize,
+    /// Number of blocking attempts which require a retry.
+    pub blocking_attempts: usize,
+}
+
+impl ChannelCongestion {
+    pub fn congestion_ratio(&self) -> f32 {
+        // tracing::debug!(
+        //     "Channel congestion metrics - \
+        //     Attempts: {direct_attempts}, \
+        //     Blocked attempts: {blocking_attempts} \
+        //     ({congestion_pct:.1}% congestion)"
+        // );

+        self.blocking_attempts as f32 / self.attempts as f32
+    }
 }

 #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
-pub(super) fn build_vectors<MSP>(
+pub fn build_vectors<MSP>(
     index: &Index,
     wtxn: &mut RwTxn<'_>,
     index_embeddings: Vec<IndexEmbeddingConfig>,

|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn update_index(
|
pub fn update_index(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
wtxn: &mut RwTxn<'_>,
|
wtxn: &mut RwTxn<'_>,
|
||||||
new_fields_ids_map: FieldIdMapWithMetadata,
|
new_fields_ids_map: FieldIdMapWithMetadata,
|
||||||
|
@@ -1,4 +1,5 @@
 pub use document_change::{Deletion, DocumentChange, Insertion, Update};
+pub use indexer::ChannelCongestion;
 pub use merger::{
     merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta,
 };

@@ -14,7 +14,7 @@ pub enum IndexingStep {
     ExtractingWordProximity,
     ExtractingEmbeddings,
     WritingGeoPoints,
-    WritingToDatabase,
+    TailWritingToDatabase,
     WaitingForExtractors,
     WritingEmbeddingsToDatabase,
     PostProcessingFacets,

@@ -32,7 +32,7 @@ impl Step for IndexingStep {
             IndexingStep::ExtractingWordProximity => "extracting word proximity",
             IndexingStep::ExtractingEmbeddings => "extracting embeddings",
             IndexingStep::WritingGeoPoints => "writing geo points",
-            IndexingStep::WritingToDatabase => "writing to database",
+            IndexingStep::TailWritingToDatabase => "tail writing to database",
             IndexingStep::WaitingForExtractors => "waiting for extractors",
            IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
             IndexingStep::PostProcessingFacets => "post-processing facets",