Get panics in index-scheduler

This commit is contained in:
Louis Dureuil 2023-11-15 23:06:19 +01:00
parent 355d3b7e45
commit 84f701679d
No known key found for this signature in database
3 changed files with 25 additions and 5 deletions

View File

@ -117,8 +117,13 @@ pub enum Error {
Heed(#[from] heed::Error), Heed(#[from] heed::Error),
#[error(transparent)] #[error(transparent)]
Milli(#[from] milli::Error), Milli(#[from] milli::Error),
#[error("An unexpected crash occurred when processing the task.")] #[error("An unexpected crash occurred when processing the task. {}", {
ProcessBatchPanicked, match .0 {
Some(report) => format!("Get /reports/{}", report),
None => "No report was saved.".into(),
}
})]
ProcessBatchPanicked(Option<uuid::Uuid>),
#[error(transparent)] #[error(transparent)]
FileStore(#[from] file_store::Error), FileStore(#[from] file_store::Error),
#[error(transparent)] #[error(transparent)]
@ -181,7 +186,7 @@ impl Error {
| Error::Dump(_) | Error::Dump(_)
| Error::Heed(_) | Error::Heed(_)
| Error::Milli(_) | Error::Milli(_)
| Error::ProcessBatchPanicked | Error::ProcessBatchPanicked(_)
| Error::FileStore(_) | Error::FileStore(_)
| Error::IoError(_) | Error::IoError(_)
| Error::Persist(_) | Error::Persist(_)
@ -224,7 +229,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(), Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(), Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal, Error::ProcessBatchPanicked(_) => Code::Internal,
Error::Heed(e) => e.error_code(), Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(), Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(), Error::FileStore(e) => e.error_code(),

View File

@ -39,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _, test_breakpoint_sdr: _,
planned_failures: _, planned_failures: _,
run_loop_iteration: _, run_loop_iteration: _,
panic_reader: _,
} = scheduler; } = scheduler;
let rtxn = env.read_txn().unwrap(); let rtxn = env.read_txn().unwrap();

View File

@ -54,6 +54,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use panic_hook::PanicReader;
use puffin::FrameView; use puffin::FrameView;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use synchronoise::SignalEvent; use synchronoise::SignalEvent;
@ -332,6 +333,8 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
pub(crate) panic_reader: PanicReader,
// ================= test // ================= test
// The next entry is dedicated to the tests. // The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler. /// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -382,6 +385,7 @@ impl IndexScheduler {
#[cfg(test)] #[cfg(test)]
run_loop_iteration: self.run_loop_iteration.clone(), run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(), features: self.features.clone(),
panic_reader: self.panic_reader.clone(),
} }
} }
} }
@ -439,6 +443,12 @@ impl IndexScheduler {
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
wtxn.commit()?; wtxn.commit()?;
const MAX_REPORT_COUNT: usize = 20;
let panic_reader = panic_hook::PanicReader::install_panic_hook(
std::num::NonZeroUsize::new(MAX_REPORT_COUNT).unwrap(),
);
// allow unreachable_code to get rids of the warning in the case of a test build. // allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self { let this = Self {
must_stop_processing: MustStopProcessing::default(), must_stop_processing: MustStopProcessing::default(),
@ -479,6 +489,7 @@ impl IndexScheduler {
#[cfg(test)] #[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)), run_loop_iteration: Arc::new(RwLock::new(0)),
features, features,
panic_reader,
}; };
this.run(); this.run();
@ -1131,7 +1142,10 @@ impl IndexScheduler {
.name(String::from("batch-operation")) .name(String::from("batch-operation"))
.spawn(move || cloned_index_scheduler.process_batch(batch)) .spawn(move || cloned_index_scheduler.process_batch(batch))
.unwrap(); .unwrap();
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
self.panic_reader
.join_thread(handle)
.unwrap_or_else(|maybe_report| Err(Error::ProcessBatchPanicked(maybe_report)))
}; };
#[cfg(test)] #[cfg(test)]