diff --git a/Cargo.lock b/Cargo.lock index b3991d130..4327fd59b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -231,9 +231,9 @@ dependencies = [ [[package]] name = "addr2line" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] @@ -435,9 +435,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", @@ -1638,9 +1638,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.27.3" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "git2" @@ -1894,6 +1894,7 @@ name = "index-scheduler" version = "1.4.1" dependencies = [ "anyhow", + "backtrace", "big_s", "bincode", "crossbeam", @@ -2856,9 +2857,9 @@ dependencies = [ [[package]] name = "object" -version = "0.31.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] @@ -3640,9 +3641,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.104" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "indexmap 2.0.0", "itoa", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 9e7c2ae4b..adfc91dba 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -12,6 +12,7 @@ license.workspace = true [dependencies] anyhow = "1.0.70" +backtrace = "0.3.69" bincode = "1.3.3" csv = "1.2.1" derive_builder = "0.12.0" diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 825f97f46..03c5c3b0e 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -26,6 +26,7 @@ mod index_mapper; #[cfg(test)] mod insta_snapshot; mod lru; +mod panic_hook; mod utils; mod uuid_codec; diff --git a/index-scheduler/src/panic_hook.rs b/index-scheduler/src/panic_hook.rs new file mode 100644 index 000000000..9e65c820d --- /dev/null +++ b/index-scheduler/src/panic_hook.rs @@ -0,0 +1,222 @@ +//! Panic hook designed to fetch a panic from a subthread and recover it on join. + +use std::collections::VecDeque; +use std::num::NonZeroUsize; +use std::panic::PanicInfo; +use std::sync::{Arc, RwLock}; +use std::thread::{JoinHandle, ThreadId}; + +use backtrace::Backtrace; + +pub struct Panic { + pub payload: Option, + pub location: Option, + pub thread_name: Option, + pub thread_id: ThreadId, + pub backtrace: Backtrace, +} + +#[derive(serde::Serialize)] +pub struct Report { + pub id: uuid::Uuid, + #[serde(serialize_with = "serialize_panic")] + pub panic: Panic, +} + +fn serialize_panic(panic: &Panic, s: S) -> std::result::Result +where + S: serde::Serializer, +{ + use serde::Serialize; + + panic.to_json().serialize(s) +} + +impl Report { + pub fn new(panic: Panic) -> Self { + Self { id: uuid::Uuid::new_v4(), panic } + } +} + +impl Panic { + pub fn to_json(&self) -> serde_json::Value { + json::panic_to_json(self) + } +} + +mod json { + use backtrace::{Backtrace, BacktraceFrame, BacktraceSymbol}; + use serde_json::{json, Value}; + + use super::Panic; + + fn symbol_to_json(symbol: &BacktraceSymbol) -> Value { + let address = symbol.addr().map(|addr| format!("{:p}", addr)); + let column = symbol.colno(); + let line = symbol.lineno(); + let function = symbol.name().map(|name| name.to_string()); + let filename = symbol.filename(); + json!({ + "function": function, + "filename": filename, + "line": line, + "column": column, + "address": address, + }) + } + + fn frame_to_json(frame: &BacktraceFrame) -> Value { + let symbols: Vec<_> = frame.symbols().iter().map(symbol_to_json).collect(); + match symbols.as_slice() { + [] => { + let address = format!("{:p}", frame.ip()); + json!({"address": address}) + } + [symbol] => json!(symbol), + symbols => json!(symbols), + } + } + + fn backtrace_to_json(backtrace: &Backtrace) -> Value { + let frames: Vec<_> = backtrace.frames().iter().map(frame_to_json).collect(); + json!(frames) + } + + pub fn panic_to_json(panic: &Panic) -> Value { + let thread_id = format!("{:?}", panic.thread_id); + serde_json::json!({ + "payload": panic.payload, + "location": panic.location, + "thread": { + "id": thread_id, + "name": panic.thread_name, + }, + "backtrace": backtrace_to_json(&panic.backtrace), + }) + } +} + +struct PanicWriter(Arc>); + +pub struct ReportRegistry { + reports: std::collections::VecDeque, +} + +impl ReportRegistry { + pub fn new(capacity: NonZeroUsize) -> Self { + Self { reports: VecDeque::with_capacity(capacity.get()) } + } + + pub fn push(&mut self, report: Report) -> Option { + let popped = if self.reports.len() == self.reports.capacity() { + self.reports.pop_back() + } else { + None + }; + self.reports.push_front(report); + popped + } + + pub fn iter(&self) -> impl Iterator { + self.reports.iter() + } + + pub fn find(&self, report_id: uuid::Uuid) -> Option<&Report> { + self.iter().find(|report| report.id == report_id) + } +} + +impl PanicWriter { + #[track_caller] + fn write_panic(&self, panic_info: &PanicInfo<'_>) { + let payload = panic_info + .payload() + .downcast_ref::<&str>() + .map(ToString::to_string) + .or_else(|| panic_info.payload().downcast_ref::().cloned()); + let location = panic_info.location().map(|loc| { + format!( + "{file}:{line}:{column}", + file = loc.file(), + line = loc.line(), + column = loc.column() + ) + }); + + let thread_name = std::thread::current().name().map(ToString::to_string); + let thread_id = std::thread::current().id(); + let backtrace = backtrace::Backtrace::new(); + + let panic = Panic { payload, location, thread_name, thread_id, backtrace }; + + let report = Report::new(panic); + + log::error!( + "An unexpected panic occurred on thread {name} at {location}: {payload}. See report '{report}' for details.", + payload = report.panic.payload.as_deref().unwrap_or("Box"), + name = report.panic.thread_name.as_deref().unwrap_or(""), + location = report.panic.location.as_deref().unwrap_or(""), + report = report.id, + ); + + if let Ok(mut registry) = self.0.write() { + if let Some(old_report) = registry.push(report) { + log::trace!("Forgetting report {} to make space for new report.", old_report.id) + } + } + } +} + +#[derive(Clone)] +pub struct PanicReader(Arc>); + +impl PanicReader { + pub fn install_panic_hook(capacity: NonZeroUsize) -> Self { + let registry = Arc::new(RwLock::new(ReportRegistry::new(capacity))); + let reader = PanicReader(registry.clone()); + let writer = PanicWriter(registry.clone()); + + std::panic::set_hook(Box::new(move |panic_info| writer.write_panic(panic_info))); + reader + } + + pub fn join_thread(&self, thread: JoinHandle) -> Result> { + let thread_id = thread.thread().id(); + thread.join().map_err(|_e| { + self.0 + .read() + .unwrap() + .iter() + .find(|report| report.panic.thread_id == thread_id) + .map(|report| report.id) + }) + } + + pub fn registry(&self) -> Arc> { + self.0.clone() + } +} + +/* +fn deep_panic() { + panic!("Panic message sent from deep inside the sub-thread"); +} + +fn do_work() { + deep_panic(); +} + +fn main() { + let mut panic_receiver = PanicReceiver::install_panic_hook(); + let subthread = std::thread::Builder::new() + .name("subthread".into()) + .spawn(|| { + do_work(); + }) + .unwrap(); + match panic_receiver.join_thread(subthread) { + Ok(_) => {} + Err(frame) => println!("{}", serde_json::to_string_pretty(frame.to_json()).unwrap()), + } +} +*/