meilisearch/tracing-trace/src/processor/span_stats.rs

158 lines
5.4 KiB
Rust
Raw Normal View History

2024-01-24 16:50:51 +08:00
use std::collections::{BTreeMap, HashMap};
2024-02-26 23:37:35 +08:00
use std::ops::Range;
2024-01-24 16:50:51 +08:00
use std::time::Duration;
use serde::{Deserialize, Serialize};
2024-01-24 16:50:51 +08:00
use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit};
use crate::{Error, TraceReader};
#[derive(Debug, Clone, Copy)]
enum SpanStatus {
Outside,
Inside(std::time::Duration),
}
#[derive(Serialize, Deserialize)]
2024-01-24 16:50:51 +08:00
pub struct CallStats {
2024-02-06 00:38:50 +08:00
pub call_count: usize,
pub time: u64,
2024-02-26 23:37:35 +08:00
pub self_time: u64,
}
#[derive(Debug, Default)]
pub struct SelfTime {
child_ranges: Vec<Range<Duration>>,
}
impl SelfTime {
pub fn new() -> Self {
Default::default()
}
pub fn add_child_range(&mut self, child_range: Range<Duration>) {
self.child_ranges.push(child_range)
}
pub fn self_duration(&mut self, self_range: Range<Duration>) -> Duration {
if self.child_ranges.is_empty() {
return self_range.end - self_range.start;
}
// by sorting child ranges by their start time,
// we make sure that no child will start before the last one we visited.
self.child_ranges
.sort_by(|left, right| left.start.cmp(&right.start).then(left.end.cmp(&right.end)));
// self duration computed by adding all the segments where the span is not executing a child
let mut self_duration = Duration::from_nanos(0);
// last point in time where we are certain that this span was not executing a child.
let mut committed_point = self_range.start;
for child_range in &self.child_ranges {
if child_range.start > committed_point {
// we add to the self duration the point between the end of the latest span and the beginning of the next span
self_duration += child_range.start - committed_point;
}
if committed_point < child_range.end {
// then we set ourselves to the end of the latest span
committed_point = child_range.end;
}
}
self_duration
}
2024-01-24 16:50:51 +08:00
}
pub fn to_call_stats<R: std::io::Read>(
trace: TraceReader<R>,
) -> Result<BTreeMap<String, CallStats>, Error> {
let mut calls = HashMap::new();
let mut spans = HashMap::new();
2024-02-26 23:37:35 +08:00
let mut last_point = Duration::from_nanos(0);
let mut first_point = None;
let mut total_self_time = SelfTime::new();
2024-01-24 16:50:51 +08:00
for entry in trace {
let entry = entry?;
match entry {
Entry::NewCallsite(callsite) => {
calls.insert(callsite.call_id, (callsite, vec![]));
}
Entry::NewThread(_) => {}
Entry::NewSpan(span) => {
2024-02-26 23:37:35 +08:00
spans.insert(span.id, (span, SpanStatus::Outside, SelfTime::new()));
2024-01-24 16:50:51 +08:00
}
Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
2024-02-26 23:37:35 +08:00
first_point.get_or_insert(time);
let (_, status, _) = spans.get_mut(&id).unwrap();
2024-01-24 16:50:51 +08:00
let SpanStatus::Outside = status else {
continue;
};
*status = SpanStatus::Inside(time);
}
Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
2024-02-26 23:37:35 +08:00
let (span, status, self_time) = spans.get_mut(&id).unwrap();
2024-01-24 16:50:51 +08:00
let SpanStatus::Inside(begin) = status else {
continue;
};
let begin = *begin;
2024-02-26 23:37:35 +08:00
if last_point < end {
last_point = end;
}
2024-01-24 16:50:51 +08:00
*status = SpanStatus::Outside;
2024-02-26 23:37:35 +08:00
let self_range = begin..end;
let self_duration = self_time.self_duration(self_range.clone());
*self_time = SelfTime::new();
2024-01-24 16:50:51 +08:00
let span = *span;
2024-02-26 23:37:35 +08:00
if let Some(parent_id) = span.parent_id {
let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
parent_self_time.add_child_range(self_range.clone())
}
total_self_time.add_child_range(self_range);
2024-01-24 16:50:51 +08:00
let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
2024-02-26 23:37:35 +08:00
call_list.push((end - begin, self_duration));
2024-01-24 16:50:51 +08:00
}
Entry::SpanClose(SpanClose { id, time: _ }) => {
spans.remove(&id);
}
Entry::Event(_) => {}
}
}
2024-02-26 23:37:35 +08:00
let total_self_time = first_point
.map(|first_point| (first_point, total_self_time.self_duration(first_point..last_point)));
2024-01-24 16:50:51 +08:00
Ok(calls
.into_iter()
.map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
2024-02-26 23:37:35 +08:00
.chain(total_self_time.map(|(first_point, total_self_time)| {
(
"::meta::total".to_string(),
CallStats {
call_count: 1,
time: (last_point - first_point).as_nanos() as u64,
self_time: total_self_time.as_nanos() as u64,
},
)
}))
2024-01-24 16:50:51 +08:00
.collect())
}
fn site_to_string(call_site: NewCallsite) -> String {
format!("{}::{}", call_site.target, call_site.name)
}
2024-02-26 23:37:35 +08:00
fn calls_to_stats(calls: Vec<(Duration, Duration)>) -> CallStats {
2024-01-24 16:50:51 +08:00
let nb = calls.len();
2024-02-26 23:37:35 +08:00
let sum: Duration = calls.iter().map(|(total, _)| total).sum();
let self_sum: Duration = calls.iter().map(|(_, self_duration)| self_duration).sum();
CallStats { call_count: nb, time: sum.as_nanos() as u64, self_time: self_sum.as_nanos() as u64 }
2024-01-24 16:50:51 +08:00
}